<a href="https://colab.research.google.com/github/deltorobarba/sciences/blob/master/ai_llm_as_a_judge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### **LLM as a Judge**

In [None]:
# Notebook configuration. Edit the values marked "UPDATE THIS" before running
# any other cell; PROJECT_ID and LOCATION are read by the Vertex AI / GenAI
# initialisation cells and by the GCS helper functions further down.

# Google Cloud project ID used for Vertex AI and Cloud Storage clients.
PROJECT_ID = "YOUR-PROJECT-ID"               # <--- UPDATE THIS
# Region passed to vertexai.init(...) and genai.Client(...).
LOCATION = "us-central1"
# Bucket name only, without the "gs://" scheme.
BUCKET_NAME = "translations-eval" # <--- UPDATE THIS
# Derived from BUCKET_NAME; no need to edit.
BUCKET_URI = f"gs://{BUCKET_NAME}"
# NOTE(review): presumably the object name of the TMX file inside BUCKET_NAME,
# which must be uploaded there beforehand — confirm against the cell that reads it.
TMX_GCS_PATH = "samples.tmx"    # <--- UPLOAD THIS
# NOTE(review): presumably the local filename the TMX file is downloaded to — confirm.
LOCAL_TMX_FILE = "samples.tmx"

In [None]:
%pip install --upgrade google-cloud-aiplatform google-cloud-storage -q
%pip install matplotlib seaborn langdetect -q
%pip install --upgrade --user --quiet google-cloud-aiplatform[evaluation]

!pip install google-cloud-translate==2.0.1 -q
!pip install --upgrade google-cloud-translate -q

import xml.etree.ElementTree as ET
import json
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter, defaultdict
from sklearn.model_selection import train_test_split
from google.cloud import aiplatform, storage
from google.cloud import translate_v3 as translate
import vertexai
from vertexai.tuning import sft
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
import re
from typing import Dict, List, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')

# For language detection
from langdetect import detect, detect_langs

In [None]:
# Bind the Vertex AI SDK to the project and region chosen in the
# configuration cell.
vertexai.init(
    project=PROJECT_ID,
    location=LOCATION,
)

# Utility Functions
def download_from_gcs(bucket_name, source_blob_name, destination_file_name):
    """Copy one Cloud Storage object to a local file.

    A new ``storage.Client`` is created for ``PROJECT_ID`` on every call.

    Args:
        bucket_name: Name of the bucket holding the object.
        source_blob_name: Name of the object inside the bucket.
        destination_file_name: Local path the object is written to.

    Returns:
        ``destination_file_name``, unchanged.
    """
    print(f"--- Downloading {source_blob_name} ---")
    gcs = storage.Client(project=PROJECT_ID)
    remote_object = gcs.bucket(bucket_name).blob(source_blob_name)
    remote_object.download_to_filename(destination_file_name)
    print(f"Successfully downloaded to {destination_file_name}")
    return destination_file_name

def upload_to_gcs(bucket_name, source_file_name, destination_blob_name):
    """Copy a local file into a Cloud Storage bucket.

    A new ``storage.Client`` is created for ``PROJECT_ID`` on every call.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Local path of the file to send.
        destination_blob_name: Object name to write inside the bucket.

    Returns:
        The ``gs://<bucket>/<object>`` URI of the uploaded object.
    """
    print(f"--- Uploading {source_file_name} ---")
    target = (
        storage.Client(project=PROJECT_ID)
        .bucket(bucket_name)
        .blob(destination_blob_name)
    )
    target.upload_from_filename(source_file_name)
    gcs_uri = f"gs://{bucket_name}/{destination_blob_name}"
    print(f"Successfully uploaded to {gcs_uri}")
    return gcs_uri

In [None]:
from google import genai
from google.genai.types import (
    FunctionDeclaration,
    GenerateContentConfig,
    GoogleSearch,
    HarmBlockThreshold,
    HarmCategory,
    MediaResolution,
    Part,
    Retrieval,
    SafetySetting,
    Tool,
    ToolCodeExecution,
    VertexAISearch,
)
from IPython.display import HTML, Markdown, display

In [None]:
import os
import vertexai

# PROJECT_ID and LOCATION come from the configuration cell. To take them from
# the environment instead, read GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_REGION via
# os.environ before this point (e.g. a region of "global" or "us-central1").

# Google GenAI client routed through Vertex AI for the configured project/region.
client = genai.Client(
    vertexai=True,
    project=PROJECT_ID,
    location=LOCATION,
)
# The Vertex AI SDK is initialised again here with the same project and region.
vertexai.init(
    project=PROJECT_ID,
    location=LOCATION,
)

In [None]:
# Installs the separate `google-generativeai` package, plus the plotting
# libraries, from inside the notebook.
!pip install google-generativeai seaborn matplotlib -q

import pandas as pd
# NOTE(review): this rebinds the name `genai`. An earlier cell bound it with
# `from google import genai` and used it for `genai.Client(...)`. Once this
# cell has run, `genai` refers to `google.generativeai` instead; the `client`
# object that was already created is unaffected, but re-running the client
# cell afterwards would look up `genai.Client` on this module. Confirm which
# SDK is intended and give one of the two imports a distinct alias.
import google.generativeai as genai
from scipy.stats import entropy
import numpy as np
import json
import os

In [None]:
# Report which backend `client` is talking to: the Gemini Developer API, Vertex AI
# with a project/location, or Vertex AI express mode with an API key (only the
# first and last five characters of the key are printed).
if client.vertexai:
  if client._api_client.project:
    print(
        f"Using Vertex AI with project: {client._api_client.project} in location:"
        f" {client._api_client.location}"
    )
  elif client._api_client.api_key:
    print(
        "Using Vertex AI in express mode with API key:"
        f" {client._api_client.api_key[:5]}...{client._api_client.api_key[-5:]}"
    )
else:
  print("Using Gemini Developer API.")

Using Vertex AI with project: lunar-352813 in location: us-central1


In [None]:
# Smoke test: send one trivial prompt through `client` and render the reply,
# to confirm the configured project can reach the model.
MODEL_ID = "gemini-2.5-flash"  # @param {type: "string"}
response = client.models.generate_content(
    contents="What is the name of the largest search engine?",
    model=MODEL_ID,
)

display(Markdown(response.text))

In [None]:
# @title LLM as a Judge

# Renders an evaluation summary table as Markdown, embeds it in an analyst
# prompt, sends that prompt to Gemini, and displays the model's reply.

from vertexai.generative_models import GenerativeModel
from IPython.display import display, Markdown

# NOTE(review): `summary_df` is not defined in any cell visible here. It is
# presumably a DataFrame built by an earlier evaluation step with per-language
# results for the bleu/comet/metricx metrics named in the prompt — confirm it
# exists before running this cell on a fresh kernel.
table_as_markdown = summary_df.to_markdown(index=False)
# The Markdown table is interpolated into the prompt verbatim; the numbered list
# tells the model which sections the analysis should contain.
prompt = f"""
You are a helpful expert analyst specializing in machine learning and translation models.
Your task is to interpret the following model evaluation results and explain them clearly.

Here is the data:
{table_as_markdown}

Please provide an analysis that includes:
1.  A high-level summary of the overall performance.
2.  A simple explanation of what each metric (bleu, comet, metricx) measures.
3.  A specific breakdown of each language's performance, identifying the strongest models and any with potential issues.
4.  A concluding recommendation for next steps.
"""

# A single generation request; the reply text is rendered as Markdown.
model = GenerativeModel("gemini-2.5-pro")
response = model.generate_content(prompt)
display(Markdown(response.text))