# **LLM Judge Code** (using gpt-5-nano)

In [1]:
!pip install openai



In [2]:
from google.colab import drive
from openai import OpenAI
from pydantic import BaseModel
from google.colab import userdata
from IPython.display import HTML, Markdown
import os
import json
# Fetch the API key through google.colab's `userdata` and export it as an
# environment variable; the OpenAI client later in the notebook is constructed
# without an explicit key argument.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [4]:
# Mount Google Drive at /content/drive; the input and output JSON paths used
# later in the notebook point under drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Helper function to get prompt from the data
def get_prompt(question, answer, reference):
  """Build the judge prompt for one question/answer/reference triple.

  Args:
    question: The question that was asked to the tour-guide model.
    answer: The answer produced by the model being evaluated.
    reference: The reference answer the judge should compare against.

  Returns:
    A single-element list holding one message dict with "role" and "content"
    keys, suitable for passing as the `input` of a Responses API call.
  """
  # The instructions are sent as a *user* message: they are the request the
  # judge model has to respond to, not something the model said itself.
  # (The local is not called `input` so the builtin is not shadowed.)
  messages = [{"role": "user", "content": f"""You are an expert judge evaluating answers from a Jeju tour guide LLM. Evaluate the answer based on the question and reference given.

Question: {question}

Answer: {answer}

Reference: {reference}

Score the answer from 1 (worst) to 5 (best) on the following criteria:
1. Relevance
2. Factual correctness
3. Helpfulness
4. Conciseness

Give a short explanation for your rating as well.
"""}]
  return messages

# Structured-output specification handed to the Responses API via `text=`:
# a strict JSON schema with four numeric scores plus a free-text explanation.
_score_keys = ["relevance", "factual_correctness", "helpfulness", "conciseness"]

_judge_schema = {
    "type": "object",
    "properties": {
        # every score criterion is a plain JSON number
        **{key: {"type": "number"} for key in _score_keys},
        "explanation": {"type": "string"},
    },
    # all fields are mandatory and no extra fields are allowed
    "required": [*_score_keys, "explanation"],
    "additionalProperties": False,
}

text_format = {
    "format": {
        "type": "json_schema",
        "name": "judge_response",
        "strict": True,
        "schema": _judge_schema,
    }
}


In [36]:
# Import the data to be judged.
# The file is read explicitly as UTF-8 instead of relying on the platform
# default encoding; the judged results are written back as UTF-8 with
# non-ASCII characters unescaped, so reading must use the same encoding.
with open("./drive/MyDrive/hyperscale/results/openai_base_results.json", encoding="utf-8") as f:
    data = json.load(f)

In [37]:
# No api_key argument is passed here; OPENAI_API_KEY was exported to the
# environment in the setup cell at the top of the notebook.
client = OpenAI()

In [38]:
# Main Loop for LLM as a Judge
# For every entry: build the judge prompt, call the model with the structured
# output schema, parse the JSON verdict and store it under
# entry['metrics']['llm_judge'].
# Relies on `data`, `client`, `get_prompt` and `text_format` from earlier cells.

total = len(data)  # actual dataset size, so the progress display is never wrong
for i, entry in enumerate(data, start=1):
  question = entry['question_text']
  generated_answer = entry['answer']
  reference = entry['reference']
  prompt = get_prompt(question, generated_answer, reference)
  response = client.responses.create(
      model="gpt-5-nano", # using gpt-5-nano, capable of disagreeing
      # tools=tools, ---- omitted web search because it takes too long, reference is assumed as correct answer
      input=prompt,
      text=text_format
  )
  # The reply text is parsed as JSON (the request asks for a json_schema
  # format); `response` keeps referring to the API response object.
  container = json.loads(response.output_text.strip())
  # setdefault creates the 'metrics' dict for entries that do not have one yet
  # instead of raising KeyError.
  entry.setdefault('metrics', {})['llm_judge'] = container
  print(f"{i}/{total}")

1/100
2/100
3/100
4/100
5/100
6/100
7/100
8/100
9/100
10/100
11/100
12/100
13/100
14/100
15/100
16/100
17/100
18/100
19/100
20/100
21/100
22/100
23/100
24/100
25/100
26/100
27/100
28/100
29/100
30/100
31/100
32/100
33/100
34/100
35/100
36/100
37/100
38/100
39/100
40/100
41/100
42/100
43/100
44/100
45/100
46/100
47/100
48/100
49/100
50/100
51/100
52/100
53/100
54/100
55/100
56/100
57/100
58/100
59/100
60/100
61/100
62/100
63/100
64/100
65/100
66/100
67/100
68/100
69/100
70/100
71/100
72/100
73/100
74/100
75/100
76/100
77/100
78/100
79/100
80/100
81/100
82/100
83/100
84/100
85/100
86/100
87/100
88/100
89/100
90/100
91/100
92/100
93/100
94/100
95/100
96/100
97/100
98/100
99/100
100/100


In [39]:
# Destination file for the judged results. It uses a "_v2" file name in the
# same results folder, so the input results file is not overwritten.
results_path = "./drive/MyDrive/hyperscale/results/openai_base_results_v2.json"

In [40]:
# Persist the judged entries as indented UTF-8 JSON, leaving non-ASCII
# characters unescaped in the file.
with open(results_path, 'w', encoding='utf-8') as fh:
    fh.write(json.dumps(data, indent=2, ensure_ascii=False))

In [40]:
# NO NEED TO RUN -- this cell only displays one entry to show the schema of the JSON.
# `entry` is the variable left bound by the judging loop, so that loop must
# have been executed before this cell.
entry

{'question_id': 0,
 'question_text': 'How far is Petrichor Jeju from Gureumri Beach? Is it possible to walk there with children?',
 'answer': 'Here is some information on how to get from Petrichor Jeju to Gureumri Beach.\n\n**Distance and Travel Time**\n- Distance: Approximately 12.3km\n- Travel Time: Approximately 1 hour and 15 minutes by car\n- Route: Take the Gudeok-ro route\n\n**Recommended Travel Plan**\n\n1. Morning Departure\n- Departure time: 8:00 AM\n- Arrival time at Gureumri Beach: 9:15 AM\n- Recommended time to spend: 2-3 hours\n- Recommended course: Walk along the beach, enjoy the ocean view, and take a break at a nearby cafe\n\n2. Afternoon Return\n- Return time: 12:00 PM\n- Estimated return time: 1:15 PM\n- Recommended course: Return to Petrichor Jeju and enjoy the afternoon\n\n**Things to note when traveling with children**\n\n- Gureumri Beach is a relatively flat area, so it is easy to walk\n- There is a parking lot, so it is convenient to use a car\n- There is a cafe 