In [5]:
import os
from openai import OpenAI

# OpenAI SDK client pointed at Gemini's OpenAI-compatible endpoint.
# os.environ[...] (rather than os.getenv) fails immediately with a KeyError naming
# GEMINI_API_KEY when the variable is unset, instead of handing api_key=None to the
# client and surfacing a less obvious error later.
gemini_openai_client = OpenAI(
    api_key=os.environ["GEMINI_API_KEY"],
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

In [7]:
# Single-turn request through the OpenAI-compatible endpoint.
messages = [dict(role="user", content="What is RAG in one sentence?")]

response = gemini_openai_client.chat.completions.create(
    messages=messages,
    model="gemini-3-flash-preview",
)

# The reply text is read from the first choice of the completion.
first_choice = response.choices[0]
print(first_choice.message.content)

RAG (Retrieval-Augmented Generation) is an AI framework that enhances large language model responses by retrieving relevant information from external data sources to ensure accuracy and provide up-to-date context.


In [10]:
story_prompt = [{"role": "user", "content": "Tell me a very short story (250 words)"}]

# With stream=True the call yields chunks to iterate over instead of one completion.
response = gemini_openai_client.chat.completions.create(
    model='gemini-3-flash-preview',
    messages=story_prompt,
    stream=True
)

for chunk in response:
    # A streamed chunk may carry an empty `choices` list; skip it rather than
    # raising IndexError on choices[0].
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    # Chunks without text content are skipped.
    if delta_text:
        print(delta_text, end='', flush=True)


Elias lived in the lighthouse on the edge of the world. Every night, he climbed the spiral stairs, his joints creaking in rhythm with the iron steps. He didn’t just light the lamp for the ships; he lit it for the things above.

"Quiet tonight, isn’t it?" he whispered to the North Star.

The star flickered—a subtle pulse of silver. Elias smiled, his weathered face mapping decades of these silent conversations. People in the village below called him mad, but they didn’t see what he saw. They didn’t notice the way the constellations shifted when he told a joke, or how the moon paled when he spoke of his late wife, Clara.

One Tuesday, the oil ran dry. A storm was brewing, the sky turning the color of a bruised plum. Elias panicked, his hands trembling as he scraped the bottom of the empty canisters. If the light failed, the darkness would swallow more than just the ships; it would sever his connection to the only friends he had left.

He sat on the gallery floor, head in his han

In [11]:
from pydantic import BaseModel

# Target schema for the structured-output examples in this notebook: it is passed
# as `response_format` to the OpenAI-compatible client and, via
# `model_json_schema()`, as `response_json_schema` to the google-genai client.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic may surface a docstring in the generated JSON schema — confirm before
# adding one.
class CalendarEvent(BaseModel):
    name: str  # event name ('science fair' in the sample output)
    date: str  # kept as free text ('Friday' in the sample output), not a date type
    participants: list[str]  # people taking part (['Alice', 'Bob'] in the sample output)

In [13]:
# System turn carries the extraction instruction; user turn carries the raw text.
messages = [
    {"role": "system", "content": "Extract the event information."},
    {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
]

response = gemini_openai_client.chat.completions.parse(
    response_format=CalendarEvent,
    messages=messages,
    model="gemini-3-flash-preview",
)

# Bare last expression so the notebook displays the parsed CalendarEvent.
parsed_event = response.choices[0].message.parsed
parsed_event

CalendarEvent(name='science fair', date='Friday', participants=['Alice', 'Bob'])

In [14]:
# Shell escape: install the google-genai package with uv; it is needed by the
# `from google import genai` cell that follows.
!uv add google-genai

Resolved 171 packages in 4.16s
Installed 6 packages in 2.48s
 + cryptography==46.0.4
 + google-auth==2.48.0
 + google-genai==1.61.0
 + pyasn1-modules==0.4.2
 + tenacity==9.1.2
 + websockets==15.0.1


In [15]:
from google import genai
# Native google-genai client. No api_key is passed explicitly — presumably the SDK
# reads its credentials from the environment (TODO confirm which variable it uses).
gemini_client = genai.Client()

In [16]:
# One-shot question through the native google-genai client.
response = gemini_client.models.generate_content(
    contents="What is the capital of France?",
    model="models/gemini-3-flash-preview",
)

answer_text = response.text
print(answer_text)

The capital of France is **Paris**.


In [17]:
story_prompt = "Tell me a very short story (250 words)"

response = gemini_client.models.generate_content_stream(
    contents=story_prompt,
    model="models/gemini-3-flash-preview",
)

# Print each piece as it arrives; chunks with no text are skipped.
for chunk in response:
    if not chunk.text:
        continue
    print(chunk.text, end="", flush=True)

Unit 734 rolled through the rusted canyons of the Old City. Its sensors clicked rhythmically, scanning for salvageable copper. For three hundred years, the sky had been the color of a bruised plum, thick with the soot of a forgotten age.

Then, a flicker of neon green.

The robot stopped. It nudged a piece of corrugated tin aside with a pincer. There, rooted in a jagged crack in the pavement, was a dandelion. It was small, defiant, and impossibly bright.

Unit 734’s processors whirred. It had no protocol for "flower." It searched its ancient, dusty database, bypassing files on "circuitry" and "lubricant," until it reached a corrupted folder titled *Flora*.


The robot looked up at the heavy, smog-filled sky. There were no photons here. It looked at the dry, calcified earth. There was no water.

The machine made a decision. It extended its primary manipulator arm, shielding the tiny plant from the biting, acidic wind. Then, it opened its internal cooling vent—the source of its own m

In [18]:
from google.genai import types

instructions = "You are a helpful assistant. Reply with emojis."

# The system prompt is supplied through the request config rather than as a message.
emoji_config = types.GenerateContentConfig(system_instruction=instructions)

response = gemini_client.models.generate_content(
    model="models/gemini-2.5-flash",
    contents="Hello!",
    config=emoji_config,
)
print(response.text)


👋


In [19]:
instructions = "Extract the event information."
content = "Alice and Bob are going to a science fair on Friday."

# Request JSON output, described by the schema pydantic generates for CalendarEvent.
extraction_config = types.GenerateContentConfig(
    system_instruction=instructions,
    response_mime_type="application/json",
    response_json_schema=CalendarEvent.model_json_schema(),
)

response = gemini_client.models.generate_content(
    model="models/gemini-2.5-flash",
    contents=content,
    config=extraction_config,
)


In [21]:
# Validate the JSON text of the reply into a CalendarEvent instance.
raw_json = response.text
event = CalendarEvent.model_validate_json(raw_json)
print(event)

name='science fair' date='Friday' participants=['Alice', 'Bob']


In [24]:
# .strip() removes the leading/trailing newlines that come from the triple-quoted layout.
system_prompt = """
You're an assistant that can make jokes. Always find out the name of
the person to make the jokes personalized. Once you know the name,
make the joke about them.
""".strip()

content = "tell me a joke"

joke_config = types.GenerateContentConfig(system_instruction=system_prompt)

# Stand-alone request (no chat session involved in this cell).
response = gemini_client.models.generate_content(
    model="models/gemini-3-flash-preview",
    contents=content,
    config=joke_config,
)

# Bare last expression so the notebook shows the reply string.
response.text

"I'd love to tell you a joke! But first, I need to know who I'm talking to. What's your name?"

In [26]:
# Chat session configured with the same joke-telling system prompt.
chat_config = types.GenerateContentConfig(system_instruction=system_prompt)
gemini_chat = gemini_client.chats.create(
    config=chat_config,
    model="models/gemini-3-flash-preview",
)

In [30]:
# First turn on the chat session.
resp1 = gemini_chat.send_message("tell me a joke")
print(resp1.text)

I’d love to tell you a joke! But before I do, I need to know who I’m entertaining. What is your name?


In [31]:
# Second turn on the same session: only the name is sent; the earlier joke request
# is part of the session history (see gemini_chat.get_history()).
resp2 = gemini_chat.send_message("Alexey")
print(resp2.text)

Nice to meet you, Alexey! Here is a joke just for you:

Why did Alexey bring a ladder to the party?

Because he heard the drinks were on the house!


In [33]:
# Display the turns recorded on the chat session (user and model entries).
gemini_chat.get_history()

[UserContent(
   parts=[
     Part(
       text='tell me a joke'
     ),
   ],
   role='user'
 ),
 Content(
   parts=[
     Part(
       text='I’d love to tell you a joke! But before I do, I need to know who I’m entertaining. What is your name?',
       thought_signature=b'\x12\xe6\x03\n\xe3\x03\x01r\xc8\xda|b\x08"rx\x16t%C\\s]\x12\x90/\x9a\xe5\xee\x15x\x86kD\xba|\x91[\x8d\x87\x9b\x9bE\xb1<i\x8c\xde\xf3\xb6\xa31z\xf5=\x96\xdb\xafE"\xbb\xf8C\xd6R\xbcD\xce\xd0\\6Q\xd8z\xceiN\x17\x94\xe7%LL,z\x125\xcb\x15\xb5\xcb^~\xdcE\x8a\x10...'
     ),
   ],
   role='model'
 ),
 UserContent(
   parts=[
     Part(
       text='Alexey'
     ),
   ],
   role='user'
 ),
 Content(
   parts=[
     Part(
       text="""Nice to meet you, Alexey! Here is a joke just for you:
 
 Why did Alexey bring a ladder to the party?
 
 Because he heard the drinks were on the house!""",
       thought_signature=b'\x12\xaf\x0b\n\xac\x0b\x01r\xc8\xda|_f\xfe\xcce\xc2\x81\xaeh\x888\xc5\xb8\xe90h\x1a\xac\xfaNtY_Y;Lx\xae\xe4

In [34]:
import rag
# Build the search index via the local `rag` module; the call reports how many
# chunks and documents were indexed. (Implementation lives outside this notebook.)
index = rag.initialize_index()

Indexed 385 chunks from 95 documents


In [35]:
class GeminiRAG(rag.RAG):
    """``rag.RAG`` subclass whose LLM step goes through a google-genai client."""

    def llm(self, user_prompt):
        """Send ``user_prompt`` to the model and return the validated structured reply.

        The request uses ``self.rag_instructions`` as the system instruction and
        asks for JSON described by the schema of ``self.output_type``. The reply
        text is then validated into an instance of ``self.output_type``.

        Args:
            user_prompt: Prompt passed as ``contents`` of the request.

        Returns:
            The result of ``self.output_type.model_validate_json`` on the reply text.
        """
        request_config = types.GenerateContentConfig(
            system_instruction=self.rag_instructions,
            response_mime_type="application/json",
            response_json_schema=self.output_type.model_json_schema(),
        )
        reply = self.llm_client.models.generate_content(
            model=self.model_name,
            contents=user_prompt,
            config=request_config,
        )
        return self.output_type.model_validate_json(reply.text)


In [36]:
# Wire the RAG pipeline to the index and the google-genai client. model_name is
# presumably stored as self.model_name, which GeminiRAG.llm passes as `model=`
# (constructor is defined in rag.RAG — verify there).
gemini_rag = GeminiRAG(
    index,
    gemini_client,
    model_name='models/gemini-3-flash-preview'
)


In [37]:
# Run the RAG pipeline for one query; the result exposes `.answer` and
# `.followup_questions`, which the following cells print.
response = gemini_rag.rag('llm as a judge')

In [39]:
# Print the answer field of the RAG result (Markdown-formatted text in the output below).
print(response.answer)

### LLM as a Judge Overview
**LLM as a judge** refers to using a Large Language Model to evaluate text outputs based on custom criteria. This approach is used for regression testing, prompt comparison, or production evaluation.

### Types of LLM Evaluators
According to the documentation, there are two primary ways to use an LLM as a judge:
- **Reference-based**: Compares new responses against a "ground truth" or approved reference response. This is useful for regression testing.
- **Open-ended**: Evaluates responses based on specific criteria (e.g., verbosity or conciseness) when no reference is available.

### Implementation Process
To create an LLM judge using the `evidently` library, you follow these steps:
1. **Define a Dataset**: Create a dataset containing inputs (questions), target responses, and the new responses to be evaluated.
2. **Design a Prompt Template**: Use classes like `BinaryClassificationPromptTemplate` to define the evaluation criteria (e.g., correctness or concise

In [40]:
# Print the follow-up questions field of the RAG result (a list of strings in the output below).
print(response.followup_questions)

['How do I change the evaluator LLM model?', 'What is the difference between BinaryClassificationPromptTemplate and multi-class templates?', 'How can I create an LLM judge without writing code?', "How do I view the reasoning behind an LLM judge's decision?"]
