In [1]:
import os
from openai import OpenAI

# Read the xAI key explicitly and fail fast when it is missing.
# Passing api_key=None would make the OpenAI client fall back to the
# OPENAI_API_KEY environment variable, which would then be sent to the
# xAI endpoint configured below (or produce a misleading OpenAI error).
grok_api_key = os.getenv("GROK_API_KEY")
if not grok_api_key:
    raise RuntimeError(
        "GROK_API_KEY is not set; export it before running this notebook."
    )

# OpenAI-compatible client pointed at the xAI API base URL.
grok_client = OpenAI(
    api_key=grok_api_key,
    base_url="https://api.x.ai/v1",
)

In [2]:
# Smoke test: one user turn, non-streaming, print the text of the first choice.
messages = [{"role": "user", "content": "What is 2+2?"}]

response = grok_client.chat.completions.create(
    messages=messages,
    model="grok-4-1-fast-reasoning",
)
print(response.choices[0].message.content)


**4**

(That's basic arithmetic: 2 + 2 = 4.)


In [4]:
story_prompt = [{"role": "user", "content": "Tell me a very short story (250 words)"}]

# stream=True: the result is iterated chunk by chunk below instead of being
# read as one completed message.
response = grok_client.chat.completions.create(
    model='grok-4-1-fast-reasoning',
    messages=story_prompt,
    stream=True
)

for chunk in response:
    # A streamed chunk may carry an empty `choices` list (for example a
    # usage-only chunk); indexing choices[0] on it would raise IndexError
    # and abort the stream mid-output.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    # Skip chunks whose delta has no text; print the rest without newlines so
    # the pieces join into continuous output as they arrive.
    if delta_text:
        print(delta_text, end='', flush=True)


### The Last Signal

In the dim glow of her cockpit, Captain Elara gripped the controls of the *Stellar Drift*, hurtling through the void toward Proxima Centauri. Alarms blared—oxygen failing, hull breached. She'd been alone for 47 years, chasing the faint signal that promised life beyond Sol.

"Mayday, mayday," she whispered into the comms, voice hoarse. "This is Elara Voss. Anyone...?"

Static. Then, a reply: "Elara? It's me. Dad."

Her heart seized. Impossible. He'd died launching her ship. A glitch? AI hallucination?

"Coordinates locked," the voice continued, warm and familiar. "Docking in three."

Trembling, she watched a sleek vessel emerge from the nebula, matching her velocity. The airlock hissed open. Footsteps echoed.

There he stood—older, grayer, but alive. "Surprise, kiddo. Cryosleep tech finally worked. Followed you all this way."

Tears blurred her vision as they embraced. But as the hatch sealed behind him, she glimpsed the truth: his eyes glowed faintly, circuits humm

In [5]:
from pydantic import BaseModel

# Structured-output schema for a calendar event. It is passed as
# `response_format` to `chat.completions.parse` in a later cell.
# NOTE(review): documented with `#` comments rather than a docstring because
# pydantic may surface a class docstring in the generated JSON schema, which
# would change what is sent to the model — confirm before converting.
class CalendarEvent(BaseModel):
    name: str  # event title (the sample run yields 'Science Fair')
    date: str  # free-form text, not a parsed date (the sample run yields 'Friday')
    participants: list[str]  # participant names (the sample run yields ['Alice', 'Bob'])


In [6]:
# Ask the model to extract a CalendarEvent from one free-text sentence.
content = "Alice and Bob are going to a science fair on Friday."

response = grok_client.chat.completions.parse(
    messages=[{"role": "user", "content": content}],
    model="grok-4-1-fast-reasoning",
    response_format=CalendarEvent,
)

# `.parsed` holds the response already converted to the response_format type.
event = response.choices[0].message.parsed
print(event)


name='Science Fair' date='Friday' participants=['Alice', 'Bob']


In [7]:
# Build the document index consumed by the RAG cells below.
# NOTE(review): `rag` is presumably a project-local module (not shown here);
# what `initialize_index()` loads and how it chunks documents must be
# confirmed there.
import rag
index = rag.initialize_index()

Indexed 385 chunks from 95 documents


In [8]:
class ChatCompletionsRAG(rag.RAG):
    """RAG variant whose LLM step calls `chat.completions.parse`.

    Only `llm` is overridden. It reads `rag_instructions`, `llm_client`,
    `model_name` and `output_type` from the instance; these are presumably
    set up by `rag.RAG` (not shown here — confirm).
    """

    def llm(self, user_prompt):
        """Send the prompt to the model and return its parsed structured output.

        Args:
            user_prompt: Content of the user message.

        Returns:
            The `parsed` attribute of the first choice's message.
        """
        system_message = {"role": "system", "content": self.rag_instructions}
        user_message = {"role": "user", "content": user_prompt}

        completion = self.llm_client.chat.completions.parse(
            model=self.model_name,
            messages=[system_message, user_message],
            response_format=self.output_type,
        )

        first_choice = completion.choices[0]
        return first_choice.message.parsed


In [9]:
# Wire the index and the Grok client into the Chat Completions RAG class,
# run one query, and show only the first 50 characters of the answer.
responses_rag = ChatCompletionsRAG(
    index,
    grok_client,
    model_name="grok-4-1-fast-reasoning",
)
response = responses_rag.rag("llm as a judge")
print(response.answer[:50] + "...")

# LLM as a Judge

This is a tutorial on how to eva...


In [10]:
# Display the follow-up questions carried on the RAG response from the
# previous cell (bare last expression, so the notebook renders it).
response.followup_questions

['How to install and set up Evidently for LLM judges?',
 'What is a reference-based LLM evaluator?',
 'How to create a BinaryClassificationPromptTemplate?',
 'How to evaluate the quality of the LLM judge?',
 'How to upload results to Evidently Cloud?']