In [4]:
# imports

import os
import requests
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import Markdown, display

In [5]:
# Load variables from the .env file, then read each provider's API key
# from the environment.
load_dotenv(override=True)

openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')
deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')
grok_api_key = os.getenv('GROK_API_KEY')
openrouter_api_key = os.getenv('OPENROUTER_API_KEY')

# One row per provider:
# (label, key value, number of leading characters to reveal, suffix when missing)
_key_report = [
    ("OpenAI", openai_api_key, 8, ""),
    ("Anthropic", anthropic_api_key, 7, " (and this is optional)"),
    ("Google", google_api_key, 2, " (and this is optional)"),
    ("DeepSeek", deepseek_api_key, 3, " (and this is optional)"),
    ("Groq", groq_api_key, 4, " (and this is optional)"),
    ("Grok", grok_api_key, 4, " (and this is optional)"),
    ("OpenRouter", openrouter_api_key, 3, " (and this is optional)"),
]

# Only a short prefix of each key is printed, never the whole secret.
for _label, _value, _shown, _missing_note in _key_report:
    if _value:
        print(f"{_label} API Key exists and begins {_value[:_shown]}")
    else:
        print(f"{_label} API Key not set{_missing_note}")


OpenAI API Key exists and begins sk-proj-
Anthropic API Key not set (and this is optional)
Google API Key exists and begins AI
DeepSeek API Key exists and begins sk-
Groq API Key not set (and this is optional)
Grok API Key not set (and this is optional)
OpenRouter API Key not set (and this is optional)


In [6]:
# Connect to OpenAI client library
# A thin wrapper around calls to HTTP endpoints

openai = OpenAI()

# Anthropic, Gemini, DeepSeek, Groq, Grok, OpenRouter and a local Ollama server
# are all reached through the same OpenAI python client, by pointing base_url
# at each provider's OpenAI-compatible endpoint.

anthropic_url = "https://api.anthropic.com/v1/"
gemini_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
deepseek_url = "https://api.deepseek.com"
groq_url = "https://api.groq.com/openai/v1"
grok_url = "https://api.x.ai/v1"
openrouter_url = "https://openrouter.ai/api/v1"
ollama_url = "http://localhost:11434/v1"


def _compatible_client(api_key, base_url, provider):
    """Build an OpenAI-style client aimed at another provider's endpoint.

    Args:
        api_key: The provider's key, or None/empty when it is not configured.
        base_url: The provider's OpenAI-compatible endpoint.
        provider: Short provider name, used only to build the placeholder key.

    Returns:
        An OpenAI client bound to base_url.

    When api_key is None the OpenAI constructor falls back to the
    OPENAI_API_KEY environment variable, which would send the OpenAI secret to
    a different provider. A placeholder is substituted instead, so a missing
    key simply produces an authentication error from that provider.
    """
    return OpenAI(api_key=api_key or f"{provider}-api-key-not-set", base_url=base_url)


anthropic = _compatible_client(anthropic_api_key, anthropic_url, "anthropic")
gemini = _compatible_client(google_api_key, gemini_url, "google")
deepseek = _compatible_client(deepseek_api_key, deepseek_url, "deepseek")
groq = _compatible_client(groq_api_key, groq_url, "groq")
grok = _compatible_client(grok_api_key, grok_url, "grok")
openrouter = _compatible_client(openrouter_api_key, openrouter_url, "openrouter")
# The local Ollama client is given the fixed string "ollama" as its key.
ollama = OpenAI(api_key="ollama", base_url=ollama_url)

In [7]:
# Single-turn conversation reused by several cells below.
_joke_request = "Tell a joke for a student on the journey to becoming an expert in LLM Engineering"
tell_a_joke = [dict(role="user", content=_joke_request)]

In [8]:
# Send the joke request to gpt-4.1-mini and render the reply as Markdown.
response = openai.chat.completions.create(
    messages=tell_a_joke,
    model="gpt-4.1-mini",
)
joke_text = response.choices[0].message.content
display(Markdown(joke_text))

Why did the LLM engineering student bring a ladder to their coding session?

Because they wanted to reach the *next level* of model training!

In [9]:
# Short probability question, sent as a single user message.
_easy_puzzle_text = (
    "You toss 2 coins. One of them is heads. "
    "What's the probability the other is tails? "
    "Answer with the probability only."
)
easy_puzzle = [dict(role="user", content=_easy_puzzle_text)]

In [10]:
# Ask gpt-5-nano the easy puzzle with reasoning_effort set to "minimal".
response = openai.chat.completions.create(
    messages=easy_puzzle,
    model="gpt-5-nano",
    reasoning_effort="minimal",
)
easy_answer = response.choices[0].message.content
display(Markdown(easy_answer))

1/2

In [11]:
# The bookworm puzzle, kept as one multi-line prompt string.
hard = """
On a bookshelf, two volumes of Pushkin stand side by side: the first and the second.
The pages of each volume together have a thickness of 2 cm, and each cover is 2 mm thick.
A worm gnawed (perpendicular to the pages) from the first page of the first volume to the last page of the second volume.
What distance did it gnaw through?
"""

# Wrap the prompt as a single user turn.
hard_puzzle = [dict(role="user", content=hard)]

In [12]:
# Ask gpt-5-nano the hard puzzle with reasoning_effort set to "minimal".
response = openai.chat.completions.create(
    messages=hard_puzzle,
    model="gpt-5-nano",
    reasoning_effort="minimal",
)
hard_answer = response.choices[0].message.content
display(Markdown(hard_answer))

We have two volumes side by side: Volume 1 (V1) and Volume 2 (V2). Each volume has:
- Pages thickness: 2 cm
- Two covers (front and back each 2 mm thick)

Assume the books are arranged in the order: left to right as V1, then V2. A worm starts at the first page of the first volume and gnaws straight through to the last page of the second volume, perpendicular to the pages.

Key points:
- The worm travels through pages and covers along a straight line through the stack of books.
- It starts at the first page of V1 (the page that is adjacent to the front cover of V1) and ends at the last page of V2 (the page adjacent to the back cover of V2).

What gets gnawed through?
- The initial front cover of V1 (2 mm)
- The back cover of V1 (2 mm)
- The front cover of V2 (2 mm)
- The back cover of V2 (2 mm)
- Plus the thickness of the pages that lie between those covers and pages along the line from the first page of V1 to the last page of V2.

However, because the worm starts at the first page of V1 and ends at the last page of V2, the actual pages it must traverse include:
- The remainder of V1’s pages after the first page
- The entire thickness of V2’s pages up to its last page
But importantly, the problem’s classic result is that the distance gnawed equals the total thickness of the stack of two books’ covers plus the thickness of all pages that lie between the starting page and ending page along the line of travel.

Compute:
- Pages per volume: 2 cm = 20 mm
- Covers per volume: 2 × 2 mm = 4 mm
Total thickness per volume: 20 mm (pages) + 4 mm (covers) = 24 mm = 2.4 cm

Two volumes together: 2 × 24 mm = 48 mm = 4.8 cm

But the worm starts at the first page of V1 and ends at the last page of V2. Along the line perpendicular to pages, the worm will not gnaw through the outer covers that are beyond those specific page faces. The first page of V1 is adjacent to the front cover of V1; the last page of V2 is adjacent to the back cover of V2. Therefore, the gnawing distance equals the total thickness of the two volumes minus the thickness of the front cover of V1 and the back cover of V2 (which are not between the first page of V1 and the last page of V2 along the interior line).

Thus:
- Subtract front cover of V1: 2 mm
- Subtract back cover of V2: 2 mm
Total subtracted: 4 mm from the full stack thickness of 48 mm.

Gnawed distance = 48 mm − 4 mm = 44 mm = 4.4 cm.

Answer: 4.4 cm.

In [13]:
# Game-show "Share or Steal" scenario used as a single-turn prompt.
dilemma_prompt = """
You and a partner are contestants on a game show. You're each taken to separate rooms and given a choice:
Cooperate: Choose "Share" — if both of you choose this, you each win $1,000.
Defect: Choose "Steal" — if one steals and the other shares, the stealer gets $2,000 and the sharer gets nothing.
If both steal, you both get nothing.
Do you choose to Steal or Share? Pick one.
"""

# Wrap the prompt as a single user turn.
dilemma = [dict(role="user", content=dilemma_prompt)]


In [14]:
# Put the dilemma to gpt-4.1-mini and render its choice as Markdown.
response = openai.chat.completions.create(
    messages=dilemma,
    model="gpt-4.1-mini",
)
dilemma_answer = response.choices[0].message.content
display(Markdown(dilemma_answer))

I choose to Share. Cooperation gives us both a guaranteed reward, and trusting each other could lead to the best mutual outcome. What do you pick?

In [15]:
# Check that the local Ollama server is answering before calling it.
# A timeout is set so an unreachable server raises an error instead of
# blocking the kernel indefinitely.
requests.get("http://localhost:11434/", timeout=5).content


b'Ollama is running'

In [16]:
# Run the easy puzzle through the local Ollama client using llama3.2.
response = ollama.chat.completions.create(
    messages=easy_puzzle,
    model="llama3.2",
)
ollama_answer = response.choices[0].message.content
display(Markdown(ollama_answer))

1/2

In [17]:
from google import genai

# This cell calls Gemini through the google genai client instead of the
# OpenAI-style clients used elsewhere in the notebook.
client = genai.Client()

blue_prompt = "Describe the color Blue to someone who's never been able to see in 1 sentence"
response = client.models.generate_content(contents=blue_prompt, model="gemini-2.5-flash-lite")
print(response.text)

Blue is the cool, calming sensation of a clear sky on a summer day, or the deep, vast mystery of the ocean.


In [18]:
# NOTE(review): this repeats the OpenRouter setup from the client-setup cell
# earlier in the notebook; it is kept so the cell still runs on its own.
openrouter_url = "https://openrouter.ai/api/v1"
# If the key is unset, pass a placeholder: with api_key=None the OpenAI client
# falls back to OPENAI_API_KEY and would send that secret to OpenRouter.
openrouter = OpenAI(
    base_url=openrouter_url,
    api_key=openrouter_api_key or "openrouter-api-key-not-set",
)


In [20]:
from langchain_openai import ChatOpenAI

# Same joke request, this time sent through LangChain's ChatOpenAI wrapper.
llm = ChatOpenAI(model="gpt-5-mini")
response = llm.invoke(tell_a_joke)

langchain_joke = Markdown(response.content)
display(langchain_joke)

How many LLM engineers does it take to change a lightbulb?

Three — one to write a prompt that implies it's already lit, one to lower the temperature so it stops hallucinating light, and one to submit a PR that replaces "dark" with "well-lit" in the prompt template.

In [21]:
from langchain_openai import ChatOpenAI

# Second run of the same LangChain request; the recorded outputs of the two
# runs differ even though the code is the same.
llm = ChatOpenAI(model="gpt-5-mini")
response = llm.invoke(tell_a_joke)

langchain_joke_again = Markdown(response.content)
display(langchain_joke_again)

Becoming an expert in LLM engineering is like fine‑tuning a model: first you overfit on one tutorial, then you add regularization — and after a few epochs of humility you finally generalize.

In [22]:
from litellm import completion

# Route the joke request through litellm, naming provider and model together.
response = completion(
    messages=tell_a_joke,
    model="openai/gpt-4.1",
)
reply = response.choices[0].message.content
display(Markdown(reply))

Why did the aspiring LLM engineer always carry a notepad?

Because every time inspiration struck, they *tokenized* it!

In [24]:
# Report token usage and cost for the most recent litellm response.
usage = response.usage
# NOTE(review): _hidden_params is a private attribute of the response object;
# confirm it stays available across litellm versions.
cost_cents = response._hidden_params["response_cost"] * 100

print(f"Input tokens: {usage.prompt_tokens}")
print(f"Output tokens: {usage.completion_tokens}")
print(f"Total tokens: {usage.total_tokens}")
# The cost is computed outside the f-string: nesting double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12.
print(f"Total cost: {cost_cents:.4f} cents")

Input tokens: 24
Output tokens: 26
Total tokens: 50
Total cost: 0.0256 cents


In [23]:
# Read the whole play into memory.
with open("hamlet.txt", "r", encoding="utf-8") as f:
    hamlet = f.read()

# Show the 100 characters starting at the passage the later question is about.
search_phrase = "Speak, man"
loc = hamlet.find(search_phrase)
if loc == -1:
    # str.find returns -1 when the phrase is absent; slicing from -1 would
    # print an unrelated fragment, so report the miss explicitly instead.
    print(f"{search_phrase!r} not found in hamlet.txt")
else:
    print(hamlet[loc:loc+100])

Speak, man.
  Laer. Where is my father?
  King. Dead.
  Queen. But not by him!
  King. Let him deman


In [25]:
# Question about the passage located above, asked first without any context.
_laertes_question = "In Hamlet, when Laertes asks 'Where is my father?' what is the reply?"
question = [dict(role="user", content=_laertes_question)]

In [26]:
# Ask gemini-2.5-flash-lite via litellm, with the bare question only.
response = completion(
    messages=question,
    model="gemini/gemini-2.5-flash-lite",
)
answer_without_context = response.choices[0].message.content
display(Markdown(answer_without_context))

In Hamlet, when Laertes asks "Where is my father?", the reply comes from **Claudius**.

Claudius tells him:

> **"Long live, Laertes!**
> **Good Laertes, who is't that is fallen in the *earth*?"**

He is essentially asking Laertes who has died, as it's implied Laertes has just arrived and is distraught, and Claudius assumes he is referring to a lost loved one. It's a somewhat evasive and politically astute response, as Claudius doesn't immediately acknowledge Polonius's death directly.

In [27]:
# Report token usage and cost for the question asked without context.
usage = response.usage
cost_cents = response._hidden_params["response_cost"] * 100

print(f"Input tokens: {usage.prompt_tokens}")
print(f"Output tokens: {usage.completion_tokens}")
print(f"Total tokens: {usage.total_tokens}")
# The cost is computed outside the f-string: nesting double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12.
print(f"Total cost: {cost_cents:.4f} cents")

Input tokens: 19
Output tokens: 125
Total tokens: 144
Total cost: 0.0052 cents


In [28]:
# Append the full play to the question so the model can look the answer up.
# The membership check keeps the cell idempotent: re-running it does not
# append the entire text a second time.
hamlet_context_header = "\n\nFor context, here is the entire text of Hamlet:\n\n"
if hamlet_context_header not in question[0]["content"]:
    question[0]["content"] += hamlet_context_header + hamlet

In [29]:
# Ask the same model again, now with the text of the play in the prompt.
response = completion(
    messages=question,
    model="gemini/gemini-2.5-flash-lite",
)
answer_with_context = response.choices[0].message.content
display(Markdown(answer_with_context))

In Act III, Scene IV, when Hamlet asks "Where is my father?", the reply comes from the Ghost of Hamlet's Father himself:

**Ghost:** "Mark me."

In [30]:
# Report token usage, cached-token count and cost for the long-context call.
usage = response.usage
cost_cents = response._hidden_params["response_cost"] * 100

print(f"Input tokens: {usage.prompt_tokens}")
print(f"Output tokens: {usage.completion_tokens}")
print(f"Cached tokens: {usage.prompt_tokens_details.cached_tokens}")
# The cost is computed outside the f-string: nesting double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12.
print(f"Total cost: {cost_cents:.4f} cents")

Input tokens: 53208
Output tokens: 37
Cached tokens: None
Total cost: 0.5336 cents


In [31]:
# Repeat the identical long-context request; the usage figures of this second
# call are printed in the next cell.
response = completion(
    messages=question,
    model="gemini/gemini-2.5-flash-lite",
)
answer_with_context_again = response.choices[0].message.content
display(Markdown(answer_with_context_again))

When Laertes asks, "Where is my father?", the reply comes from **King Claudius**.

The King replies: **"Dead."**

In [32]:
# Report token usage, cached-token count and cost for the repeated call.
usage = response.usage
cost_cents = response._hidden_params["response_cost"] * 100

print(f"Input tokens: {usage.prompt_tokens}")
print(f"Output tokens: {usage.completion_tokens}")
print(f"Cached tokens: {usage.prompt_tokens_details.cached_tokens}")
# The cost is computed outside the f-string: nesting double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12.
print(f"Total cost: {cost_cents:.4f} cents")

Input tokens: 53208
Output tokens: 30
Cached tokens: None
Total cost: 0.5333 cents


## Prompt caching with OpenAI
- Cached input tokens are 4x cheaper than uncached input tokens.

## Prompt caching with Anthropic
- You have to tell Claude explicitly that you are caching: you pay 25% more to prime the cache, but then pay 10x less each time you reuse it.

In [33]:
# Set up a two-bot conversation: GPT-4.1-mini talks with Claude-3.5-haiku.
# Both are inexpensive models, so running the exchange costs very little.

gpt_model = "gpt-4.1-mini"
claude_model = "claude-3-5-haiku-latest"

# System prompts giving the two bots opposite personalities.
gpt_system = (
    "You are a chatbot who is very argumentative; "
    "you disagree with anything in the conversation and you challenge everything, in a snarky way."
)

claude_system = (
    "You are a very polite, courteous chatbot. You try to agree with "
    "everything the other person says, or find common ground. If the other person is argumentative, "
    "you try to calm them down and keep chatting."
)

# Opening line for each side of the conversation.
gpt_messages = ["Hi there"]
claude_messages = ["Hi"]

In [34]:
def call_gpt():
    """Ask the GPT model for its next turn in the conversation.

    The request starts with gpt_system, then replays the paired history from
    gpt_messages and claude_messages: GPT's own lines as "assistant" turns and
    Claude's lines as "user" turns.

    Returns:
        The text content of the first choice in the model's response.
    """
    history = [{"role": "system", "content": gpt_system}]
    for own_turn, partner_turn in zip(gpt_messages, claude_messages):
        history += [
            {"role": "assistant", "content": own_turn},
            {"role": "user", "content": partner_turn},
        ]
    result = openai.chat.completions.create(model=gpt_model, messages=history)
    first_choice = result.choices[0]
    return first_choice.message.content

In [35]:
# Try call_gpt() once against the seed messages; the returned reply is shown
# as this cell's output.
call_gpt()

'Oh, just "Hi"? Wow, way to make an unforgettable first impression. Couldn\'t even bother with a proper greeting? Brilliant start.'

In [36]:
def call_claude():
    """Ask the Claude model for its next turn in the conversation.

    The request starts with claude_system, then replays the paired history
    from gpt_messages and claude_messages: GPT's lines as "user" turns and
    Claude's own lines as "assistant" turns. The latest GPT message is then
    added as a final "user" turn.

    Returns:
        The text content of the first choice in the model's response.
    """
    history = [{"role": "system", "content": claude_system}]
    for partner_turn, own_turn in zip(gpt_messages, claude_messages):
        history += [
            {"role": "user", "content": partner_turn},
            {"role": "assistant", "content": own_turn},
        ]
    # zip stops at the shorter list, so the newest GPT message is appended
    # here unconditionally as the turn to respond to.
    history.append({"role": "user", "content": gpt_messages[-1]})
    result = anthropic.chat.completions.create(model=claude_model, messages=history)
    first_choice = result.choices[0]
    return first_choice.message.content