In [2]:
import re
import json
from pydantic import BaseModel
from typing import List

from openai import OpenAI
client = OpenAI()

# The Beginning
### A simple prompt for some data
- let's prompt the model for some simple information

In [3]:
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": "What are the radii and masses of the planets in our solar system?"
        }
    ]
)

In [5]:
print(completion.choices[0].message.content)

Here is a summary of the average radii and masses of the planets in our solar system:

1. **Mercury**
   - Radius: 2,439.7 km
   - Mass: \(3.301 \times 10^{23}\) kg

2. **Venus**
   - Radius: 6,051.8 km
   - Mass: \(4.867 \times 10^{24}\) kg

3. **Earth**
   - Radius: 6,371 km
   - Mass: \(5.972 \times 10^{24}\) kg

4. **Mars**
   - Radius: 3,389.5 km
   - Mass: \(6.417 \times 10^{23}\) kg

5. **Jupiter**
   - Radius: 69,911 km
   - Mass: \(1.898 \times 10^{27}\) kg

6. **Saturn**
   - Radius: 58,232 km
   - Mass: \(5.683 \times 10^{26}\) kg

7. **Uranus**
   - Radius: 25,362 km
   - Mass: \(8.681 \times 10^{25}\) kg

8. **Neptune**
   - Radius: 24,622 km
   - Mass: \(1.024 \times 10^{26}\) kg

These values give a general idea of the sizes and masses of the planets, though individual measurements can vary somewhat based on the methods and standards used.


# JSON Anyone?
### What if we want it structured?
- we can try manually asking the model for structured output, such as JSON

In [6]:
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": "What are the radii and masses of the planets in our solar system? Please output your response as JSON."
        }
    ]
)

In [7]:
print(completion.choices[0].message.content)

Here is the information regarding the radii and masses of the planets in our solar system, presented in JSON format:

```json
{
  "planets": [
    {
      "name": "Mercury",
      "radius_km": 2439.7,
      "mass_kg": 3.3011e23
    },
    {
      "name": "Venus",
      "radius_km": 6051.8,
      "mass_kg": 4.8675e24
    },
    {
      "name": "Earth",
      "radius_km": 6371.0,
      "mass_kg": 5.9724e24
    },
    {
      "name": "Mars",
      "radius_km": 3389.5,
      "mass_kg": 6.4171e23
    },
    {
      "name": "Jupiter",
      "radius_km": 69911.0,
      "mass_kg": 1.8982e27
    },
    {
      "name": "Saturn",
      "radius_km": 58232.0,
      "mass_kg": 5.6834e26
    },
    {
      "name": "Uranus",
      "radius_km": 25362.0,
      "mass_kg": 8.6810e25
    },
    {
      "name": "Neptune",
      "radius_km": 24622.0,
      "mass_kg": 1.02413e26
    }
  ]
}
```

This JSON includes the names, radii in kilometers, and masses in kilograms for each of the eight planets in our sol

### Parsing
- it might not always be exactly just the structured output we suggest, but some regex and simple checks might solve that

In [9]:
response = completion.choices[0].message.content

pattern = r"```json(.*?)```"
matches = re.findall(pattern, response, re.DOTALL)
for match in matches:
    print(match.strip())

{
  "planets": [
    {
      "name": "Mercury",
      "radius_km": 2439.7,
      "mass_kg": 3.3011e23
    },
    {
      "name": "Venus",
      "radius_km": 6051.8,
      "mass_kg": 4.8675e24
    },
    {
      "name": "Earth",
      "radius_km": 6371.0,
      "mass_kg": 5.9724e24
    },
    {
      "name": "Mars",
      "radius_km": 3389.5,
      "mass_kg": 6.4171e23
    },
    {
      "name": "Jupiter",
      "radius_km": 69911.0,
      "mass_kg": 1.8982e27
    },
    {
      "name": "Saturn",
      "radius_km": 58232.0,
      "mass_kg": 5.6834e26
    },
    {
      "name": "Uranus",
      "radius_km": 25362.0,
      "mass_kg": 8.6810e25
    },
    {
      "name": "Neptune",
      "radius_km": 24622.0,
      "mass_kg": 1.02413e26
    }
  ]
}


- finally we can convert this to json

In [15]:
json_response = json.loads(match)
json_response

{'planets': [{'name': 'Mercury', 'radius_km': 2439.7, 'mass_kg': 3.3011e+23},
  {'name': 'Venus', 'radius_km': 6051.8, 'mass_kg': 4.8675e+24},
  {'name': 'Earth', 'radius_km': 6371.0, 'mass_kg': 5.9724e+24},
  {'name': 'Mars', 'radius_km': 3389.5, 'mass_kg': 6.4171e+23},
  {'name': 'Jupiter', 'radius_km': 69911.0, 'mass_kg': 1.8982e+27},
  {'name': 'Saturn', 'radius_km': 58232.0, 'mass_kg': 5.6834e+26},
  {'name': 'Uranus', 'radius_km': 25362.0, 'mass_kg': 8.681e+25},
  {'name': 'Neptune', 'radius_km': 24622.0, 'mass_kg': 1.02413e+26}]}

# Consistency
- is this structure consistent? can we gaurantee the model will generate the same JSON every time, and the same python dict object can be parsed each time?
- what if we define a python object, and every time we get a response we can create an instance of that object?

In [5]:
class Planet(BaseModel):
    name: str
    radius: float
    mass: float
    
class Result(BaseModel):
    planets: List[Planet]

completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": "What are the radii and masses of the planets in our solar system? Please output your response as JSON."
        }
    ],
    response_format=Result
)

In [13]:
completion.model, completion.usage

('gpt-4o-mini-2024-07-18',
 CompletionUsage(completion_tokens=143, prompt_tokens=134, total_tokens=277, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [14]:
result = completion.choices[0].message.parsed

In [19]:
print(f'{result.planets[0].name} has a radius of {result.planets[0].radius} km, and mass of {result.planets[0].mass}e+24 kg')

Mercury has a radius of 2439.7 km, and mass of 0.3302e+24 kg
