# End of week 1 exercise — SQL from Natural Language

Build a tool that takes a natural language question about data and returns a SQL query.  
It uses the **OpenAI API** (gpt-4o-mini) and **Ollama** (llama3.2) so you can compare their outputs.

In [None]:
# imports
import os
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI
import ollama

# Load variables from a .env file, then read the OpenAI key from the environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key and print a hint for the most common mistakes.
# The whitespace check runs BEFORE the prefix check: a key with leading
# whitespace would otherwise fail `startswith` first and be misreported as
# having the wrong prefix.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

# The client is created with no explicit arguments regardless of the check
# above (presumably it picks the key up from the environment - confirm against
# the OpenAI client docs).
client = OpenAI()


In [None]:
# Model identifiers: one per backend queried in the cells below.

MODEL_GPT = "gpt-4o-mini"  # passed to the OpenAI chat completions call
MODEL_LLAMA = "llama3.2"  # passed to ollama.chat

In [None]:
# Inputs for the SQL generators below.
# SCHEMA describes the tables (as SQL comments) and is sent along with the
# question; edit `question` to ask for different SQL.

SCHEMA = (
    "\n"
    "-- users: id (int), name (text), email (text), created_at (timestamp)\n"
    "-- orders: id (int), user_id (int), total (decimal), created_at (timestamp)\n"
    "-- products: id (int), name (text), price (decimal)\n"
)

question = (
    "List the top 5 users by total order amount, with their total spent."
)

In [None]:
# Get gpt-4o-mini to generate SQL, with streaming.
# `system_prompt` is defined here and reused by the Llama cell further down.
system_prompt = """You are a SQL expert. Given a database schema and a natural language question, output only a valid SQL query. No explanation, no markdown fences—just the SQL."""

# stream=True makes the call return an iterable of incremental chunks
# instead of a single completed response.
stream = client.chat.completions.create(
    model=MODEL_GPT,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Schema:\n{SCHEMA}\n\nQuestion: {question}"},
    ],
    stream=True,
)

print("GPT-4o-mini SQL:\n")
for chunk in stream:
    # A streamed chunk may carry an empty `choices` list (the API documents
    # this for usage-only chunks; some compatible endpoints also send one
    # first). Indexing [0] on it would raise IndexError, so skip it.
    if not chunk.choices:
        continue
    piece = chunk.choices[0].delta.content
    # `content` can be None/empty (e.g. role-only or final chunks); print
    # only real text, without a newline so the pieces join up.
    if piece:
        print(piece, end="")
print()

In [None]:
# Ask Llama 3.2 (via Ollama) for the same SQL, using the same system prompt
# and the same schema + question text as the GPT cell. No streaming here:
# the whole reply is printed once the call returns.
llama_user_prompt = f"Schema:\n{SCHEMA}\n\nQuestion: {question}"
llama_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": llama_user_prompt},
]

response = ollama.chat(model=MODEL_LLAMA, messages=llama_messages)

print("Llama 3.2 SQL:\n")
print(response["message"]["content"])