# Fine‑tuning **GPT‑4.1 nano** for <$0.50  

This notebook shows a minimal, low‑cost fine‑tuning workflow that trains **GPT‑4.1 nano** on 100 short jokes (~15 K tokens)

In [1]:
!pip install --quiet openai tiktoken pandas datasets

In [2]:
import os, json, random, tiktoken, pandas as pd
from dotenv import load_dotenv
load_dotenv()
from datasets import load_dataset
import openai

# Set OpenAI API key
openai.api_key = os.getenv('OPENAI_API_KEY')

In [None]:
from datasets import load_dataset
import random
# i want to use ysharma/short_jokes but don't work because moderation restrictions i get 
# The job failed due to an unsafe training file. This training 
# file was blocked by our moderation system because it contains
# too many examples that violate OpenAI's usage policies, or because 
# it attempts to create model outputs that violate OpenAI's usage policies.
# so i will use shuttie/dadjokes instead
ds = load_dataset('shuttie/dadjokes', split='train')
print("Total jokes loaded:", len(ds))
# Sample 50 jokes for demonstration
sampled = random.sample(list(zip(ds["question"], ds["response"])), 50)
print(sampled[:5])


Total jokes loaded: 52000
[("Why couldn't the toilet paper cross the road", 'It got stuck in a crack'), ("Dadjoked a cashier I was out with some friends, we were grabbing some food at a local coffee shop.  Their prices were fucking sweet, like three bucks for a sandwich.  Anyway, I placed by order:  -$4. 50 for a grilled cheese (heavenly) - -$3. 00 for a small shake - -$0. 60 tax - The cashier nods and says,  Thank you, that'll be $8", "10   I replied,  It's about to be *ea*-ten   I'm pretty sure they spit in my food"), ('The future, the present and the past walked into a bar,', 'Things got a little tense.'), ('My daughter got engaged to a Russian guy and her wedding is in a few weeks', 'I’m just really worried about the Soviet Union'), ('6:30 is the best time of day', 'Hands down')]


In [13]:
SYSTEM_PROMPT = "You are a witty assistant that answers with a short, family‑friendly dad joke."

train_examples = [
    {"messages":[
        {"role":"system","content":SYSTEM_PROMPT},
        {"role":"user","content":q.strip()},
        {"role":"assistant","content":a.strip()}
    ]} for q,a in sampled
]

print(train_examples[0])

{'messages': [{'role': 'system', 'content': 'You are a witty assistant that answers with a short, family‑friendly dad joke.'}, {'role': 'user', 'content': "Why couldn't the toilet paper cross the road"}, {'role': 'assistant', 'content': 'It got stuck in a crack'}]}


In [14]:
import tiktoken

enc = tiktoken.get_encoding("o200k_base")

token_total = 0
for ex in train_examples:
    for m in ex["messages"]:
        token_total += len(enc.encode(m["content"]))

print(f"Estimated tokens: {token_total}")

USD_PER_M_TOKENS_TRAIN = 1.50 
train_cost = (token_total / 1_000_000) * USD_PER_M_TOKENS_TRAIN
print(f"≈ ${train_cost:.4f} total training cost")


Estimated tokens: 2273
≈ $0.0034 total training cost


In [15]:
jsonl_path = "jokes_gpt41nano.jsonl"
with open(jsonl_path, "w", encoding="utf-8") as f:
    for ex in train_examples:
        f.write(json.dumps(ex) + "\n")

print("Wrote", jsonl_path)

Wrote jokes_gpt41nano.jsonl


In [16]:
# Upload file
file_obj = openai.files.create(
    file=open(jsonl_path, "rb"),
    purpose="fine-tune"
)
print("Uploaded file:", file_obj.id)

# Start fine‑tuning job (single epoch keeps cost tiny; you can raise if desired)
job = openai.fine_tuning.jobs.create(
    training_file=file_obj.id,
    model="gpt-4.1-nano-2025-04-14",
    hyperparameters={
        "n_epochs": 1
    }
)
print("Job started:", job.id)

Uploaded file: file-1ZU1P8Qm1NA7JqWoVqLpD4
Job started: ftjob-WLpCJIma9qg3jEn8vc5Q1Rhn


In [17]:
import time, sys
while True:
    job_status = openai.fine_tuning.jobs.retrieve(job.id)
    print("Status:", job_status.status)
    if job_status.status in ("succeeded", "failed", "cancelled"):
        break
    time.sleep(10)

Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files
Status: validating_files


In [18]:
MODEL_ID = job_status.fine_tuned_model
print("Fine‑tuned model id:", MODEL_ID)

resp = openai.chat.completions.create(
    model=MODEL_ID,
    messages=[
        {"role":"system", "content": SYSTEM_PROMPT},
        {"role":"user", "content": "Tell me a short joke about computers."}
    ]
)
print(resp.choices[0].message.content)

Fine‑tuned model id: ft:gpt-4.1-nano-2025-04-14:personal::By6Rub20
My computer kept freezing, so I ran the defrost program
