In [1]:
import json
import os
import pickle
import random
import re
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
from openai import OpenAI

from items import Item
from testing import Tester

In [2]:
# Shared OpenAI client used by every API call below. The key is read from the
# OPENAI_API_KEY environment variable (os.getenv yields None when it is unset).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [3]:
# Fine-tuning configuration; these values are passed to the job-creation call
# later in the notebook.
BASE_MODEL = "gpt-4o-mini-2024-07-18"  # model to fine-tune
EPOCHS = 5  # sent as "n_epochs"
BATCH_SIZE = 8  # sent as "batch_size"
LR_MULT = 0.3  # sent as "learning_rate_multiplier"

In [None]:
# NOTE(review): pickle.load can execute arbitrary code while deserialising, so
# train.pkl / test.pkl must come from a trusted source.
with open("train.pkl", "rb") as f:
    train_subset = pickle.load(f)

# NOTE(review): the data loaded from test.pkl is named val_subset and is used
# below both as the fine-tuning validation file and for the final evaluation.
with open("test.pkl", "rb") as f:
    val_subset = pickle.load(f)

print(f"Loaded {len(train_subset)} training and {len(val_subset)} validation items.")

In [None]:
# Fixed seed so that "Restart Kernel -> Run All" selects the same samples on
# every run; a dedicated Random instance avoids touching the global RNG state.
SEED = 42
rng = random.Random(SEED)

# Shuffle in place before truncating so the kept samples are a random draw
# rather than the first N items of the pickled lists.
rng.shuffle(train_subset)
rng.shuffle(val_subset)

# Upper bounds on how many items are kept for fine-tuning / validation.
TRAIN_LIMIT = 5000
VAL_LIMIT = 1000

train_subset = train_subset[:TRAIN_LIMIT]
val_subset = val_subset[:VAL_LIMIT]

print(f"Using {len(train_subset)} training and {len(val_subset)} validation samples.")

In [6]:
def build_prompt(item):
    """Render the user-side prompt for one listing.

    Args:
        item: Object exposing ``title``, ``category`` and ``details``
            attributes; each is interpolated into its own section.

    Returns:
        str: The prompt text. It starts with a blank line and ends with
        ``"Predicted Price: $"`` followed by a newline.

    NOTE(review): the prompt already ends with "Predicted Price: $" while
    build_completion() emits that same prefix again; confirm this duplication
    is intended for the training format.
    """
    lines = [
        "",  # the text begins with a newline
        "### CONTEXT",
        "You are a price estimation assistant for e-commerce listings.",
        "Each product is described by its title, category, key features, and details.",
        "",
        "### TASK",
        "Estimate the most likely retail price in USD.",
        # The trailing space on the next line is part of the original text.
        "Think step-by-step about product type, quality, and included components ",
        'before stating the final answer as "Predicted Price: $<amount>".',
        "",
        "### EXAMPLES",
        "- Wireless earbuds with active noise cancellation -> Predicted Price: $89",
        "- Stainless steel kitchen knife set (6-piece) -> Predicted Price: $45",
        "- Laptop stand aluminum adjustable -> Predicted Price: $32",
        "",
        "### PRODUCT TITLE",
        f"{item.title}",
        "",
        "### CATEGORY",
        f"{item.category}",
        "",
        "### DETAILS",
        f"{item.details}",
        "",
        "### YOUR REASONING",
        "(Think about product quality, features, and typical market range.)",
        "",
        "### FINAL ANSWER",
        "Predicted Price: $",
        "",  # the text ends with a newline
    ]
    return "\n".join(lines)

def build_completion(item):
    """Render the assistant-side target text for one listing.

    Args:
        item: Object exposing a numeric ``price`` attribute.

    Returns:
        str: ``"Predicted Price: $<n>.00"`` where ``<n>`` is ``round(item.price)``,
        i.e. the price rounded to whole dollars with a literal ".00" suffix.
    """
    whole_dollars = round(item.price)
    return "Predicted Price: $" + str(whole_dollars) + ".00"

In [None]:
def write_jsonl(data, filename):
    """Write chat-format fine-tuning examples to a JSONL file.

    Each included item becomes one line holding a ``{"messages": [...]}`` object
    with a user message from ``build_prompt`` and an assistant message from
    ``build_completion``.

    Args:
        data: Iterable of items. An item whose ``include`` attribute is falsy is
            skipped; items without that attribute are written.
        filename: Destination path; the file is overwritten.

    Returns:
        None. A summary line with the number of samples actually written is
        printed (the previous version reported ``len(data)`` even when some
        items had been skipped).
    """
    written = 0
    skipped = 0
    with open(filename, "w", encoding="utf-8") as f:
        for item in data:
            if not getattr(item, "include", True):
                skipped += 1
                continue
            json_obj = {
                "messages": [
                    {"role": "user", "content": build_prompt(item)},
                    {"role": "assistant", "content": build_completion(item)},
                ]
            }
            f.write(json.dumps(json_obj) + "\n")
            written += 1

    summary = f"Wrote {written} samples to {filename}"
    if skipped:
        summary += f" (skipped {skipped} excluded items)"
    print(summary)

# Output paths for the prepared chat-format JSONL files.
TRAIN_JSONL = "train_prepared.jsonl"
VAL_JSONL = "val_prepared.jsonl"

# Serialise both subsets; these files are uploaded for fine-tuning in the next cell.
write_jsonl(train_subset, TRAIN_JSONL)
write_jsonl(val_subset, VAL_JSONL)

In [8]:
# Upload the prepared JSONL files for fine-tuning. The context managers close
# the local file handles once each upload call returns (the original left both
# handles open for the lifetime of the kernel).
with open(TRAIN_JSONL, "rb") as train_fh:
    train_file = client.files.create(file=train_fh, purpose="fine-tune")

with open(VAL_JSONL, "rb") as val_fh:
    val_file = client.files.create(file=val_fh, purpose="fine-tune")

In [None]:
# Start the fine-tuning job on the uploaded training/validation files, using
# the hyperparameter constants defined in the configuration cell.
job = client.fine_tuning.jobs.create(
    training_file=train_file.id,
    validation_file=val_file.id,
    model=BASE_MODEL,
    hyperparameters={
        "n_epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
        "learning_rate_multiplier": LR_MULT
    }
)

# job.id is the handle used to poll the job's progress below.
print("Job started:", job.id)

In [None]:
# Fallback patterns for pulling metrics out of an event's human-readable
# message when it carries no structured payload. Both the "training loss=0.52"
# and the "training_loss: 0.52" spellings are accepted.
_FLOAT = r"([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
_STEP_RE = re.compile(r"Step (\d+)/")
_TRAIN_LOSS_RE = re.compile(r"training[ _]loss\s*[=:]\s*" + _FLOAT)
# The lookbehind skips "full validation loss", which is a separate metric.
_VAL_LOSS_RE = re.compile(r"(?<!full )(?:validation|val)[ _]loss\s*[=:]\s*" + _FLOAT)


def _parse_loss_event(event):
    """Return ``(step, train_loss, val_loss)`` for a metrics event, else ``None``.

    The structured ``event.data`` payload (keys ``step``, ``train_loss`` and
    optional ``valid_loss``, as documented for fine-tuning metrics events) is
    preferred; the message text is only parsed when that payload is missing or
    malformed. ``val_loss`` is ``None`` when the event has no validation loss.
    """
    payload = getattr(event, "data", None)
    if isinstance(payload, dict) and "step" in payload and "train_loss" in payload:
        try:
            valid = payload.get("valid_loss")
            return (
                int(payload["step"]),
                float(payload["train_loss"]),
                None if valid is None else float(valid),
            )
        except (TypeError, ValueError):
            pass  # malformed payload: fall back to the message text

    message = getattr(event, "message", None) or ""
    step_match = _STEP_RE.search(message)
    train_match = _TRAIN_LOSS_RE.search(message)
    if step_match is None or train_match is None:
        return None
    val_match = _VAL_LOSS_RE.search(message)
    return (
        int(step_match.group(1)),
        float(train_match.group(1)),
        float(val_match.group(1)) if val_match else None,
    )


def _fetch_new_events(job_id, seen):
    """Return the job's not-yet-seen events, oldest first.

    Iterating the list result walks every page (the SDK auto-paginates), so
    events are no longer lost when more than one page accumulates between
    polls. Events arrive newest first, so the walk stops at the first id that
    was already reported.
    """
    fresh = []
    for event in client.fine_tuning.jobs.list_events(job_id):
        if event.id in seen:
            break
        fresh.append(event)
    fresh.reverse()
    return fresh


def _plot_losses(job_id, loss_data):
    """Plot training loss and, where present, validation loss against step."""
    steps = [step for step, _, _ in loss_data]
    train_losses = [train for _, train, _ in loss_data]
    # Keep each validation loss paired with the step it was measured at;
    # validation metrics are not reported on every step.
    val_points = [(step, val) for step, _, val in loss_data if val is not None]

    plt.figure(figsize=(8, 5))
    plt.plot(steps, train_losses, marker="o", color="teal", label="Training Loss")
    if val_points:
        val_steps, val_losses = zip(*val_points)
        plt.plot(val_steps, val_losses, marker="o", color="orange", label="Validation Loss")
    plt.xlabel("Step")
    plt.ylabel("Loss")
    plt.title(f"Fine-Tuning Progress — {job_id}")
    plt.legend()
    plt.grid(alpha=0.3)
    plt.show()


def stream_finetune_events(job_id, poll_interval=30):
    """Poll a fine-tuning job until it finishes, echoing its events.

    Every new event is printed with its timestamp. Loss metrics found in the
    events are collected and plotted once the job succeeds.

    Args:
        job_id: Identifier of the fine-tuning job to follow.
        poll_interval: Seconds to sleep between polls.

    Returns:
        The fine-tuned model id when the job succeeds, or ``None`` when the job
        ends as "failed" or "cancelled".
    """
    print(f"Tracking fine-tuning job: {job_id}\n")
    seen = set()
    loss_data = []

    while True:
        # Read the status before the events so that the events fetched for a
        # finished job include its final messages.
        job = client.fine_tuning.jobs.retrieve(job_id)

        for event in _fetch_new_events(job_id, seen):
            seen.add(event.id)
            ts = datetime.fromtimestamp(event.created_at)
            print(f"[{ts:%Y-%m-%d %H:%M:%S}] {event.message}")

            point = _parse_loss_event(event)
            if point is not None:
                loss_data.append(point)

        if job.status == "succeeded":
            print("\nFine-tuning complete!")
            print("Fine-tuned model ID:", job.fine_tuned_model)

            if loss_data:
                _plot_losses(job_id, loss_data)
            else:
                print("No loss data found. Fine-tuning may have completed too quickly to log metrics.")

            return job.fine_tuned_model

        elif job.status in ["failed", "cancelled"]:
            print(f"\nFine-tuning {job.status}.")
            if job.error:
                print("Error:", job.error)
            return None

        time.sleep(poll_interval)

# Blocks until the job reaches a final state; MODEL_ID is None if the job
# failed or was cancelled.
MODEL_ID = stream_finetune_events(job.id)

In [None]:
def test_model(model_id, test_items, max_samples=100):
    y_true, y_pred = [], []
    for i, item in enumerate(test_items[:max_samples]):
        prompt = build_prompt(item)
        response = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        output = response.choices[0].message.content
        try:
            pred_price = float(output.split("$")[1].split()[0])
        except:
            continue
        y_true.append(item.price)
        y_pred.append(pred_price)
        print(f"{i+1}. {item.title[:50]} | Actual: ${item.price} | Pred: ${pred_price}")
    return y_true, y_pred

# Evaluate the fine-tuned model on the held-out subset (test_model caps this at
# its default of 100 samples).
y_true, y_pred = test_model(MODEL_ID, val_subset)

In [None]:
# Absolute-error bands (USD) used to colour each prediction:
# below GOOD -> green, below FAIR -> orange, otherwise red.
GOOD_ERROR_USD = 10
FAIR_ERROR_USD = 25

if len(y_true) == 0:
    # test_model leaves out responses it cannot parse, so the lists can be
    # empty; averaging an empty array would only yield nan plus a warning.
    print("No parsed predictions to evaluate.")
else:
    errors = np.abs(np.array(y_true) - np.array(y_pred))
    colors = [
        "green" if e < GOOD_ERROR_USD else "orange" if e < FAIR_ERROR_USD else "red"
        for e in errors
    ]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(range(len(y_true)), y_true, color='blue', label='Actual', alpha=0.6)
    ax.scatter(range(len(y_pred)), y_pred, color=colors, label='Predicted', alpha=0.8)
    ax.set_title("Fine-tuned Price Prediction Performance (Color-Coded by Error)")
    ax.set_xlabel("Sample Index")
    ax.set_ylabel("Price ($)")
    ax.legend()
    plt.show()

    avg_error = np.mean(errors)
    print(f"\nAverage error: ${avg_error:.2f}")