In [None]:
# Show the working directory, step up one level, then show it again.
# Presumably this makes the relative data files and local modules used below resolvable — confirm.
# NOTE: `%cd ..` is relative, so re-running this cell climbs one more level each time.
%pwd
%cd ..
%pwd

In [None]:
import os
import math
import json
import random
from dotenv import load_dotenv
from huggingface_hub import login
import matplotlib.pyplot as plt
import numpy as np
import pickle
from collections import Counter
from items import Item
from collections import Counter
from openai import OpenAI

In [None]:
from testing import Tester

In [None]:
# Pull variables from a local .env file into the process environment.
load_dotenv()
# os.environ only accepts str values, so assigning os.getenv()'s result directly
# fails with an opaque "TypeError: str expected, not NoneType" when the key is
# missing. Check explicitly and fail with an actionable message instead.
_openai_api_key = os.getenv('OPENAI_API')
if _openai_api_key is None:
    raise RuntimeError("OPENAI_API is not set; define it in .env or in the environment")
os.environ["OPENAI_API"] = _openai_api_key

In [None]:
# Client object used for every OpenAI call in this notebook; the key is read from the environment.
openai = OpenAI(api_key=os.environ["OPENAI_API"])

In [None]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
# Load the pickled train and test datasets from the current working directory.
# SECURITY: pickle.load can execute arbitrary code while deserialising — only open files you produced or trust.
# NOTE(review): presumably these contain `Item` objects, so `items.Item` must be importable here — confirm.
with open('train.pkl','rb') as file:
    train = pickle.load(file)
with open('test.pkl','rb') as file:
    test = pickle.load(file)

In [None]:
# The first 500 training items are used for fine-tuning; the next 50 (indices 500-549)
# form a validation set that does not overlap with them.
fine_tune_train = train[:500]
fine_tune_validation  = train[500:550]

In [None]:
def messages_for(item):
    system_prompt = "You estimate prices of items. Reply only with the price, no explination"
    user_prompt = item.test_prompt().replace(" to the nearest dollar","").replace('\n\nPricr is $',"")
    return [
        {"role":"system","content":system_prompt},
        {"role":"user","content":user_prompt},
        {"role":"assistant","content":f"Price is ${item.price:.2f}"}
    ]

In [None]:
# messages_for(train[0])

In [None]:
def make_jsonl(items):
    """Serialise items as JSON Lines text for fine-tuning.

    Each item becomes one line of the form ``{"messages": [...]}``, where the
    list comes from ``messages_for(item)``.

    Args:
        items: iterable of objects accepted by ``messages_for``.

    Returns:
        The lines joined with newlines and no trailing newline; an empty string
        when ``items`` is empty.
    """
    lines = [json.dumps({"messages": messages_for(entry)}) for entry in items]
    return "\n".join(lines)

In [None]:
# print(make_jsonl(train[:4]))

In [None]:
def write_jsonl(items,filename):
    """Write the JSON Lines form of ``items`` to ``filename``.

    Args:
        items: iterable of objects accepted by ``make_jsonl``.
        filename: path of the file to create or overwrite.
    """
    # Serialise before opening: mode "w" truncates on open, so a failure inside
    # make_jsonl would otherwise wipe out a previously written file.
    jsonl = make_jsonl(items)
    with open(filename,"w",encoding="utf-8") as f:
        f.write(jsonl)

In [None]:
# Write the fine-tuning training examples to a JSONL file in the working directory.
write_jsonl(fine_tune_train,"fine_tune_train.jsonl")

In [None]:
# Write the validation examples to their own JSONL file.
write_jsonl(fine_tune_validation,"fine_tune_validation.jsonl")

In [None]:
# Upload the training JSONL (opened in binary mode) with purpose 'fine-tune'.
# The returned object is kept in `trian_file` (sic); its `.id` is passed when the job is created.
with open("fine_tune_train.jsonl",'rb') as f:
    trian_file = openai.files.create(file=f,purpose='fine-tune')

In [None]:
# Echo the upload response for the training file.
trian_file

In [None]:
# Upload the validation JSONL (opened in binary mode) with purpose 'fine-tune'.
with open("fine_tune_validation.jsonl",'rb') as f:
    validation_file = openai.files.create(file=f,purpose='fine-tune')

In [None]:
# Echo the upload response for the validation file.
validation_file

In [None]:
# Integration descriptor passed to the fine-tuning job so it reports to the "gpt-pricer" W&B project.
# NOTE(review): presumably requires Weights & Biases to be configured for the OpenAI account — confirm.
wandb_integration = {"type":"wandb","wandb":{"project":"gpt-pricer"}}

In [None]:
# Submit the fine-tuning job: one epoch over the uploaded training file, evaluated
# against the uploaded validation file, with a fixed seed and the "pricer" suffix.
# NOTE: every execution of this cell submits a new job, and the returned job object
# is not stored — later cells look the job up again through the jobs listing.
openai.fine_tuning.jobs.create(
    training_file=trian_file.id,
    validation_file=validation_file.id,
    model = "gpt-4o-mini-2024-07-18",
    seed=42,
    hyperparameters={"n_epochs":1},
    integrations=[wandb_integration],
    suffix="pricer"
)

In [None]:
# Show a one-entry page of the account's fine-tuning jobs.
openai.fine_tuning.jobs.list(limit=1)

In [None]:
# Take the id of the first job in a one-entry listing.
# NOTE(review): presumably the listing is newest-first, so this is the job submitted
# above — confirm; any job created in between would be picked up instead.
job_id = openai.fine_tuning.jobs.list(limit=1).data[0].id
job_id

In [None]:
# Fetch and display the current state of the selected job.
openai.fine_tuning.jobs.retrieve(job_id)

In [None]:
# Display up to 10 events for the selected job (useful for following its progress).
openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id,limit=10).data

In [None]:
# Name of the resulting model, read from the job record; used as `model=` for inference below.
# NOTE(review): the field is presumably empty (None) until the job has finished
# successfully — confirm, and re-run this cell once the job is done.
fine_tuned_model = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model

In [None]:
def messages_for(item):
    system_prompt = "You estimate prices of items. Reply only with the price, no explination needed"
    user_prompt = item.test_prompt().replace(" to the nearest dollar","").replace("\n\nPrice is $","")
    return [
        {"role":"system","content":system_prompt},
        {"role":"user","content":user_prompt},
        {"role":"assistant","content":"Price is $"}
    ]

In [None]:
# Preview the inference messages built for the first test item.
messages_for(test[0])

In [None]:
import re
def get_price(s):
    """Extract the first number from a model reply.

    ``$`` and ``,`` are removed first, so ``"$1,299.99"`` parses as ``1299.99``.
    Note that the optional sign in the pattern only binds to the decimal
    alternative, so ``"-5"`` yields ``5.0`` while ``"-5.5"`` yields ``-5.5``.

    Args:
        s: reply text; ``None`` or an empty string is treated as unparseable.

    Returns:
        The number as a float, or ``0.0`` when no number is found.
    """
    # A reply can carry no text at all; previously that raised AttributeError on .replace().
    if not s:
        return 0.0
    s = s.replace('$','').replace(',','')
    match = re.search(r"[-+]?\d*\.\d+|\d+", s)
    # Always return a float so both branches have the same type.
    return float(match.group()) if match else 0.0

In [None]:
# Quick check of the parser on a sample reply.
get_price("The price is 99$")

In [None]:
def gpt_fine_tuned(item):
    """Ask the fine-tuned model to price ``item`` and return the parsed number.

    Sends the messages built by ``messages_for(item)`` to ``fine_tuned_model``
    with a fixed seed and a 7-token cap on the reply, then runs the text of the
    first choice through ``get_price``.

    Args:
        item: object accepted by ``messages_for``.

    Returns:
        Whatever ``get_price`` extracts from the model's reply.
    """
    create_completion = openai.chat.completions.create
    completion = create_completion(
        model=fine_tuned_model,
        messages=messages_for(item),
        seed=42,
        max_tokens=7,
    )
    first_choice = completion.choices[0]
    return get_price(first_choice.message.content)

In [None]:
# Spot check on the first test item: actual price, then the model's estimate.
print(test[0].price)
print(gpt_fine_tuned(test[0]))

In [None]:
# Run the imported Tester harness with the fine-tuned predictor over the test data.
# NOTE(review): presumably this calls the predictor once per evaluated item, i.e. one API request each — confirm.
Tester.test(gpt_fine_tuned,test)