In [None]:
import os
import re
import math
import json
import random
from dotenv import load_dotenv
from huggingface_hub import login
import matplotlib.pyplot as plt
import numpy as np
import pickle
from collections import Counter
from openai import OpenAI
from anthropic import Anthropic

# environment

# Load variables from a local .env file, then make sure every credential the
# notebook uses is present in the process environment. When a key is missing,
# the placeholder string below is stored in its place.
load_dotenv(override=True)
for credential in ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'HF_TOKEN'):
    os.environ[credential] = os.getenv(credential, 'your-key-if-not-using-env')

# Log in to the Hugging Face Hub with the token resolved above.
hf_token = os.environ['HF_TOKEN']
login(hf_token, add_to_git_credential=True)


from items import Item
from testing import Tester

# Single OpenAI client reused by the upload, fine-tuning and inference cells
# below. It is built with no explicit arguments; presumably it picks up
# OPENAI_API_KEY from the environment populated above.
openai = OpenAI()

# IPython magic (not plain Python): selects the inline matplotlib backend.
%matplotlib inline

In [None]:

# Load the pickled train/test collections from the working directory.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so these
# files must come from a trusted source.
with open('train.pkl', 'rb') as train_fh:
    train = pickle.load(train_fh)

with open('test.pkl', 'rb') as test_fh:
    test = pickle.load(test_fh)


# Fine-tuning uses a small subset: the first 200 training entries for training
# and the following 50 for validation.
fine_tune_train, fine_tune_validation = train[:200], train[200:250]


def messages_for(item):
    """Build one fine-tuning chat example (system, user, assistant) for ``item``.

    The user message is ``item.test_prompt()`` with the " to the nearest dollar"
    hint and the trailing "Price is $" cue (including its two leading newlines)
    removed. The assistant message states ``item.price`` with two decimals.
    """
    prompt = item.test_prompt()
    for fragment in (" to the nearest dollar", "\n\nPrice is $"):
        prompt = prompt.replace(fragment, "")

    roles = ("system", "user", "assistant")
    contents = (
        "You estimate prices of items. Reply only with the price, no explanation",
        prompt,
        f"Price is ${item.price:.2f}",
    )
    return [{"role": role, "content": content} for role, content in zip(roles, contents)]

def make_jsonl(items):
    """Serialize ``items`` into JSONL text for fine-tuning.

    Each item becomes one line holding a JSON object of the form
    ``{"messages": [...]}``, where the list is produced by ``messages_for``.
    Lines are separated by a single newline and there is no trailing newline;
    an empty ``items`` yields an empty string.
    """
    # json.dumps builds the wrapper object instead of hand-assembled JSON text,
    # and str.join avoids repeated string concatenation inside the loop.
    return "\n".join(
        json.dumps({"messages": messages_for(item)}) for item in items
    )


def write_jsonl(items, filename):
    """Write ``items`` to ``filename`` as JSONL fine-tuning data.

    Args:
        items: Iterable of items accepted by ``make_jsonl``.
        filename: Path of the file to create or overwrite.
    """
    # Serialize first: mode "w" truncates the file as soon as it is opened, so
    # building the text beforehand keeps an existing file intact if
    # serialization raises.
    jsonl = make_jsonl(items)
    with open(filename, "w", encoding="utf-8") as f:
        f.write(jsonl)



# Trained too fast
This run overfitted: the validation loss jumped around and was roughly 4x larger, although accuracy stayed constant.  
Epochs: 2 Batch size: 16 LR multiplier:0.1

There were lots of errors which, in hindsight, may have come from parsing the output (not checked).  
**Metrics**: $153, RMSLE 3.6 Hits 31%  

In [None]:
# Materialize the training and validation sets as JSONL files on disk.
write_jsonl(fine_tune_train, "fine_tune_train.jsonl")
write_jsonl(fine_tune_validation, "fine_tune_validation.jsonl")

# Upload both files to OpenAI; the returned file objects provide the ids that
# the fine-tuning job refers to.
with open("fine_tune_train.jsonl", "rb") as f:
    train_file = openai.files.create(file=f, purpose="fine-tune")
with open("fine_tune_validation.jsonl", "rb") as f:
    validation_file = openai.files.create(file=f, purpose="fine-tune")

# Weights & Biases integration settings, reused by the later fine-tuning jobs.
wandb_integration = {"type": "wandb", "wandb": {"project": "gpt-pricer"}}

# Keep the job object returned by create(): its id is the only handle for
# looking the job up later. Previously the result was discarded and `job_id`
# was never assigned, so the retrieve() call below raised NameError.
# NOTE(review): the notes preceding this cell list 2 epochs / batch size 16 /
# LR multiplier 0.1, while this request only sets n_epochs=5 - confirm which
# configuration produced the reported metrics.
fine_tune_job = openai.fine_tuning.jobs.create(
    training_file=train_file.id,
    validation_file=validation_file.id,
    model="gpt-4o-mini-2024-07-18",
    seed=42,
    hyperparameters={"n_epochs": 5},
    integrations = [wandb_integration],
    suffix="pricer_v1"
)
job_id = fine_tune_job.id

# fine_tuned_model stays empty while the job is still running, so re-run this
# line once the job has finished before using the model name for inference.
fine_tuned_model_name_hpo = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model
# The prompt

def messages_for_test(item):
    """Build the inference-time conversation for ``item``.

    Uses the same system and user messages as the training examples, but the
    assistant message is just the "Price is $" cue with no price filled in.
    """
    question = (
        item.test_prompt()
        .replace(" to the nearest dollar", "")
        .replace("\n\nPrice is $", "")
    )
    instructions = "You estimate prices of items. Reply only with the price, no explanation"
    return [
        dict(role="system", content=instructions),
        dict(role="user", content=question),
        dict(role="assistant", content="Price is $"),
    ]
# A utility function to extract the price from a string

def get_price(s):
    """Extract the first number found in ``s`` as a float.

    Dollar signs and thousands separators (commas) are removed before
    searching. Returns 0 when the text contains no number.
    """
    cleaned = s.translate(str.maketrans('', '', '$,'))
    found = re.search(r"[-+]?\d*\.\d+|\d+", cleaned)
    if found is None:
        return 0
    return float(found.group())

# The function for gpt-4o-mini

def gpt_fine_tuned(item):
    """Return the price predicted for ``item`` by the first fine-tuned model.

    Sends the ``messages_for_test`` conversation to the model named by
    ``fine_tuned_model_name_hpo`` and parses the reply text with ``get_price``.
    """
    request = {
        "model": fine_tuned_model_name_hpo,
        "messages": messages_for_test(item),
        "seed": 42,
        # The expected reply is only a short price string.
        "max_tokens": 7,
    }
    completion = openai.chat.completions.create(**request)
    return get_price(completion.choices[0].message.content)


# Run the project's Tester harness over the test items with this predictor.
Tester.test(gpt_fine_tuned, test)

# Same OP model, but with nicer prompting ONLY at inference
It fixed the $0 prices, which drove the error down.  
**Metrics**: $88, RMSLE 0.59 Hits 50%  

In [None]:
def modified_messages_for_test(item):
    """Build the inference-time conversation for ``item`` with the longer prompt.

    The system message gives detailed pricing instructions, the user message
    prefixes the cleaned ``item.test_prompt()`` with a question, and the
    assistant message is just the "Price is $" cue.
    """
    # NOTE(review): these sentences are concatenated as-is, with no separating
    # spaces (and no period after "marketplace"). The text is kept unchanged
    # because models fine-tuned on the same prompt depend on it.
    system_sentences = [
        "You are a helpful assistant skilled at estimating the prices of a wide range of products and purchases.",
        "Analyze the detailed information provided about a product—including its description, brand, features, and any relevant specs or packaging.",
        "Respond with your best conservative estimate of the typical sale price in U.S. dollars for very similar products at an online marketplace",
        "Reply ONLY with the price number WITHOUT any explanation, reasoning, or extra text.",
        "Price cannot be zero, always make sensible assumptions.",
    ]
    description = item.test_prompt()
    for fragment in (" to the nearest dollar", "\n\nPrice is $"):
        description = description.replace(fragment, "")
    question = "What could be a conservative estimate for the price of the following product:\n\n" + description

    return [
        dict(role="system", content="".join(system_sentences)),
        dict(role="user", content=question),
        dict(role="assistant", content="Price is $"),
    ]


def gpt_fine_tuned(item):
    """Return the price predicted for ``item`` using the longer inference prompt.

    Queries the same fine-tuned model as the first experiment, but with the
    conversation built by ``modified_messages_for_test``; the reply text is
    parsed with ``get_price``.
    """
    response = openai.chat.completions.create(
        # The name previously used here, fine_tuned_model_name_epoch5, is not
        # defined anywhere in this notebook and raised NameError. The 5-epoch
        # job created earlier stores its model in fine_tuned_model_name_hpo.
        model=fine_tuned_model_name_hpo,
        messages=modified_messages_for_test(item),
        seed=42,
        max_tokens=7
    )
    reply = response.choices[0].message.content
    return get_price(reply)


# Run the project's Tester harness over the test items with this predictor.
Tester.test(gpt_fine_tuned, test)

# Trying to fix overfitting, setting new HPO and prompting on training  
Epochs:1 Batch size:1 LR multiplier:0.01  
It didn't make a noticeable difference.  
**Metrics**: $89, RMSLE 0.56 Hits 50%  



In [None]:

def modified_messages_for(item):
    """Build one fine-tuning chat example for ``item`` with the longer prompt.

    Same system and user messages as ``modified_messages_for_test``, but the
    assistant message states ``item.price`` formatted with two decimals.
    """
    # NOTE(review): these sentences are concatenated as-is, with no separating
    # spaces (and no period after "marketplace"). The text is kept unchanged so
    # the training prompt keeps matching the inference prompt.
    system_sentences = (
        "You are a helpful assistant skilled at estimating the prices of a wide range of products and purchases.",
        "Analyze the detailed information provided about a product—including its description, brand, features, and any relevant specs or packaging.",
        "Respond with your best conservative estimate of the typical sale price in U.S. dollars for very similar products at an online marketplace",
        "Reply ONLY with the price number WITHOUT any explanation, reasoning, or extra text.",
        "Price cannot be zero, always make sensible assumptions.",
    )
    description = (
        item.test_prompt()
        .replace(" to the nearest dollar", "")
        .replace("\n\nPrice is $", "")
    )
    question = "What could be a conservative estimate for the price of the following product:\n\n" + description

    conversation = [
        ("system", "".join(system_sentences)),
        ("user", question),
        ("assistant", f"Price is ${item.price:.2f}"),
    ]
    return [{"role": role, "content": content} for role, content in conversation]

def modified_make_jsonl(items):
    """Serialize ``items`` into JSONL text using the longer training prompt.

    Each item becomes one line holding a JSON object of the form
    ``{"messages": [...]}``, where the list is produced by
    ``modified_messages_for``. Lines are separated by a single newline and
    there is no trailing newline; an empty ``items`` yields an empty string.
    """
    # json.dumps builds the wrapper object instead of hand-assembled JSON text,
    # and str.join avoids repeated string concatenation inside the loop.
    return "\n".join(
        json.dumps({"messages": modified_messages_for(item)}) for item in items
    )

def modified_write_jsonl(items, filename):
    """Write ``items`` to ``filename`` as JSONL built with the longer prompt.

    Args:
        items: Iterable of items accepted by ``modified_make_jsonl``.
        filename: Path of the file to create or overwrite.
    """
    # Serialize first: mode "w" truncates the file as soon as it is opened, so
    # building the text beforehand keeps an existing file intact if
    # serialization raises.
    jsonl = modified_make_jsonl(items)
    with open(filename, "w", encoding="utf-8") as f:
        f.write(jsonl)

# Materialize the datasets again, this time with the longer prompt.
modified_write_jsonl(fine_tune_train, "mod_fine_tune_train.jsonl")
modified_write_jsonl(fine_tune_validation, "mod_fine_tune_validation.jsonl")


# Upload both files; the returned file objects provide the ids used by the job.
with open("mod_fine_tune_train.jsonl", "rb") as f:
    mod_train_file = openai.files.create(file=f, purpose="fine-tune")
with open("mod_fine_tune_validation.jsonl", "rb") as f:
    mod_validation_file = openai.files.create(file=f, purpose="fine-tune")

# Keep the job object returned by create() and publish its id as `job_id`, so
# the retrieve() call in the next cell looks up THIS job. Previously the result
# was discarded, leaving `job_id` undefined or pointing at an older job.
# The learning-rate multiplier is 0.01 as recorded in the notes for this
# experiment; it used to be 1., which made this job identical to the final
# "LR multiplier:1" run further down.
# NOTE(review): the suffix "pricer_v3" is also used by that later job - confirm
# whether this one was meant to be "pricer_v2".
mod_fine_tune_job = openai.fine_tuning.jobs.create(
    training_file=mod_train_file.id,
    validation_file=mod_validation_file.id,
    model="gpt-4o-mini-2024-07-18",
    seed=42,
    hyperparameters={"n_epochs": 1, "learning_rate_multiplier": 0.01, "batch_size": 1},
    integrations = [wandb_integration],
    suffix="pricer_v3"
)
job_id = mod_fine_tune_job.id

In [None]:
# job_id must be the id of the fine-tuning job created in the previous cell
# (the `id` of the object returned by jobs.create). fine_tuned_model stays
# empty while that job is still running, so run this only after it finishes.
fine_tuned_model_name_prompt_train = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model


def mod_gpt_fine_tuned(item):
    """Return the price predicted for ``item`` by the prompt-trained model.

    Sends the ``modified_messages_for_test`` conversation to the model named by
    ``fine_tuned_model_name_prompt_train`` and parses the reply text with
    ``get_price``.
    """
    request = {
        "model": fine_tuned_model_name_prompt_train,
        "messages": modified_messages_for_test(item),
        "seed": 42,
        # The expected reply is only a short price string.
        "max_tokens": 7,
    }
    completion = openai.chat.completions.create(**request)
    return get_price(completion.choices[0].message.content)


# Run the project's Tester harness over the test items with this predictor.
Tester.test(mod_gpt_fine_tuned, test)

# Last model: trying to achieve faster convergence
Epochs:1 Batch size:1 LR multiplier:1  
**Metrics**: $87, RMSLE 0.59 Hits 47%  


In [None]:
# Same uploaded files and prompt as the previous experiment, with the
# learning-rate multiplier set to 1. Keep the returned job object and publish
# its id as `job_id`, so the retrieve() call in the next cell looks up THIS
# job. Previously the result was discarded, leaving `job_id` undefined or
# pointing at an older job.
lr_fine_tune_job = openai.fine_tuning.jobs.create(
    training_file=mod_train_file.id,
    validation_file=mod_validation_file.id,
    model="gpt-4o-mini-2024-07-18",
    seed=42,
    hyperparameters={"n_epochs": 1, "learning_rate_multiplier":1., "batch_size":1},
    integrations = [wandb_integration],
    suffix="pricer_v3"
)
job_id = lr_fine_tune_job.id

In [None]:
# job_id must be the id of the fine-tuning job created in the previous cell
# (the `id` of the object returned by jobs.create). fine_tuned_model stays
# empty while that job is still running, so run this only after it finishes.
fine_tuned_model_name_prompt_train_lr = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model

def mod_gpt_fine_tuned_v2(item):
    """Return the price predicted for ``item`` by the last fine-tuned model.

    Sends the ``modified_messages_for_test`` conversation to the model named by
    ``fine_tuned_model_name_prompt_train_lr`` and parses the reply text with
    ``get_price``.
    """
    request = {
        "model": fine_tuned_model_name_prompt_train_lr,
        "messages": modified_messages_for_test(item),
        "seed": 42,
        # The expected reply is only a short price string.
        "max_tokens": 7,
    }
    completion = openai.chat.completions.create(**request)
    return get_price(completion.choices[0].message.content)


# Run the project's Tester harness over the test items with this predictor.
Tester.test(mod_gpt_fine_tuned_v2, test)

## Summary
For this model in particular, prompting seems far more important than the fine-tuning itself.
We first trained for longer, which led to overfitting. We then fixed the overfitting, with and without the improved prompt in the training inputs, and the results remained essentially unchanged.