In [None]:
# imports

import os
from dotenv import load_dotenv
from huggingface_hub import login
import numpy as np
from tqdm.notebook import tqdm
from openai import OpenAI
from litellm import completion
from sklearn.feature_extraction.text import HashingVectorizer
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from pricer.items import Item
from pricer.evaluator import evaluate

In [None]:
# Switch between the "lite" and the "full" dataset loaded further down
LITE_MODE = False

# Read secrets from the local .env file; override=True lets it win over variables already set
load_dotenv(override=True)
hf_token = os.environ["HF_TOKEN"]
model_name = os.environ["OPENAI_MYMODEL_PRICER"]  # model id passed to the fine-tuned inference function

# Log in to the Hugging Face Hub, then build the OpenAI client used for the fine-tuned model
login(hf_token, add_to_git_credential=True)
openai = OpenAI()

In [None]:
# Choose the Hub dataset name that matches LITE_MODE
username = "ed-donner"
if LITE_MODE:
    dataset = f"{username}/items_lite"
else:
    dataset = f"{username}/items_full"

# Item.from_hub yields the three splits, unpacked here as train / validation / test
train, val, test = Item.from_hub(dataset)

print(f"Loaded {len(train):,} training items, {len(val):,} validation items, {len(test):,} test items")

# Training an ANN with my parameters

In [None]:
# Prepare our documents and prices

# Regression targets: every training price coerced to a Python float
y = np.array(
    [float(item.price) for item in train],
    dtype=np.float64,
)

# Model inputs: the text summary of each training item, in the same order as y
documents = [item.summary for item in train]

In [None]:
# Bag-of-words features via scikit-learn's HashingVectorizer (n_features raised to 5000).
# binary=True records only whether a hashed term is present, not how many times it occurs.
# NOTE(review): norm is left at its default here, so row values may be rescaled rather
# than literal 0/1 "one-hot" entries - confirm against the scikit-learn docs.

np.random.seed(42)
vectorizer = HashingVectorizer(
    n_features=5000,
    stop_words='english',
    binary=True,
)
X = vectorizer.fit_transform(documents)

In [None]:
# Define the neural network - here is PyTorch code to create an 8-layer neural network

class NeuralNetwork(nn.Module):
    """Fully connected regressor: input_size -> 128 -> 64 (x6) -> 1.

    Every linear layer except the last is followed by a ReLU. The layers are
    exposed as attributes ``layer1`` ... ``layer8`` and the activation as ``relu``.
    """

    def __init__(self, input_size):
        """Create the eight linear layers for inputs with ``input_size`` features."""
        super().__init__()
        # Consecutive pairs of widths give the (in_features, out_features) of each layer
        widths = [input_size, 128, 64, 64, 64, 64, 64, 64, 1]
        for index, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            # setattr goes through nn.Module.__setattr__, so each layer is registered as a submodule
            setattr(self, f"layer{index}", nn.Linear(fan_in, fan_out))
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the network and return the output of the final linear layer."""
        hidden_layers = (
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.layer5,
            self.layer6,
            self.layer7,
        )
        activations = x
        for layer in hidden_layers:
            activations = self.relu(layer(activations))
        # No activation after the last layer
        return self.layer8(activations)

In [None]:
# Seed PyTorch's global RNG so that weight initialisation and DataLoader shuffling are
# repeatable on Restart & Run All (np.random.seed does not seed torch)
torch.manual_seed(42)

# Convert data to PyTorch tensors
# X is a sparse matrix, so it is densified first; y gets a trailing dimension so that
# targets have the same (N, 1) shape as the model's output
X_train_tensor = torch.FloatTensor(X.toarray())
y_train_tensor = torch.FloatTensor(y).unsqueeze(1)

# Split the data into training and validation sets
# 20% of the rows are held out for validation; random_state fixes the split
X_train, X_val, y_train, y_val = train_test_split(X_train_tensor, y_train_tensor, test_size=0.2, random_state=42)

# Create the loader
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Initialize the model
input_size = X_train_tensor.shape[1]
model = NeuralNetwork(input_size)

In [None]:
# Add up the element counts of every parameter that has gradients enabled
trainable_params = 0
for param in model.parameters():
    if param.requires_grad:
        trainable_params += param.numel()

print(f"Number of trainable parameters: {trainable_params:,}")

In [None]:
# Define loss function and optimizer
# L1Loss (mean absolute error) is used here in place of MSELoss
loss_function = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# We will do 3 complete runs through the data
EPOCHS = 3

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0  # sum of per-sample training losses seen in this epoch
    samples_seen = 0
    for batch_X, batch_y in tqdm(train_loader):
        optimizer.zero_grad()

        # The next 4 lines are the 4 stages of training: forward pass, loss calculation, backward pass, optimize
        outputs = model(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # The loss is a per-batch mean, so weight it by the batch size; the last batch may be smaller
        batch_size = batch_X.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Report the mean over the whole epoch rather than the loss of whichever batch came last
    train_loss = running_loss / samples_seen if samples_seen else float("nan")

    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = loss_function(val_outputs, y_val)

    print(f'Epoch [{epoch+1}/{EPOCHS}], Train Loss: {train_loss:.3f}, Val Loss: {val_loss.item():.3f}')

In [None]:
def neural_network(item):
    """Predict a price for ``item`` with the trained network.

    The item's summary is vectorized with the same ``vectorizer`` used for
    training and passed through ``model`` in eval mode without gradient
    tracking. Predictions that are not above zero are returned as 0.
    """
    model.eval()
    features = torch.FloatTensor(vectorizer.transform([item.summary]).toarray())
    with torch.no_grad():
        prediction = model(features)[0].item()
    # A price cannot be negative, so floor the prediction at zero
    return prediction if prediction > 0 else 0

In [None]:
# Score the neural_network predictor on the test split.
# NOTE(review): evaluate comes from pricer.evaluator, which is not shown here - confirm what it reports.
evaluate(neural_network, test)

# My test results:
# NN Error: $53.91 +/- $9.95; MSE: 8,064, r^2: 63.3%
# Slightly better than Grok-4.1 fast reasoning performance

# Testing the frontier model with my prompts

In [None]:
def messages_for(item):
    """Return the single-message chat payload asking for a price estimate of ``item``.

    The instruction text is followed by a blank line and ``item.summary``.
    """
    instructions = (
        "Estimate the price of this product. "
        "Use prices on Amazon US as benchmarks adjusted for inflation for the price in 2023. "
        "Respond with the price, no explanation"
    )
    prompt = f"{instructions}\n\n{item.summary}"
    return [dict(role="user", content=prompt)]

In [None]:
# The function for gpt-4.1-nano

def gpt_4__1_nano(item):
    """Ask gpt-4.1-nano (via LiteLLM's ``completion``) to price ``item``; return the reply text."""
    reply = completion(
        model="openai/gpt-4.1-nano",
        messages=messages_for(item),
    )
    first_choice = reply.choices[0]
    return first_choice.message.content

In [None]:
# Spot check on the first test item: the model's raw reply, then the item's price in whole dollars
print(gpt_4__1_nano(test[0]))
# The format spec is applied inline: reusing double quotes inside an f-string is a SyntaxError before Python 3.12
print(f"${test[0].price:.0f}")

In [None]:
# Score the gpt_4__1_nano predictor on the test split.
# NOTE(review): evaluate comes from pricer.evaluator, which is not shown here - confirm it accepts the text reply this predictor returns.
evaluate(gpt_4__1_nano, test)

# Testing my fine-tuned frontier model with my prompts

In [None]:
# The prompt

def test_messages_for(item):
    message = f"Estimate the price of this product. Use prices on Amazon US as benchmarks adjusted for inflation for the price in 2023. Respond with the price, no explanation\n\n{item.summary}"
    return [
        {"role": "user", "content": message},
    ]

In [None]:
# The inference function

def gpt_4__1_nano_fine_tuned(item):
    """Query the model named by ``model_name`` for a price of ``item``; return the reply text."""
    request = dict(
        model=model_name,
        messages=test_messages_for(item),
        max_tokens=7,  # cap the length of the reply
    )
    chat_response = openai.chat.completions.create(**request)
    return chat_response.choices[0].message.content

In [None]:
# Spot check on the first test item: the fine-tuned model's raw reply, then the item's price to the cent
print(gpt_4__1_nano_fine_tuned(test[0]))
# The format spec is applied inline: reusing double quotes inside an f-string is a SyntaxError before Python 3.12
print(f"${test[0].price:.2f}")

In [None]:
# Score the gpt_4__1_nano_fine_tuned predictor on the test split.
# NOTE(review): evaluate comes from pricer.evaluator, which is not shown here - confirm it accepts the text reply this predictor returns.
evaluate(gpt_4__1_nano_fine_tuned, test)