In [1]:
# imports

import os
import re
import json
from dotenv import load_dotenv
from huggingface_hub import login
from openai import OpenAI
from pricer.items  import Item
from pricer.evaluator import evaluate

In [2]:

load_dotenv(override=True)
hf_token = os.environ['HF_TOKEN']
login(hf_token, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
dataset = "ed-donner/items_lite"
train, val, test = Item.from_hub(dataset)

print(f"Loaded {len(train):,} training items, {len(val):,} validation items, {len(test):,} test items")

Generating train split:   0%|          | 0/20000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Loaded 20,000 training items, 1,000 validation items, 1,000 test items


In [4]:
openai = OpenAI()

# Data size

OpenAI recommends fine-tuning with a small population of 50-100 examples

so, i m goona go with 100 examples


In [5]:

fine_tune_train = train[:100]
fine_tune_validation = val[:50]

In [6]:
len(fine_tune_train)

100

# Step 1

Prepare our data for fine-tuning in JSONL (JSON Lines) format and upload to OpenAI

In [7]:
def messages_for(item):
    message = f"Estimate the price of this product. Respond with the price, no explanation\n\n{item.summary}"
    return [
        {"role": "user", "content": message},
        {"role": "assistant", "content": f"${item.price:.2f}"}
    ]

In [8]:
messages_for(fine_tune_train[0])

[{'role': 'user',
  'content': 'Estimate the price of this product. Respond with the price, no explanation\n\nTitle: Schlage F59 & 613 Andover Interior Knob (Deadbolt Included)  \nCategory: Home Hardware  \nBrand: Schlage  \nDescription: A single‑piece oil‑rubbed bronze knob that mounts to a deadbolt for secure, easy interior door use.  \nDetails: Designed for a 4" minimum center‑to‑center door prep, it offers a lifetime mechanical and finish warranty and comes ready for quick installation.'},
 {'role': 'assistant', 'content': '$64.30'}]

In [9]:
# Convert the items into a list of json objects - a "jsonl" string
# Each row represents a message in the form:
# {"messages" : [{"role": "system", "content": "You estimate prices...


def make_jsonl(items):
    result = ""
    for item in items:
        messages = messages_for(item)
        messages_str = json.dumps(messages)
        result += '{"messages": ' + messages_str +'}\n'
    return result.strip()

In [11]:
make_jsonl(fine_tune_train[:2])

'{"messages": [{"role": "user", "content": "Estimate the price of this product. Respond with the price, no explanation\\n\\nTitle: Schlage F59 & 613 Andover Interior Knob (Deadbolt Included)  \\nCategory: Home Hardware  \\nBrand: Schlage  \\nDescription: A single\\u2011piece oil\\u2011rubbed bronze knob that mounts to a deadbolt for secure, easy interior door use.  \\nDetails: Designed for a 4\\" minimum center\\u2011to\\u2011center door prep, it offers a lifetime mechanical and finish warranty and comes ready for quick installation."}, {"role": "assistant", "content": "$64.30"}]}\n{"messages": [{"role": "user", "content": "Estimate the price of this product. Respond with the price, no explanation\\n\\nTitle: Mini Electric Air Duster Fan  \\nCategory: Electronics  \\nBrand: Kica  \\nDescription: Ultra\\u2011compact 86,000\\u202fRPM electric air duster with 11\\u202fm/s wind speed for precise cleaning and inflation.  \\nDetails: Powered by a 9.99\\u202fWh motor, adjustable in four speed

In [12]:
# Convert the items into jsonl and write them to a file

def write_jsonl(items, filename):
    with open(filename, "w") as f:
        jsonl = make_jsonl(items)
        f.write(jsonl)

In [13]:
write_jsonl(fine_tune_train, "jsonl/fine_tune_train.jsonl")


In [20]:
write_jsonl(fine_tune_validation, "jsonl/fine_tune_validation.jsonl")


In [21]:
with open("jsonl/fine_tune_train.jsonl", "rb") as f:
    train_file = openai.files.create(file=f, purpose="fine-tune")

In [22]:
train_file

FileObject(id='file-2YUvYiSPBe6gc56uZzWrdH', bytes=55120, created_at=1771412640, filename='fine_tune_train.jsonl', object='file', purpose='fine-tune', status='processed', expires_at=None, status_details=None)

In [23]:
with open("jsonl/fine_tune_validation.jsonl", "rb") as f:
    validation_file = openai.files.create(file=f, purpose="fine-tune")

In [24]:
validation_file

FileObject(id='file-UGAwTmsDLaViv7a9osRYmB', bytes=27637, created_at=1771412644, filename='fine_tune_validation.jsonl', object='file', purpose='fine-tune', status='processed', expires_at=None, status_details=None)

# Step 2

## And now time to Fine-tune!

In [25]:
openai.fine_tuning.jobs.create(
    training_file=train_file.id,
    validation_file=validation_file.id,
    model="gpt-4.1-nano-2025-04-14",
    seed=42,
    hyperparameters={"n_epochs": 1, "batch_size": 1},
    suffix="pricer"
)

FineTuningJob(id='ftjob-2xQpgeZot5lHiMZpvLatKArv', created_at=1771412814, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size=1, learning_rate_multiplier='auto', n_epochs=1), model='gpt-4.1-nano-2025-04-14', object='fine_tuning.job', organization_id='org-kmhgHPXGNsqZIbxgdJIX8TyA', result_files=[], seed=42, status='validating_files', trained_tokens=None, training_file='file-2YUvYiSPBe6gc56uZzWrdH', validation_file='file-UGAwTmsDLaViv7a9osRYmB', estimated_finish=None, integrations=[], metadata=None, method=Method(type='supervised', dpo=None, reinforcement=None, supervised=SupervisedMethod(hyperparameters=SupervisedHyperparameters(batch_size=1, learning_rate_multiplier='auto', n_epochs=1))), user_provided_suffix='pricer', usage_metrics=None, shared_with_openai=False, eval_id=None, internal_worker_backend=None)

In [26]:
openai.fine_tuning.jobs.list(limit=1)

SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-2xQpgeZot5lHiMZpvLatKArv', created_at=1771412814, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size=1, learning_rate_multiplier='auto', n_epochs=1), model='gpt-4.1-nano-2025-04-14', object='fine_tuning.job', organization_id='org-kmhgHPXGNsqZIbxgdJIX8TyA', result_files=[], seed=42, status='validating_files', trained_tokens=None, training_file='file-2YUvYiSPBe6gc56uZzWrdH', validation_file='file-UGAwTmsDLaViv7a9osRYmB', estimated_finish=None, integrations=[], metadata=None, method=Method(type='supervised', dpo=None, reinforcement=None, supervised=SupervisedMethod(hyperparameters=SupervisedHyperparameters(batch_size=1, learning_rate_multiplier='auto', n_epochs=1))), user_provided_suffix='pricer', usage_metrics=None, shared_with_openai=False, eval_id=None, internal_worker_backend=None)], has_more=False, object='list')

In [27]:
job_id = openai.fine_tuning.jobs.list(limit=1).data[0].id

In [28]:
job_id

'ftjob-2xQpgeZot5lHiMZpvLatKArv'

In [None]:
openai.fine_tuning.jobs.retrieve(job_id)

In [29]:
openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10).data

[FineTuningJobEvent(id='ftevent-iAmPu4gCfUHSU2MBzx78V4C7', created_at=1771412893, level='info', message='Fine-tuning job started', object='fine_tuning.job.event', data=None, type='message'),
 FineTuningJobEvent(id='ftevent-tHwAejtSXiTpMb3ITBHC74Ux', created_at=1771412892, level='info', message='Files validated, moving job to queued state', object='fine_tuning.job.event', data={}, type='message'),
 FineTuningJobEvent(id='ftevent-M6j7RRg4tnsLFhhprGzsWWcn', created_at=1771412815, level='info', message='Validating training file: file-2YUvYiSPBe6gc56uZzWrdH and validation file: file-UGAwTmsDLaViv7a9osRYmB', object='fine_tuning.job.event', data={}, type='message'),
 FineTuningJobEvent(id='ftevent-bb5yYlYBoC2HFLaYOoJ0p7E5', created_at=1771412814, level='info', message='Created fine-tuning job: ftjob-2xQpgeZot5lHiMZpvLatKArv', object='fine_tuning.job.event', data={}, type='message')]

### Testing our fine tuned model


In [49]:
fine_tuned_model_name = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model

In [50]:
fine_tuned_model_name

'ft:gpt-4.1-nano-2025-04-14:personal:pricer:DAZzMzP2'

In [51]:

def test_messages_for(item):
    message = f"Estimate the price of this product. Respond with the price, no explanation\n\n{item.summary}"
    return [
        {"role": "user", "content": message},
    ]

In [52]:
test_messages_for(test[0])

[{'role': 'user',
  'content': 'Estimate the price of this product. Respond with the price, no explanation\n\nTitle: Excess V2 Distortion/Modulation Pedal  \nCategory: Music Pedals  \nBrand: Old Blood Noise  \nDescription: A versatile pedal offering distortion and three modulation modes—delay, chorus, and harmonized fifths—with full control over signal routing and expression.  \nDetails: Features include separate gain, tone, and volume controls; time, depth, and volume per modulation; order switching, soft‑touch bypass, and expression jack for dynamic control.'}]

In [53]:

def gpt_4__1_nano_fine_tuned(item):
    response = openai.chat.completions.create(
        model=fine_tuned_model_name,
        messages=test_messages_for(item),
        max_tokens=7
    )
    return response.choices[0].message.content

In [54]:
print(test[0].price)
print(gpt_4__1_nano_fine_tuned(test[0]))

219.0
$239.00


In [57]:
evaluate(gpt_4__1_nano_fine_tuned, test)

  0%|          | 0/200 [00:00<?, ?it/s]

[93m$76 [91m$83 [92m$0 [92m$30 [93m$69 [91m$100 [92m$23 [91m$91 [92m$1 [92m$50 [91m$529 [91m$155 [92m$8 [93m$58 [92m$14 [92m$5 [92m$15 [92m$6 [92m$32 [93m$54 [93m$55 [92m$6 [91m$345 [91m$85 [91m$212 [91m$294 [92m$45 [92m$5 [91m$431 [93m$62 [92m$40 [92m$10 [91m$281 [93m$40 [91m$149 [91m$366 [91m$147 [93m$58 [91m$98 [92m$1 [92m$35 [92m$35 [92m$18 [93m$69 [92m$40 [92m$35 [93m$72 [92m$3 [91m$101 [92m$27 [92m$7 [91m$113 [92m$31 [92m$20 [91m$712 [92m$31 [92m$18 [93m$67 [92m$36 [92m$6 [91m$132 [93m$40 [92m$4 [93m$52 [93m$239 [92m$10 [93m$65 [91m$290 [93m$75 [91m$453 [92m$8 [92m$32 [91m$140 [91m$91 [93m$45 [92m$8 [92m$16 [92m$6 [92m$13 [92m$0 [92m$29 [92m$8 [91m$262 [93m$49 [92m$10 [92m$15 [93m$63 [91m$128 [91m$84 [92m$16 [92m$1 [92m$22 [92m$8 [92m$13 [92m$12 [91m$103 [92m$9 [91m$950 [92m$25 [92m$27 [92m$23 [91m$91 [93m$53 [91m$100 [92m$17 [91m$161 [92m$13 [91m$362 [92m$5 [92m$10 [93m

In [59]:
import gradio as gr

In [71]:

SYSTEM_PROMPT = """
You are a product price estimation AI.
Respond ONLY with the estimated price number.
No explanation.
"""

In [72]:

def estimate_price(product_description, history):
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Estimate the price of this product:\n{product_description}"}
    ]

    response = openai.chat.completions.create(
        model=fine_tuned_model_name,
        messages=messages,
        max_tokens=10,
        temperature=0
    )

    return response.choices[0].message.content



In [None]:


# 🔴 PUT YOUR REAL MODEL ID
MODEL = fine_tuned_model_name

def build_prompt(title, category, brand, description, details):

    prompt = f"""Estimate the price of this product. Respond with the price, no explanation

Title: {title}
Category: {category}
Brand: {brand}
Description: {description}
Details: {details}
"""
    return prompt



def estimate_price(title, category, brand, description, details):

    prompt = build_prompt(title, category, brand, description, details)

    response = openai.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=10,
        temperature=0
    )

    return response.choices[0].message.content.strip()


with gr.Blocks(theme=gr.themes.Soft(), title="AI Price Estimator") as demo:

    gr.Markdown(
        """
        # 💰 AI Product Price Estimator  
        Fine-tuned GPT model trained on product pricing dataset.

        Fill product details → click **Estimate Price**
        """
    )

    with gr.Row():
        title = gr.Textbox(label="Title")
        brand = gr.Textbox(label="Brand")

    with gr.Row():
        category = gr.Textbox(label="Category")
        details = gr.Textbox(label="Details")

    description = gr.Textbox(
        label="Description",
        lines=3
    )

    estimate_btn = gr.Button("Estimate Price 💰")

    output = gr.Textbox(
        label="Estimated Price",
        lines=1
    )

    estimate_btn.click(
        estimate_price,
        inputs=[title, category, brand, description, details],
        outputs=output
    )

    gr.Examples(
        examples=[
            [
                "Mini Electric Air Duster Fan",
                "Electronics",
                "Kica",
                "Ultra-compact electric air duster for cleaning",
                "86,000 RPM, lithium battery"
            ],
            [
                "Wooden Dining Table",
                "Furniture",
                "IKEA",
                "6 seater wooden dining table",
                "Solid oak wood"
            ]
        ],
        inputs=[title, category, brand, description, details]
    )

demo.launch()



The parameters have been moved from the Blocks constructor to the launch() method in Gradio 6.0: theme. Please pass these parameters to launch() instead.



* Running on local URL:  http://127.0.0.1:7871
* To create a public link, set `share=True` in `launch()`.


