In [1]:
# imports

import os
import re
import math
import json
import random
from dotenv import load_dotenv
from huggingface_hub import login
from items import Item
import matplotlib.pyplot as plt
import numpy as np
import pickle
from collections import Counter
from openai import OpenAI
from anthropic import Anthropic
# tester function
from testing import Tester

In [2]:
# environment

load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')

In [3]:
# Log in to HuggingFace

hf_token = os.environ['HF_TOKEN']
login(hf_token, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [4]:
openai = OpenAI()
claude = Anthropic()

In [5]:
%matplotlib inline

In [6]:
# Let's avoid curating all our data again! Load in the pickle files:

with open('train.pkl', 'rb') as file:
    train = pickle.load(file)

train = [item for item in train if item.prompt]

with open('test.pkl', 'rb') as file:
    test = pickle.load(file)

test = [item for item in test if item.prompt]

In [7]:
# OpenAI recommends fine-tuning with populations of 50-100 examples
# But as our examples are very small, I'm suggesting we go with 200 examples (and 1 epoch)

fine_tune_train = train[:200]
fine_tune_validation = train[200:250]

In [8]:
# Create a good prompt for the frontier model

def messages_for(item):
    system_message = "You estimate the sentiment of financial news. Reply only with either 0 for 'bearish', 1 for 'bullish' or 2 for 'neutral'. Don't provider any explanation"
    user_prompt = item.test_prompt().replace("\n\nSentiment is","")
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": "Sentiment is"}
    ]

In [9]:
messages_for(train[0])

[{'role': 'system',
  'content': "You estimate the sentiment of financial news. Reply only with either 0 for 'bearish', 1 for 'bullish' or 2 for 'neutral'. Don't provider any explanation"},
 {'role': 'user',
  'content': 'What is the financial sentiment of this tweet?\n\n$BYND - JPMorgan reels in expectations on Beyond Meat https '},
 {'role': 'assistant', 'content': 'Sentiment is'}]

In [10]:
# Convert the items into a list of json objects - a "jsonl" string
# Each row represents a message in the form:
# {"messages" : [{"role": "system", "content": "You estimate prices...


def make_jsonl(items):
    result = ""
    for item in items:
        messages = messages_for(item)
        messages_str = json.dumps(messages)
        result += '{"messages": ' + messages_str +'}\n'
    return result.strip()

In [11]:
print(make_jsonl(train[:3]))

{"messages": [{"role": "system", "content": "You estimate the sentiment of financial news. Reply only with either 0 for 'bearish', 1 for 'bullish' or 2 for 'neutral'. Don't provider any explanation"}, {"role": "user", "content": "What is the financial sentiment of this tweet?\n\n$BYND - JPMorgan reels in expectations on Beyond Meat https "}, {"role": "assistant", "content": "Sentiment is"}]}
{"messages": [{"role": "system", "content": "You estimate the sentiment of financial news. Reply only with either 0 for 'bearish', 1 for 'bullish' or 2 for 'neutral'. Don't provider any explanation"}, {"role": "user", "content": "What is the financial sentiment of this tweet?\n\n$CCL $RCL - Nomura points to bookings weakness at Carnival and Royal Caribbean https "}, {"role": "assistant", "content": "Sentiment is"}]}
{"messages": [{"role": "system", "content": "You estimate the sentiment of financial news. Reply only with either 0 for 'bearish', 1 for 'bullish' or 2 for 'neutral'. Don't provider any

In [12]:
# Convert the items into jsonl and write them to a file

def write_jsonl(items, filename):
    with open(filename, "w") as f:
        jsonl = make_jsonl(items)
        f.write(jsonl)

In [13]:
write_jsonl(fine_tune_train, "fine_tune_train.jsonl")

In [14]:
write_jsonl(fine_tune_validation, "fine_tune_validation.jsonl")

In [15]:
with open("fine_tune_train.jsonl", "rb") as f:
    train_file = openai.files.create(file=f, purpose="fine-tune")

In [16]:
train_file

FileObject(id='file-5RkgbPcDkwSPBXZYVybbGa', bytes=81232, created_at=1756338703, filename='fine_tune_train.jsonl', object='file', purpose='fine-tune', status='processed', expires_at=None, status_details=None)

In [17]:
with open("fine_tune_validation.jsonl", "rb") as f:
    validation_file = openai.files.create(file=f, purpose="fine-tune")

In [18]:
validation_file

FileObject(id='file-FLsZM4AWrEatybLLzQS3K2', bytes=20670, created_at=1756338703, filename='fine_tune_validation.jsonl', object='file', purpose='fine-tune', status='processed', expires_at=None, status_details=None)

In [19]:
wandb_integration = {"type": "wandb", "wandb": {"project": "gpt-labeler"}}

In [20]:
train_file.id

'file-5RkgbPcDkwSPBXZYVybbGa'

In [21]:
openai.fine_tuning.jobs.create(
    training_file=train_file.id,
    validation_file=validation_file.id,
    model="gpt-4o-mini-2024-07-18",
    seed=42,
    hyperparameters={"n_epochs": 1},
    integrations = [wandb_integration],
    suffix="labeler"
)

FineTuningJob(id='ftjob-ErjoIvTG2WFdCAhudSi06O6i', created_at=1756338705, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=1), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-NfnSn0bskbJG3TZ2c6Nd7zSb', result_files=[], seed=42, status='validating_files', trained_tokens=None, training_file='file-5RkgbPcDkwSPBXZYVybbGa', validation_file='file-FLsZM4AWrEatybLLzQS3K2', estimated_finish=None, integrations=[FineTuningJobWandbIntegrationObject(type='wandb', wandb=FineTuningJobWandbIntegration(project='gpt-labeler', entity=None, name=None, tags=None, run_id='ftjob-ErjoIvTG2WFdCAhudSi06O6i'))], metadata=None, method=Method(type='supervised', dpo=None, reinforcement=None, supervised=SupervisedMethod(hyperparameters=SupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs=1))), user_provided_suffix='labe

In [51]:
openai.fine_tuning.jobs.list(limit=1)

SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-ErjoIvTG2WFdCAhudSi06O6i', created_at=1756338705, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=1756339269, hyperparameters=Hyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=1), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-NfnSn0bskbJG3TZ2c6Nd7zSb', result_files=[], seed=42, status='running', trained_tokens=None, training_file='file-5RkgbPcDkwSPBXZYVybbGa', validation_file='file-FLsZM4AWrEatybLLzQS3K2', estimated_finish=None, integrations=[FineTuningJobWandbIntegrationObject(type='wandb', wandb=FineTuningJobWandbIntegration(project='gpt-labeler', entity=None, name=None, tags=None, run_id='ftjob-ErjoIvTG2WFdCAhudSi06O6i'))], metadata=None, method=Method(type='supervised', dpo=None, reinforcement=None, supervised=SupervisedMethod(hyperparameters=SupervisedHyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=1))), user_prov

In [52]:
job_id = openai.fine_tuning.jobs.list(limit=1).data[0].id

In [53]:
job_id

'ftjob-ErjoIvTG2WFdCAhudSi06O6i'

In [54]:
openai.fine_tuning.jobs.retrieve(job_id)

FineTuningJob(id='ftjob-ErjoIvTG2WFdCAhudSi06O6i', created_at=1756338705, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=1756339269, hyperparameters=Hyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=1), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-NfnSn0bskbJG3TZ2c6Nd7zSb', result_files=[], seed=42, status='running', trained_tokens=None, training_file='file-5RkgbPcDkwSPBXZYVybbGa', validation_file='file-FLsZM4AWrEatybLLzQS3K2', estimated_finish=None, integrations=[FineTuningJobWandbIntegrationObject(type='wandb', wandb=FineTuningJobWandbIntegration(project='gpt-labeler', entity=None, name=None, tags=None, run_id='ftjob-ErjoIvTG2WFdCAhudSi06O6i'))], metadata=None, method=Method(type='supervised', dpo=None, reinforcement=None, supervised=SupervisedMethod(hyperparameters=SupervisedHyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=1))), user_provided_suffix='labeler', usage_metrics

In [77]:
openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10).data

[FineTuningJobEvent(id='ftevent-wZ72xEiaIjNh21LjZXn3GxoK', created_at=1756340651, level='info', message='The job has successfully completed', object='fine_tuning.job.event', data={}, type='message'),
 FineTuningJobEvent(id='ftevent-XdVaGa4Feys0z0PFFQ8TM7hX', created_at=1756340645, level='info', message='Usage policy evaluations completed, model is now enabled for sampling', object='fine_tuning.job.event', data={}, type='message'),
 FineTuningJobEvent(id='ftevent-Gr8II5GnAtpQ3HBKKrXCaqXb', created_at=1756340645, level='info', message='Moderation checks for snapshot ft:gpt-4o-mini-2024-07-18:personal:labeler:C9KMhYOw passed.', object='fine_tuning.job.event', data={'blocked': False, 'results': [{'flagged': False, 'category': 'harassment/threatening', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'sexual', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'sexual/minors', 'enforcement': 'blocking'}, {'flagged': False, 'category': 'propaganda', 'enforcement': 'blocking

In [63]:
openai.fine_tuning.jobs.retrieve(job_id).status

'succeeded'

In [64]:
fine_tuned_model_name = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model

In [65]:
fine_tuned_model_name

'ft:gpt-4o-mini-2024-07-18:personal:labeler:C9KMhYOw'

In [66]:
# The prompt

def messages_for(item):
    system_message = "You estimate the sentiment of financial news. Reply only with either 0 for 'bearish', 1 for 'bullish' or 2 for 'neutral'. Don't provider any explanation"
    user_prompt = item.test_prompt().replace("\n\nSentiment is","")
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": "Sentiment is"}
    ]

In [67]:
# Try this out

messages_for(test[0])

[{'role': 'system',
  'content': "You estimate the sentiment of financial news. Reply only with either 0 for 'bearish', 1 for 'bullish' or 2 for 'neutral'. Don't provider any explanation"},
 {'role': 'user',
  'content': 'What is the financial sentiment of this tweet?\n\n$DELL $HPE - Dell, HPE targets trimmed on compute headwinds https '},
 {'role': 'assistant', 'content': 'Sentiment is'}]

In [68]:
def get_label(s):
    match = re.search(r"[-+]?\d*\.\d+|\d+", s)
    return float(match.group()) if match else 2

In [69]:
get_label("The sentiment is 1 because blah blah")

1.0

In [78]:
# The function for gpt-4o-mini

def gpt_fine_tuned(item):
    response = openai.chat.completions.create(
        model=fine_tuned_model_name, 
        messages=messages_for(item),
        seed=42,
        max_tokens=7
    )
    reply = response.choices[0].message.content
    return reply

In [79]:
print(test[0].label)
print(gpt_fine_tuned(test[0]))

0
Sentiment is


In [81]:
print(test[0].test_prompt().replace("\n\nSentiment is",""))

What is the financial sentiment of this tweet?

$DELL $HPE - Dell, HPE targets trimmed on compute headwinds https 


In [73]:
Tester.test(gpt_fine_tuned, test)

1: Guess=2 Truth=0 Item: $DELL $HPE - Dell, HPE targets trimmed o...
2: Guess=2 Truth=0 Item: $PRTY - Moody's turns negative on Party ...
3: Guess=2 Truth=0 Item: $TAP - Barclays cools on Molson Coors ht...
4: Guess=2 Truth=0 Item: Analysts Eviscerate Musk's Cybertruck: "...
5: Guess=2 Truth=0 Item: Barclays assigns only a 20% chance that ...
6: Guess=2 Truth=0 Item: BTIG points to breakfast pressure for Du...
7: Guess=2 Truth=0 Item: Children's Place downgraded to neutral f...
8: Guess=2 Truth=0 Item: Clovis Oncology downgraded to in line fr...
9: Guess=2 Truth=0 Item: Downgrades 4/7: $AAN $BDN $BECN $BTE $CD...
10: Guess=2 Truth=0 Item: Goldman pulls Progressive from Goldman's...
11: Guess=2 Truth=0 Item: Hanesbrands downgraded to underperform v...
12: Guess=2 Truth=0 Item: LendingTree price target cut to $350 fro...
13: Guess=2 Truth=0 Item: Mizuho cuts XLNX target on near-term hea...
14: Guess=2 Truth=0 Item: MPLX cut at Credit Suisse on potential d...
15: Guess=2 Truth=0 Item: Nor

KeyboardInterrupt: 

In [76]:
fine_tune_train[0].label

0