In [2]:
import pandas as pd
import numpy as np

# Load the table that the fine-tuning examples are generated from
source_csv = '../../../data/Fine-Tuning/Updated_Pretraining_Data.csv'
data = pd.read_csv(source_csv)

## Generating JSONL file from table

In [3]:
import json

# Define a function to create the message format for each row
def create_message(row):
    """Build one chat-format fine-tuning example from a single data row.

    Args:
        row: Object indexable by column name (for example a DataFrame row
            passed in by ``DataFrame.apply(..., axis=1)``). It must provide
            "Timestamp", "Title", "Summary", "last_price",
            "volatility_annual", "T+3 return_normalized",
            "volatility_annual_normalized" and "duration_estimated".

    Returns:
        dict: ``{"messages": [...]}`` holding a system, a user and an
        assistant message. The user and assistant contents are JSON-encoded
        strings; the assistant "comment" field is always an empty string.
    """
    system_prompt = "This model is trained to analyze the sentiment of news articles concerning the LNG market and predict their impact on the LNG index's opening price and volatility for the following day. Please provide estimates for the expected return (ranging from -10.0 to +10.0), the volatility effect (ranging from -10.0 to +10.0), and the duration of the impact (scaled from 0 to 10, where 0 represents no impact and 10 represents a permanent impact). Include a comment explaining the rationale behind your predictions. Consider long-term market trends, seasonal variations, global supply-demand dynamics, and macroeconomic factors that might influence LNG prices and market behavior."

    # Output key -> source column; insertion order fixes the key order of the JSON text
    input_columns = {
        "date": "Timestamp",
        "title": "Title",
        "summary": "Summary",
        "price": "last_price",
        "vol_annual": "volatility_annual",
    }
    target_columns = {
        "return": "T+3 return_normalized",
        "vol": "volatility_annual_normalized",
        "duration": "duration_estimated",
    }

    user_payload = {key: row[column] for key, column in input_columns.items()}
    assistant_payload = {key: row[column] for key, column in target_columns.items()}
    # No rationale text is available in the table, so the comment is left blank
    assistant_payload["comment"] = ""

    return {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": json.dumps(user_payload)},
            {"role": "assistant", "content": json.dumps(assistant_payload)},
        ]
    }

# Turn every dataframe row into a chat-format training example
messages = data.apply(create_message, axis=1).tolist()

# Serialize the examples as JSON Lines: one JSON object per line
jsonl_file_path = './LNG_unsupervised_full.jsonl'
with open(jsonl_file_path, 'w') as outfile:
    outfile.writelines(json.dumps(message) + '\n' for message in messages)

# Display the path of the generated file as the cell result
jsonl_file_path

'./test.jsonl'

## Uploading JSONL file to OpenAI for fine tuning

In [2]:
import os
from openai import OpenAI

# SECURITY: never store the API key in the notebook. A key that was ever
# committed in this file must be treated as compromised and revoked.
# OpenAI() reads the key from the OPENAI_API_KEY environment variable; if it
# is not set, prompt for it interactively so it never lands in the file.
if not os.environ.get("OPENAI_API_KEY"):
    import getpass

    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")
client = OpenAI()

# NOTE(review): the generation cell writes './LNG_unsupervised_full.jsonl',
# while this cell uploads "LNG_unsupervised.jsonl" - confirm this is the
# intended training file.
# The context manager closes the file handle once the upload has finished.
with open("LNG_unsupervised.jsonl", "rb") as training_file:
    uploaded_file = client.files.create(
        file=training_file,
        purpose="fine-tune"
    )

# Display the uploaded file's metadata (its id is needed to start the job)
uploaded_file

FileObject(id='file-YnizAui3Y8jXr256ROC20dYE', bytes=71616, created_at=1714427737, filename='LNG_unsupervised.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

## Starting the fine tuning job

In [3]:
# Start a fine-tuning job on the previously uploaded training file
fine_tuning_job = client.fine_tuning.jobs.create(
    training_file="file-YnizAui3Y8jXr256ROC20dYE",
    model="gpt-3.5-turbo",
)
fine_tuning_job

FineTuningJob(id='ftjob-EnhBZn8Ed6EprbHTbiunUDWb', created_at=1714427834, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-9rsaFJl2PIgLIQLSOLg9xdMs', result_files=[], seed=1663708322, status='validating_files', trained_tokens=None, training_file='file-YnizAui3Y8jXr256ROC20dYE', validation_file=None, integrations=[], user_provided_suffix=None, estimated_finish=None)

## Checking job status

In [6]:
# List up to 10 fine-tuning jobs and display them
job_list = client.fine_tuning.jobs.list(limit=10)
job_list

SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-EnhBZn8Ed6EprbHTbiunUDWb', created_at=1714427834, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-9rsaFJl2PIgLIQLSOLg9xdMs', result_files=[], seed=1663708322, status='running', trained_tokens=None, training_file='file-YnizAui3Y8jXr256ROC20dYE', validation_file=None, integrations=[], user_provided_suffix=None, estimated_finish=1714428394)], object='list', has_more=False)

In [7]:
# Look up the current state of the fine-tuning job by its id and display it
job_state = client.fine_tuning.jobs.retrieve("ftjob-EnhBZn8Ed6EprbHTbiunUDWb")
job_state

# Cancel a job
# client.fine_tuning.jobs.cancel("ftjob-abc123")

# List up to 10 events from a fine-tuning job
# client.fine_tuning.jobs.list_events(fine_tuning_job_id="ftjob-abc123", limit=10)

# Delete a fine-tuned model (must be an owner of the org the model was created in)
# client.models.delete("ft:gpt-3.5-turbo:acemeco:suffix:abc123")

FineTuningJob(id='ftjob-EnhBZn8Ed6EprbHTbiunUDWb', created_at=1714427834, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-9rsaFJl2PIgLIQLSOLg9xdMs', result_files=[], seed=1663708322, status='running', trained_tokens=None, training_file='file-YnizAui3Y8jXr256ROC20dYE', validation_file=None, integrations=[], user_provided_suffix=None, estimated_finish=1714428396)

## Generating output with the fine-tuned model

In [8]:
# System prompt: kept identical to the one embedded in the training examples
# built by create_message.
system_prompt = "This model is trained to analyze the sentiment of news articles concerning the LNG market and predict their impact on the LNG index's opening price and volatility for the following day. Please provide estimates for the expected return (ranging from -10.0 to +10.0), the volatility effect (ranging from -10.0 to +10.0), and the duration of the impact (scaled from 0 to 10, where 0 represents no impact and 10 represents a permanent impact). Include a comment explaining the rationale behind your predictions. Consider long-term market trends, seasonal variations, global supply-demand dynamics, and macroeconomic factors that might influence LNG prices and market behavior."

# Build the user message from a dict and let json.dumps handle quoting and
# escaping, instead of maintaining a hand-escaped JSON literal. This uses the
# same keys, key order and encoding as the user messages in the training data.
article = {
    "date": "2022-07-19",
    "title": "Russian Gas Supplies to Europe Aren\u2019t Expected to Restart",
    "summary": "Europe is working on contingency plans for the possibility that the Nord Stream pipeline won\u2019t return to operation.",
    "price": 38.372,
    "vol_annual": 9.020759401473269,
}

completion = client.chat.completions.create(
    model="ft:gpt-3.5-turbo-0125:personal::9JTQBoWC",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": json.dumps(article)},
    ],
)
# Print the assistant message returned for the first choice
print(completion.choices[0].message)

ChatCompletionMessage(content='{"return": -1.745878987429439, "vol": -9.317153047917131, "duration": 7.029038468046126, "comment": ""}', role='assistant', function_call=None, tool_calls=None)
