## 1. Importing the required libraries

In [391]:
import ast
import json
import math
import os
from datetime import datetime

import numpy as np
import pandas as pd
from openai import OpenAI

## 2. Setting up the API connections and Chat Completion Function

In [392]:



# The OpenAI client picks up its key from the OPENAI_API_KEY environment variable.
# Set it in the shell that launches Jupyter instead of pasting the secret into
# the notebook, where it would be saved (and shared) together with the file.
if not os.environ.get("OPENAI_API_KEY"):
    print("WARNING: OPENAI_API_KEY is not set; OpenAI requests will fail until it is.")

# Chat model used by default for every completion request in this notebook.
GPT_MODEL = "gpt-3.5-turbo"

def chat_completion_request(messages, tools=None, tool_choice=None, model=GPT_MODEL):
    """Send one deterministic (temperature 0) chat completion request.

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}`` dicts.
        tools: Optional tool definitions, forwarded unchanged to the API.
        tool_choice: Optional tool selection, forwarded unchanged to the API.
        model: Name of the chat model; defaults to ``GPT_MODEL``.

    Returns:
        The response object returned by the client on success. On any failure
        the error is printed and the caught exception instance is *returned*
        (not raised), so callers must check the type of the result before
        reading fields such as ``.choices``.
    """
    request_kwargs = {
        "model": model,
        "messages": messages,
        "tools": tools,
        "tool_choice": tool_choice,
        "temperature": 0,
    }
    try:
        # The client is created inside the try so that construction errors are
        # reported through the same path as request errors.
        return OpenAI().chat.completions.create(**request_kwargs)
    except Exception as err:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {err}")
        return err

## 3. Reading the data and basic EDA

In [393]:
# Dataset source (Kaggle, "Financial Sentiment Analysis"):
# https://www.kaggle.com/datasets/sbhatti/financial-sentiment-analysis
df = pd.read_csv(filepath_or_buffer="data 2.csv")
df.head(n=5)

Unnamed: 0,Sentence,Sentiment
0,The GeoSolutions technology will leverage Bene...,positive
1,"$ESI on lows, down $1.50 to $2.50 BK a real po...",negative
2,"For the last quarter of 2010 , Componenta 's n...",positive
3,According to the Finnish-Russian Chamber of Co...,neutral
4,The Swedish buyout firm has sold its remaining...,neutral


In [408]:
# Dataset size, followed by the number of sentences per sentiment label.
print("Total no. of sentences: ", len(df.index))
df["Sentiment"].value_counts()

Total no. of sentences:  5842


Sentiment
neutral     3130
positive    1852
negative     860
Name: count, dtype: int64

In [396]:
# Convenience only: copy the full dataset to the system clipboard for pasting
# into a spreadsheet. Machines without a clipboard backend (e.g. headless
# servers) raise PyperclipException; skip the copy there so that
# "Restart & Run All" still completes.
try:
    df.to_clipboard()
except pd.errors.PyperclipException as exc:
    print(f"Skipping clipboard copy: {exc}")

## 4. Taking a sample dataset for Prompt Engineering

In [397]:
# Draw a reproducible (fixed seed) random sample of 100 sentences for validation.
# The first reset_index discards the original row labels; the second one exposes
# the fresh 0-based row number as an explicit "index" column.
df_validation = (
    df.sample(n=100, random_state=1)
    .reset_index(drop=True)
    .reset_index()
)
df_validation["Sentiment"].value_counts()

Sentiment
neutral     49
positive    35
negative    16
Name: count, dtype: int64

In [398]:
# Display the validation sample; the "index" column is the 0-based row number
# produced by the double reset_index() when the sample was created.
df_validation

Unnamed: 0,index,Sentence,Sentiment
0,0,The loss for the third quarter of 2007 was EUR...,positive
1,1,The winner does not have to be present to win .,neutral
2,2,$BOBE down $2.84 premarket,negative
3,3,$MTD (Mettler-Toledo) ready for a breakout,positive
4,4,Finnish M-real 's Kyro mill is starting negoti...,neutral
...,...,...,...
95,95,"According to Bosse , the present cooperation i...",positive
96,96,$NUGT Gold above 1400...wow,positive
97,97,"In Finland , Hamina Cargo Terminal , of Finnis...",neutral
98,98,Small investors have voiced fears that the sha...,neutral


## 5. Prompt Engineering 

In [400]:

# System prompt: assigns the analyst role and asks for a sentiment label plus a
# 0-1 confidence score per sentence, returned as JSON keyed by the sentence key.
# NOTE: this text is sent to the model verbatim; editing it (even fixing typos)
# changes the prompt and may change the model's answers.
system_message = '''
            As a financial analyst, your task is to analyse the given information or news and identify the sentiment. 
            Please output one of followings: "positive", "negative" or "neutral". 
            please output the confidence score ranging between 0 to 1 which indicates your level of cetrainity.
            Output should be in JSON format. Please output the key of each sentence also.
            
        
          '''

# Few-shot example 1, user turn: three sentences keyed 0-2.
# The backslash at the end of the heading line is a line continuation inside
# the string literal, so no newline is emitted after the heading.
user_message_1 = '''
                Below are the list of the sentences:  \
                {0: "The measures result from weak demand in the shipbuilding industry ."
                 1: "The closing of such transaction took place today ."
                 2: "The operating loss amounted to EUR 0.8 mn , compared to a profit of EUR 3.9 mn a year earlier ."
                 }
                
                '''

# Few-shot example 1, assistant turn: demonstrates the expected reply shape,
# a JSON object mapping each key to [sentiment_label, confidence].
assistant_message_1 = ''' 
                        {"0":["negative", 0.8], "1" : ["neutral", 0.9], "2" : ["neutral", 0.9]}
                      '''



# Few-shot example 2, user turn: three more sentences keyed 3-5.
user_message_2 = ''' 
                Below are the list of the sentences:  \
                {3: "We are pleased to welcome Tapeks Noma into Cramo group ."
                 4: "Adjusted for changes in the Group structure , the Division 's net sales increased by 1.7 % ."
                 5: "The number of bodily injury cases quadrupled in 2000-2006 ."}

                 '''


# Few-shot example 2, assistant turn: same reply shape as in example 1.
assistant_message_2 = ''' 
                        {"3":["positive", 0.95], "4" : ["positive", 0.9], "5" : ["negative", 1]}
                      '''

# Template for the real request: the single "{}" placeholder is filled through
# str.format() with the {key: sentence} dict of the current batch.
user_message = ''' Below are the list of the sentences:  \
                    {}
               '''

## 6. Running the LLM 

In [401]:
batch_size = 10
num_batches = math.ceil(df_validation.shape[0]/batch_size)
prompt_tokens_list = []
completion_tokens_list = []


def _parse_llm_output(raw_text):
    """Turn the model's reply into a dict without executing it as code.

    The reply is expected to be a JSON object of the form
    ``{"<key>": [<sentiment>, <confidence>], ...}``. JSON is tried first; a
    Python-style dict literal (e.g. single-quoted strings) is accepted as a
    fallback through ``ast.literal_eval``, which only evaluates literals.

    Raises:
        ValueError: if the reply cannot be parsed either way.
    """
    cleaned = raw_text.strip().strip("'").strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        try:
            return ast.literal_eval(cleaned)
        except (ValueError, SyntaxError) as exc:
            raise ValueError(f"Could not parse LLM output: {raw_text!r}") from exc


# One small frame per batch; concatenated once after the loop instead of
# growing a DataFrame inside it.
batch_frames = []
start_time = datetime.now()
print(start_time)
# Iterate over every batch of the validation set (the last one may be shorter).
for batch in range(num_batches):
    batch_rows = slice(batch * batch_size, (batch + 1) * batch_size)
    # .tolist() yields plain Python ints/strs, so the dict repr embedded in the
    # prompt stays "0: '...'" regardless of how numpy prints its scalar types.
    keys = df_validation["index"].iloc[batch_rows].tolist()
    sentences = df_validation["Sentence"].iloc[batch_rows].tolist()
    sentence_dict = dict(zip(keys, sentences))
    messages = [{"role": "system", "content": system_message},
                {"role": "user", "content": user_message_1},
                {"role": "assistant", "content": assistant_message_1},
                {"role": "user", "content": user_message_2},
                {"role": "assistant", "content": assistant_message_2},
                {"role": "user", "content": user_message.format(sentence_dict)}]
    response = chat_completion_request(messages, tools=None, tool_choice=None, model=GPT_MODEL)
    # chat_completion_request returns the exception object on failure; surface
    # it here instead of failing later with an unrelated AttributeError.
    if isinstance(response, Exception):
        raise RuntimeError(f"Chat completion failed for batch {batch}") from response
    response_ = response.choices[0].message.content
    prompt_tokens_list.append(response.usage.prompt_tokens)
    completion_tokens_list.append(response.usage.completion_tokens)

    llm_output_json = _parse_llm_output(response_)
    rows = []
    for key, value in llm_output_json.items():
        # Each value must be exactly [sentiment, confidence].
        sentiment, confidence = value
        rows.append((int(key), sentiment, confidence))
    df_results = pd.DataFrame(rows, columns=["index", "Sentiment_LLM", "Confidence_Score"])
    batch_frames.append(df_results)

if batch_frames:
    df_final_results = pd.concat(batch_frames, ignore_index=True)
else:
    # Nothing to classify: keep the expected schema so later cells still work.
    df_final_results = pd.DataFrame(columns=["index", "Sentiment_LLM", "Confidence_Score"])

end_time = datetime.now()
time_difference = (end_time - start_time).total_seconds()

print(end_time)
print("time in seconds", time_difference)

2024-04-19 10:37:06.421058
0
1
2
3
4
5
6
7
8
9
2024-04-19 10:37:37.664355
time in seconds 31.243297


In [413]:
# Mean prompt / completion token usage per batch request.
for label, token_counts in (
    ("Average Input Tokens: ", prompt_tokens_list),
    ("Average Completion Tokens: ", completion_tokens_list),
):
    print(label, np.mean(token_counts))

Average Input Tokens:  684.4
Average Completion Tokens:  122.9


In [412]:
# Mean prompt tokens per request (same quantity as "Average Input Tokens").
np.asarray(prompt_tokens_list).mean()

684.4

## 7. Result Analysis

In [410]:
# Attach the ground-truth rows to the LLM predictions. No explicit key is given,
# so pandas joins on the columns both frames share.
df_final_results1 = pd.merge(df_final_results, df_validation, how="left")
# 1 where the predicted label equals the dataset label, 0 otherwise.
df_final_results1["IS_MATCH"] = (
    df_final_results1["Sentiment_LLM"].eq(df_final_results1["Sentiment"]).astype(int)
)
# Share of matching predictions, in percent of the rows returned by the LLM.
accuracy = df_final_results1["IS_MATCH"].sum() * 100 / len(df_final_results1)
print("Accuracy =: ", accuracy, "%")

Accuracy =:  67.0 %
