In [3]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm

### Load dataframe with manually set scores

In [4]:
# Read the manually set polarity score of every report into a DataFrame
manual_polarity = pd.read_csv("Scores/manual_polarity.csv")
# Preview the first rows to check that the file loaded as expected
manual_polarity.head(n=5)

Unnamed: 0,CleanReportName,ReportName,MyReportName,AverageManualScore,Years
0,FINAL-Q4-17-Shareholder-Letter,COMBINED-Q4-17-Shareholder-Letter-FINAL,COMBINED-Q4-17-Shareholder-Letter-FINAL,0.6,2017
1,FINAL-Q2-15-Shareholder-Letter,FINAL_Q2_15_Letter_to_Shareholders_With_Tables_,FINAL_Q2_15_Letter_to_Shareholders_With_Tables_,0.333333,2015
2,FINAL-Q3-15-Shareholder-Letter,FINAL_Q3_15_Letter_to_Shareholders_With_Tables_,FINAL_Q3_15_Letter_to_Shareholders_With_Tables_,0.166667,2015
3,FINAL-Q3-16-Shareholder-Letter,FINAL_Q3_Letter,FINAL_Q3_Letter,0.483333,2016
4,FINAL-Q1-18-Shareholder-Letter,FINAL-Q1-18-Shareholder-Letter,FINAL-Q1-18-Shareholder-Letter,0.5,2018


### Calculate metrics for sentiment models

In [6]:
# Evaluate every sentiment model against the manually set polarity scores.
# For each model the predicted polarity is rescaled to the range of the manual
# scores and then compared using MAE, MSE and the Pearson correlation coefficient.

# Display name of each model -> CSV file holding its predicted polarity scores
models_dict = {
    'Baseline Frequency': "Scores/baseline_frequency_polarity.csv",
    'Baseline Keyword': "Scores/baseline_keyword_polarity.csv",
    'BERT': "Scores/bert_polarity.csv",
    'Roberta': "Scores/roberta_polarity.csv",
    'TextBlob': "Scores/textblob_polarity.csv",
    'Amazon': "Scores/amazon_polarity.csv",
    'Google': "Scores/google_polarity.csv",
    'OpenAI': "Scores/openai_polarity.csv",
}

# model name -> array of scaled predictions (one entry per row of the merged frame)
predictions = {}
# model name -> Series of mean scaled prediction per year
years_values = {}

# iterate over the models
for model, path in models_dict.items():

    try:

        # merge predicted and manual polarity scores; the left join keeps every
        # manually scored report, so a report without a prediction gets NaN polarity
        df = pd.read_csv(path)
        df = pd.merge(manual_polarity, df, left_on='MyReportName', right_on="pdf_name", how='left')
        df["polarity"] = pd.to_numeric(df["polarity"])

        # min-max scale the predictions into [min, max] of the manual scores so
        # that the error metrics compare values on the same scale
        scaler = MinMaxScaler(feature_range=(df.AverageManualScore.min(), df.AverageManualScore.max()))
        # transform the predicted polarity scores
        df['predicted_scaled_scores'] = scaler.fit_transform(df[['polarity']])

        # get the predicted and actual scores
        predicted = df["predicted_scaled_scores"].to_numpy()
        actual = df["AverageManualScore"].to_numpy()

        # save the predicted scores
        predictions[model] = predicted

        # get the average polarity score for each model by year
        year_values = df.groupby('Years')['predicted_scaled_scores'].mean()
        years_values[model] = year_values

        # calculate the error metrics; [0, 1] picks the actual-vs-predicted
        # entry of the 2x2 correlation matrix
        mae = mean_absolute_error(actual, predicted)
        mse = mean_squared_error(actual, predicted)
        corr_rate = np.corrcoef(actual, predicted)[0, 1]

        print(f"{model} Model {df.shape} Results:")
        print(f"Correlation between {model} polarity and manual polarity: ", corr_rate)
        print("MAE:", mae)
        print("MSE:", mse)
        print(" ")

    except Exception as exc:

        # Catch Exception instead of using a bare `except:` so that
        # KeyboardInterrupt/SystemExit still stop the cell, and report why the
        # model failed (missing file, missing column, NaN scores, ...) rather
        # than hiding the cause. The loop then moves on to the next model.
        print(f"{model} failed: {type(exc).__name__}: {exc}")
        print(" ")
        print(" ")

Baseline Frequency Model (50, 8) Results:
Correlation between Baseline Frequency polarity and manual polarity:  0.03700198889930632
MAE: 0.22985990337868403
MSE: 0.07403471901227063
 
Baseline Keyword Model (50, 18) Results:
Correlation between Baseline Keyword polarity and manual polarity:  0.15866974447633433
MAE: 0.1865877974479943
MSE: 0.05668930725269321
 
BERT Model (50, 11) Results:
Correlation between BERT polarity and manual polarity:  0.07354505457257693
MAE: 0.19323449628783596
MSE: 0.06320343313455166
 
Roberta Model (50, 11) Results:
Correlation between Roberta polarity and manual polarity:  0.07494542285516889
MAE: 0.1925292053843577
MSE: 0.06273624731490812
 
TextBlob Model (50, 9) Results:
Correlation between TextBlob polarity and manual polarity:  0.1938667056272002
MAE: 0.17062031608790346
MSE: 0.04329156851574009
 
Amazon Model (50, 8) Results:
Correlation between Amazon polarity and manual polarity:  -0.0019645297253857233
MAE: 0.1850452771179042
MSE: 0.062569669252

In [8]:
# One column per model (scaled predictions) plus the manual score as the
# reference column; the manual scores are attached by index alignment
predictions_df = pd.DataFrame(predictions).assign(
    AverageManualScore=manual_polarity["AverageManualScore"]
)
predictions_df.head(n=5)

Unnamed: 0,Baseline Frequency,Baseline Keyword,BERT,Roberta,TextBlob,Amazon,Google,OpenAI,AverageManualScore
0,0.065459,0.443543,0.199526,0.20608,0.583147,0.65,0.405556,0.65,0.6
1,0.150483,0.611824,0.398344,0.405077,0.337562,0.316114,0.405556,0.405556,0.333333
2,0.076087,0.524438,0.376565,0.382757,0.212582,0.175407,0.161111,0.405556,0.166667
3,0.161111,0.580598,0.105269,0.118249,0.294884,0.18797,0.405556,0.405556,0.483333
4,-0.051449,0.550643,0.444919,0.44974,0.239324,0.545061,0.405556,0.405556,0.5


In [10]:
# Build the export table: a separate copy of the predictions with the report
# name attached, so predictions_df itself is left untouched
export_predictions = predictions_df.assign(ReportName=manual_polarity["ReportName"])
# The CSV export is currently disabled; uncomment the next line to write the file
# export_predictions.to_csv("Scores/total_predictions.csv", index=False)

In [11]:
# Preview the first rows of the export table
export_predictions.head(n=5)

Unnamed: 0,Baseline Frequency,Baseline Keyword,BERT,Roberta,TextBlob,Amazon,Google,OpenAI,AverageManualScore,ReportName
0,0.065459,0.443543,0.199526,0.20608,0.583147,0.65,0.405556,0.65,0.6,COMBINED-Q4-17-Shareholder-Letter-FINAL
1,0.150483,0.611824,0.398344,0.405077,0.337562,0.316114,0.405556,0.405556,0.333333,FINAL_Q2_15_Letter_to_Shareholders_With_Tables_
2,0.076087,0.524438,0.376565,0.382757,0.212582,0.175407,0.161111,0.405556,0.166667,FINAL_Q3_15_Letter_to_Shareholders_With_Tables_
3,0.161111,0.580598,0.105269,0.118249,0.294884,0.18797,0.405556,0.405556,0.483333,FINAL_Q3_Letter
4,-0.051449,0.550643,0.444919,0.44974,0.239324,0.545061,0.405556,0.405556,0.5,FINAL-Q1-18-Shareholder-Letter


### Post Market Judgment

In [12]:
# Read the post-market scores, average them per report, and keep only the
# reports that also have a manual score (inner join on the clean report name)
post_market_polarity = (
    pd.read_csv("Scores/postmarket_openai.csv")[["CleanReport", "score"]]
    .groupby("CleanReport")
    .mean()
    .reset_index()
    .merge(manual_polarity, left_on="CleanReport", right_on="CleanReportName", how="inner")
)
post_market_polarity.shape

(19, 7)

In [14]:
# Ensure the post-market scores are numeric
post_market_polarity["score"] = pd.to_numeric(post_market_polarity["score"])

# Manual scores are the reference for the correlation, the scaling range and the errors
actual = post_market_polarity["AverageManualScore"]

# Correlation between the raw post-market scores and the manual scores
corr_rate_postmarket = post_market_polarity["score"].corr(actual)

# Min-max scale the post-market scores into the [min, max] range of the manual scores
scaler = MinMaxScaler(feature_range=(actual.min(), actual.max()))
post_market_polarity["score_scaled"] = scaler.fit_transform(post_market_polarity[["score"]])

# Error metrics of the scaled scores against the manual scores
predicted = post_market_polarity["score_scaled"]
mae = mean_absolute_error(actual, predicted)
mse = mean_squared_error(actual, predicted)

# Report the results
print("Post Market's Results:")
print(f"Correlation between Post Market polarity and manual polarity: ", corr_rate_postmarket)
print("MAE:", mae)
print("MSE:", mse)

Post Market's Results:
Correlation between Post Market polarity and manual polarity:  0.40959620442602995
MAE: 0.16064786968218872
MSE: 0.04973931582359994
