In [None]:
%load_ext ipython_clipboard
import pandas as pd
import numpy as np
from utils import *

In [None]:
import warnings

# Globally suppress every warning category for the remainder of the kernel session.
warnings.simplefilter('ignore')

In [None]:
def year(ri, pivot=22):
    """Expand the two-digit year embedded in a report identity to four digits.

    The year is read from the second ``-``-separated field of ``ri``.
    Values up to and including ``pivot`` are placed in the 2000s, larger
    values in the 1900s.

    Args:
        ri: Report identity string whose second dash-separated field is a
            two-digit year, e.g. ``'AB-21-0042'``.
        pivot: Largest two-digit year still interpreted as 20xx.
            Defaults to 22, the cut-off that was previously hard-coded.

    Returns:
        The four-digit year as a string.

    Raises:
        IndexError: If ``ri`` contains no ``-``.
        ValueError: If the year field is not an integer.
    """
    # Strip stray whitespace and left-pad so ' 5 ' becomes '05'; otherwise the
    # concatenation below would produce malformed years such as '205'.
    two_digit = ri.split('-')[1].strip().zfill(2)
    century = '20' if int(two_digit) <= pivot else '19'
    return century + two_digit

In [None]:
# Load the stored FinBERT prediction frame and display it.
# NOTE(review): load_pkl arrives via `from utils import *`; if it wraps
# pickle.load it must only be pointed at trusted files — confirm.
finbert_predictions = load_pkl('data/sentiment_analysis/df_finbert_predictions.pkl')
finbert_predictions

In [None]:
# Report identities of the rows whose finbert_positive value is not null.
ris = finbert_predictions.loc[
    finbert_predictions['finbert_positive'].notna(),
    'report_identity',
]

In [None]:
%clip
# describe() summary of the three FinBERT columns, rendered as a LaTeX longtable.
(
    finbert_predictions
    .loc[:, ['finbert_negative', 'finbert_neutral', 'finbert_positive']]
    .describe()
    .to_latex(
        header=['Negative', 'Neutral', 'Positive'],
        float_format='%.3f',
        bold_rows=True,
        longtable=True,
        caption='Finbert prediction output',
        label='finbert',
    )
)


In [None]:
# Load the VADER frame and keep only rows that have a report identity.
# (Disabled alternative: restrict to reports present in `ris` via
#  df = df[df.report_identity.isin(ris)].)
df = load_pkl('data/sentiment_analysis/df_vader.pkl')
df = df.loc[df['report_identity'].notna()]

In [None]:
# Add a `year` column computed from each report identity, then show the frame.
df['year'] = df['report_identity'].apply(year)
df

In [None]:
%clip
# describe() summary of the raw-text VADER polarity columns as a LaTeX longtable.
(
    df
    .loc[:, ['vader_polarity_neg', 'vader_polarity_neu', 'vader_polarity_pos']]
    .describe()
    .to_latex(
        header=['Negative', 'Neutral', 'Positive'],
        float_format='%.3f',
        bold_rows=True,
        longtable=True,
        caption='Vader predictions (without pre-processing)',
        label='vader_no_pre',
    )
)

In [None]:
%clip
# describe() summary of the pre-processed VADER polarity columns as a LaTeX longtable.
(
    df
    .loc[:, [
        'vader_preprocessed_polarity_neg',
        'vader_preprocessed_polarity_neu',
        'vader_preprocessed_polarity_pos',
    ]]
    .describe()
    .to_latex(
        header=['Negative', 'Neutral', 'Positive'],
        float_format='%.3f',
        bold_rows=True,
        longtable=True,
        caption='Vader predictions (with pre-processing)',
        label='vader_pre',
    )
)

In [None]:
# Load the TextBlob frame and keep only rows that have a report identity.
# (Disabled alternative: restrict to reports present in `ris` via
#  df = df[df.report_identity.isin(ris)].)
df = load_pkl('data/sentiment_analysis/df_tb.pkl')
df = df.loc[df['report_identity'].notna()]


In [None]:
%clip
# describe() summary of the raw-text TextBlob columns as a LaTeX longtable.
(
    df
    .loc[:, ['textblob_sentiment_polarity', 'textblob_sentiment_subjectivity']]
    .describe()
    .to_latex(
        header=['Polarity', 'Subjectivity'],
        float_format='%.3f',
        bold_rows=True,
        longtable=True,
        caption='TextBlob predictions (without pre-processing)',
        label='textblob_no_pre',
    )
)

In [None]:
%clip
# describe() summary of the pre-processed TextBlob columns as a LaTeX longtable.
(
    df
    .loc[:, [
        'textblob_preprocessed_sentiment_polarity',
        'textblob_preprocessed_sentiment_subjectivity',
    ]]
    .describe()
    .to_latex(
        header=['Polarity', 'Subjectivity'],
        float_format='%.3f',
        bold_rows=True,
        longtable=True,
        caption='TextBlob predictions (with pre-processing)',
        label='textblob_pre',
    )
)

In [None]:
# Load the Flair frame and keep only rows that have a report identity.
# (Disabled alternative: restrict to reports present in `ris` via
#  df = df[df.report_identity.isin(ris)].)
df = load_pkl('data/sentiment_analysis/df_flair.pkl')
df = df.loc[df['report_identity'].notna()]


In [None]:
# Frequency of each Flair sentiment tag (raw text).
df.value_counts(subset=['flair_sentiment_tag'])

In [None]:
%clip
# Joint frequency of the Flair tag on raw vs. pre-processed text, rendered as a
# LaTeX longtable.
# The caption and label were previously copied from the TextBlob table, which
# mislabelled this table and duplicated the LaTeX label 'textblob_no_pre'.
# NOTE(review): the single value column holds counts, so the header 'Score' may
# also deserve renaming — confirm intent.
(
    df[['flair_sentiment_tag', 'flair_preprocessed_sentiment_tag']]
    .value_counts()
    .to_latex(
        float_format='%.3f',
        longtable=True,
        caption='Flair predictions (without and with pre-processing)',
        label='flair',
        header=['Score'],
    )
)

In [None]:
import os
from transformers import pipeline
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob
import flair

# VADER analyzer instance used for the model comparison.
vader = SentimentIntensityAnalyzer()
# Flair text classifier loaded under the 'en-sentiment' model name.
# NOTE(review): presumably downloads the model on first use — confirm network needs.
flair_sentiment = flair.models.TextClassifier.load('en-sentiment')

def get_flair_sentiment(text):
    """Classify ``text`` with the module-level Flair model.

    Args:
        text: Raw string to score.

    Returns:
        A ``(tag, score)`` tuple read from the predicted Flair sentence.
    """
    sentence = flair.data.Sentence(text)
    # predict() annotates the sentence object in place.
    flair_sentiment.predict(sentence)
    label, confidence = sentence.tag, sentence.score
    return label, confidence
# Example sentence fed to every model.
s = "In Q4 2020, while the market was down 10%, our fund remarkably only lost 0.1%."

# Two sentiment pipelines: the library's default model and FinBERT.
classifier = pipeline(task='sentiment-analysis')
fin_classifier = pipeline(task='sentiment-analysis', model='ProsusAI/finbert')

# Simple test on the same sentence; the original note describes FinBERT's
# output as different and more realistic.
result = classifier(s)
fin_result = fin_classifier(s)


In [None]:
# Collect each model's raw output for the example sentence, keyed by display name.
results = {}
results['Vader'] = vader.polarity_scores(s)
results['TextBlob'] = str(TextBlob(s).sentiment)
results['LSTM (flair)'] = get_flair_sentiment(s)
results['BERT'] = result
results['FinBERT'] = fin_result

results

In [None]:
# One row per model: raw output plus a hand-written takeaway, indexed by model name.
df = (
    pd.DataFrame(
        [[model_name, raw_output] for model_name, raw_output in results.items()],
        columns=['model', 'output'],
    )
    .assign(takeaway=['Neutral', 'Neutral & Subjective', 'Negative', 'Negative', 'Positive'])
    .set_index('model')
)
df

In [None]:
%clip
# Lift the column-width limit so long cell contents are not truncated.
pd.set_option('display.max_colwidth', None)

df.to_latex(
    float_format='%.3f',
    bold_rows=True,
    longtable=True,
    caption='Comparison of models for a simple input',
    label='sentiment_analysis_results',
)

In [None]:
# model         # output                                                # takeaway
# vader       # {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}    # neutral
#finbert     # [{'label': 'positive', 'score': 0.9165573716163635}])    #positive

In [None]:
# Rows with a non-null finbert_positive value, restricted to columns matching 'finbert_'.
df = (
    finbert_predictions
    .loc[finbert_predictions['finbert_positive'].notna()]
    .filter(regex='finbert_')
)


In [None]:

import matplotlib.pyplot as plt
import seaborn as sns

# One panel per column of df (at most three): density histogram with a KDE overlay.
fig, ax = plt.subplots(1, 3, figsize=(30, 10))

# zip stops at the shorter of (axes, columns), so a frame with fewer than
# three columns no longer raises IndexError.
for axis, column in zip(ax, df.columns):
    # histplot(stat="density", kde=True) is the documented replacement for the
    # deprecated sns.distplot; cut=3 matches distplot's KDE extent.
    sns.histplot(df[column], kde=True, stat="density", kde_kws=dict(cut=3), ax=axis)
    # Title each panel so the figure is readable on its own.
    axis.set_title(column)

In [None]:
# Display the current contents of df.
df

In [None]:
# Start from matplotlib's original rcParams, then apply the theme in one call.
# Previously a trailing sns.set(rc=...) re-applied the default "darkgrid"
# style, silently discarding the whitegrid / no-grid configuration before it.
sns.reset_orig()
sns.set_theme(
    style="whitegrid",
    rc={"axes.grid": False, "figure.figsize": (18, 8)},
)

# displot is figure-level and ignores rcParams["figure.figsize"]; the intended
# 18x8 canvas must be requested through height/aspect instead.
sns.displot(
    df.melt(),
    x='value',
    hue="variable",
    kind="kde",
    fill=True,
    height=8,
    aspect=18 / 8,
)

In [None]:
# Sum of every melted value divided by 2155.
# NOTE(review): 2155 looks like a hard-coded row count — confirm, and consider len(df).
df.melt(var_name='Prediction')['value'].sum() / 2155