In [1]:
import pandas as pd

In [2]:
import flair

In [3]:
# Load the 'en-sentiment' text classifier through flair. The log line printed
# below shows it being read from a locally cached model file.
model = flair.models.TextClassifier.load('en-sentiment')

2023-02-09 00:40:43,202 loading file C:\Users\u042\.flair\models\sentiment-en-mix-distillbert_4.pt


In [4]:
def get_sentiment(text):
    """Run the notebook-level ``model`` on ``text`` and return the first label.

    The text is wrapped in a ``flair.data.Sentence``, passed to
    ``model.predict`` (which annotates the sentence in place), and the first
    entry of ``sentence.labels`` is returned. Callers in this notebook read
    ``.value`` and ``.score`` from the returned object.

    NOTE(review): indexing ``labels[0]`` raises ``IndexError`` if prediction
    attaches no label (presumably possible for empty text -- confirm).
    """
    scored = flair.data.Sentence(text)
    model.predict(scored)
    return scored.labels[0]

In [5]:
# Read the pipe-delimited CSV of posts; the preview below shows it already
# carries an 'organizations' column holding list-like strings.
df = pd.read_csv('reddit_investing_ner.csv', sep='|')
df.head()

Unnamed: 0.1,Unnamed: 0,name,created_utc,subreddit,title,selftext,upvote_ratio,ups,downs,score,organizations
0,0,t3_10xdp3e,1675897000.0,investing,Equity award wash sale question,Another wash sale related question - this is q...,1.0,1.0,0.0,1.0,['RSU']
1,1,t3_10x87sw,1675884000.0,investing,Is this Steichen's photo of Brancusi (original...,[https://www.artsy.net/artwork/6255cd62cfdde30...,0.45,0.0,0.0,0.0,[]
2,2,t3_10x79eo,1675882000.0,investing,Possible to create your own Mutual Fund?,"I don't mean *literally* what the title says, ...",0.73,11.0,0.0,11.0,"['BND', 'SLV', 'MSFT']"
3,3,t3_10x5x5t,1675879000.0,investing,Roth Ira portfolio opinions,So I'm pretty new to having a self managed Rot...,1.0,3.0,0.0,3.0,"['FSRNX - Real', 'FSDIX']"
4,4,t3_10x4ykp,1675877000.0,investing,Are there any good reasons to keep cash,I will save some money every week. \n\nWith th...,0.83,12.0,0.0,12.0,[]


In [6]:
# Score every post body. get_sentiment is called once per row, so this issues
# one model.predict call per post.
df['sentiment'] = df['selftext'].apply(get_sentiment)

In [7]:
import ast

In [8]:
# Parse the stringified lists (e.g. "['BND', 'SLV']") into real Python lists.
# Values that are already lists pass through unchanged, so re-running this
# cell does not raise: ast.literal_eval rejects non-string input such as a
# list. Anything else is still handed to literal_eval and fails as before.
df['organizations'] = df['organizations'].apply(
    lambda orgs: orgs if isinstance(orgs, list) else ast.literal_eval(orgs)
)

In [9]:
# Map each organization to the confidence scores of the posts that mention
# it, bucketed by the label predicted for the post:
#   {org: {'POSITIVE': [score, ...], 'NEGATIVE': [score, ...]}}
sentiment = {}

for label, orgs in zip(df['sentiment'], df['organizations']):
    # The post-level label is shared by every organization named in the post.
    direction, score = label.value, label.score
    for org in orgs:
        buckets = sentiment.setdefault(org, {'POSITIVE': [], 'NEGATIVE': []})
        buckets[direction].append(score)

In [10]:
# Spot-check the collected scores for one entity.
sentiment['Meta']

{'POSITIVE': [0.9902945756912231],
 'NEGATIVE': [0.781670093536377,
  0.9822782278060913,
  0.9863225221633911,
  0.9992108345031738,
  0.9999780654907227]}

In [11]:
# Build one summary record per entity:
#   positive / negative -- mean confidence of the posts with that label
#                          (0 when the entity has no post with that label)
#   frequency           -- number of scored posts mentioning the entity
#   score               -- (sum of positive - sum of negative) / frequency
#
# Totals are kept in local variables so that `sentiment` is left untouched.
# Overwriting its score lists with their sums would make this cell fail on a
# second run (len() of a float) and would change what earlier cells display.
avg_sentiment = []

for org, scores in sentiment.items():
    pos_freq = len(scores['POSITIVE'])
    neg_freq = len(scores['NEGATIVE'])
    pos_total = sum(scores['POSITIVE'])
    neg_total = sum(scores['NEGATIVE'])
    # An entity only enters `sentiment` together with at least one score,
    # so frequency is never zero here.
    frequency = pos_freq + neg_freq
    avg_sentiment.append({
        'entity': org,
        'positive': pos_total / pos_freq if pos_freq != 0 else 0,
        'negative': neg_total / neg_freq if neg_freq != 0 else 0,
        'frequency': frequency,
        'score': (pos_total - neg_total) / frequency
    })

In [12]:
# Preview the first five summary records.
avg_sentiment[:5]

[{'entity': 'RSU',
  'positive': 0,
  'negative': 0.9995510578155518,
  'frequency': 3,
  'score': -0.9995510578155518},
 {'entity': 'BND',
  'positive': 0.6670458018779755,
  'negative': 0.9393403927485148,
  'frequency': 8,
  'score': -0.5377438440918922},
 {'entity': 'SLV',
  'positive': 0,
  'negative': 0.9370343685150146,
  'frequency': 1,
  'score': -0.9370343685150146},
 {'entity': 'MSFT',
  'positive': 0.8695935010910034,
  'negative': 0.9788570205370585,
  'frequency': 5,
  'score': -0.23947681188583375},
 {'entity': 'FSRNX - Real',
  'positive': 0,
  'negative': 0.9998732805252075,
  'frequency': 1,
  'score': -0.9998732805252075}]

In [13]:
# One row per entity, with columns taken from the record keys.
sentiment_df = pd.DataFrame(avg_sentiment)

In [14]:
# Preview the per-entity summary table.
sentiment_df.head()

Unnamed: 0,entity,positive,negative,frequency,score
0,RSU,0.0,0.999551,3,-0.999551
1,BND,0.667046,0.93934,8,-0.537744
2,SLV,0.0,0.937034,1,-0.937034
3,MSFT,0.869594,0.978857,5,-0.239477
4,FSRNX - Real,0.0,0.999873,1,-0.999873


In [16]:
# Keep only entities mentioned in more than 3 scored posts.
sentiment_df = sentiment_df.loc[sentiment_df['frequency'].gt(3)]
sentiment_df.head()

Unnamed: 0,entity,positive,negative,frequency,score
1,BND,0.667046,0.93934,8,-0.537744
3,MSFT,0.869594,0.978857,5,-0.239477
7,SEP,0.0,0.999241,4,-0.999241
10,Meta,0.990295,0.949892,6,-0.626528
13,AMD,0.987781,0.993018,4,-0.002618


In [17]:
# Ten entities with the highest score (least negative / most positive first).
sentiment_df.sort_values('score', ascending=False).head(10)

Unnamed: 0,entity,positive,negative,frequency,score
22,Intel,0.976925,0.992473,4,0.484576
13,AMD,0.987781,0.993018,4,-0.002618
24,AI,0.92483,0.952254,10,-0.013712
577,FCF,0.963932,0.996048,4,-0.016058
169,YouTube,0.780517,0.932506,4,-0.075994
487,CDS,0.712937,0.984754,4,-0.135909
290,NYSE,0.859209,0.846641,5,-0.164301
189,EPS,0.840113,0.998872,9,-0.181545
3,MSFT,0.869594,0.978857,5,-0.239477
16,Microsoft,0.943948,0.975882,11,-0.277762


In [18]:
# Ten entities with the lowest score (most negative first).
sentiment_df.sort_values('score').head(10)

Unnamed: 0,entity,positive,negative,frequency,score
7,SEP,0.0,0.999241,4,-0.999241
27,FAQ,0.0,0.998622,42,-0.998622
28,wiki,0.0,0.998622,42,-0.998622
151,ROTH,0.0,0.998298,5,-0.998298
201,Adani,0.0,0.996327,4,-0.996327
363,ITM,0.0,0.994098,4,-0.994098
73,treasury,0.0,0.992661,11,-0.992661
108,IBKR,0.0,0.990327,5,-0.990327
299,TD Ameritrade,0.0,0.989434,4,-0.989434
164,APY,0.0,0.985655,4,-0.985655
