## Sentiment Analysis for Trading with Reddit Text Data

Source: https://medium.com/analytics-vidhya/sentiment-analysis-for-trading-with-reddit-text-data-73729c931d01 

In [1]:
# Import libraries

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 300)
# import praw #reddit data api
# import ffn #for loading financial data

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sn
import re #regex
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer #VADER sentiment model
import requests
import json
import csv
import time
import datetime

In [2]:
# read the reddit data

reddit_data = pd.read_csv('data/final_data.csv')
reddit_data.columns

Index(['Unnamed: 0', 'Index', 'MSFT.Adjusted', 'worldnews', 'stocks',
       'investing', 'Subjectivity_worldnews', 'Polarity_worldnews',
       'Sentiment_worldnews', 'Subjectivity_stocks', 'Polarity_stocks',
       'Sentiment_stocks', 'Subjectivity_investing', 'Polarity_investing',
       'Sentiment_investing', 'average_polarity', 'average_subjectivity',
       'Sentiment_average'],
      dtype='object')

In [3]:
reddit_data = reddit_data[['Index', 'MSFT.Adjusted', 'worldnews', 'stocks', 'investing']]
reddit_data.head()

Unnamed: 0,Index,MSFT.Adjusted,worldnews,stocks,investing
0,2022-01-03,328.727661,Twitter said Sunday it had banned the per...,Since a new year has started I have summed up ...,Since a new year has started I have summed up ...
1,2022-01-04,323.090942,In Portugal with 89 of the total population fu...,Looking for friends Let us know what Support S...,This thread is for any and all basic gameplay ...
2,2022-01-05,310.688141,The map above shows approximate locations...,What are the raw materials Financials Healthca...,Hi welcome to rinvesting Please note that as a...
3,2022-01-06,308.233124,And yet pro israelis and islamophobes are allo...,UPDATE1 Etoro response on 20211231 HERE UPDATE...,Im curious how current retirees transitioned t...
4,2022-01-07,308.390289,Ive taken several rapid tests 4 total at vario...,I bought 10 x call contracts on DFEN 1966 f...,5K votes 72 comments 519K subscribers in th...


In [4]:
## run vader sentiment analyzer

analyser = SentimentIntensityAnalyzer()

def vader_analysis(column_values):
    scores=[]
    for comments in column_values:
        sentiment_score=0
        try:
            for comment in comments:
                sentiment_score=sentiment_score+analyser.polarity_scores(comment)['compound']
        except TypeError:
            sentiment_score=0
        
        scores.append(sentiment_score)
    return scores
    
reddit_data['sentiment_score_worldnews'] = vader_analysis(reddit_data.worldnews.values)
reddit_data.head()

Unnamed: 0,Index,MSFT.Adjusted,worldnews,stocks,investing,sentiment_score_worldnews
0,2022-01-03,328.727661,Twitter said Sunday it had banned the per...,Since a new year has started I have summed up ...,Since a new year has started I have summed up ...,8.2584
1,2022-01-04,323.090942,In Portugal with 89 of the total population fu...,Looking for friends Let us know what Support S...,This thread is for any and all basic gameplay ...,6.882
2,2022-01-05,310.688141,The map above shows approximate locations...,What are the raw materials Financials Healthca...,Hi welcome to rinvesting Please note that as a...,8.2584
3,2022-01-06,308.233124,And yet pro israelis and islamophobes are allo...,UPDATE1 Etoro response on 20211231 HERE UPDATE...,Im curious how current retirees transitioned t...,7.3408
4,2022-01-07,308.390289,Ive taken several rapid tests 4 total at vario...,I bought 10 x call contracts on DFEN 1966 f...,5K votes 72 comments 519K subscribers in th...,4.588


In [5]:
reddit_data['sentiment_score_stocks'] = vader_analysis(reddit_data.stocks.values)
reddit_data['sentiment_score_investing'] = vader_analysis(reddit_data.investing.values)
reddit_data['sentiment_average_score'] = reddit_data[['sentiment_score_worldnews', 'sentiment_score_stocks', 'sentiment_score_investing']].mean(axis=1)
reddit_data.head()

Unnamed: 0,Index,MSFT.Adjusted,worldnews,stocks,investing,sentiment_score_worldnews,sentiment_score_stocks,sentiment_score_investing,sentiment_average_score
0,2022-01-03,328.727661,Twitter said Sunday it had banned the per...,Since a new year has started I have summed up ...,Since a new year has started I have summed up ...,8.2584,7.3408,7.7996,7.7996
1,2022-01-04,323.090942,In Portugal with 89 of the total population fu...,Looking for friends Let us know what Support S...,This thread is for any and all basic gameplay ...,6.882,10.5524,6.4232,7.952533
2,2022-01-05,310.688141,The map above shows approximate locations...,What are the raw materials Financials Healthca...,Hi welcome to rinvesting Please note that as a...,8.2584,6.4232,9.176,7.952533
3,2022-01-06,308.233124,And yet pro israelis and islamophobes are allo...,UPDATE1 Etoro response on 20211231 HERE UPDATE...,Im curious how current retirees transitioned t...,7.3408,14.6816,8.7172,10.246533
4,2022-01-07,308.390289,Ive taken several rapid tests 4 total at vario...,I bought 10 x call contracts on DFEN 1966 f...,5K votes 72 comments 519K subscribers in th...,4.588,4.588,3.2116,4.1292


In [6]:
reddit_data.columns

Index(['Index', 'MSFT.Adjusted', 'worldnews', 'stocks', 'investing',
       'sentiment_score_worldnews', 'sentiment_score_stocks',
       'sentiment_score_investing', 'sentiment_average_score'],
      dtype='object')

In [7]:
reddit_data[['MSFT.Adjusted', 'sentiment_score_worldnews', 'sentiment_score_stocks', 'sentiment_score_investing', 'sentiment_average_score']].describe()

Unnamed: 0,MSFT.Adjusted,sentiment_score_worldnews,sentiment_score_stocks,sentiment_score_investing,sentiment_average_score
count,481.0,481.0,481.0,481.0,481.0
mean,285.435987,8.530246,10.283415,10.259569,9.691077
std,38.73223,3.28772,2.507594,2.479666,1.764623
min,211.770172,0.0,2.7528,3.2116,4.1292
25%,252.100098,5.9644,8.7172,8.7172,8.411333
50%,281.70224,8.7172,10.0936,10.0936,9.787733
75%,319.682953,10.5524,11.9288,11.9288,10.858267
max,382.700012,18.352,17.8932,17.4344,15.446267


In [8]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

reddit_data['Index'] = pd.to_datetime(reddit_data['Index'])

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(x=reddit_data['Index'], y=reddit_data['MSFT.Adjusted'], name="MSFT Adjusted"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=reddit_data['Index'], y=reddit_data['sentiment_average_score'], name="Sentiment Average Score"),
    secondary_y=True,
)

fig.update_layout(
    title_text="MSFT Adjusted and Sentiment Average Score Over Time"
)

fig.update_xaxes(title_text="Date")

fig.update_yaxes(title_text="<b>MSFT Adjusted</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Sentiment Average Score</b>", secondary_y=True)

fig.show()

In [11]:
reddit_data.to_csv('data/final_data_vadar.csv')