# NFL Sentiment Analysis

I use vaderSentiment to score the sentiment of each individual comment, then keep only comments whose flair matches the team that has just drafted a player. For example, the Bengals picked Round 1 Pick 1, so I only capture comments from Bengals fans until the Redskins picked Round 1 Pick 2 (or 8 minutes passed). The goal is to capture the raw, immediate reaction of fans as their team drafts players.

In [1]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import Counter
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [2]:
# Load the comment dumps: one JSON file per group of draft rounds.
r1, r2_3, r4_7 = map(
    pd.read_json,
    ('data/round1.json', 'data/round2_3.json', 'data/round4_7.json'),
)

In [3]:
# Stack the three dumps into one frame, then rename the columns positionally
# (the list order must therefore match the column order of the JSON files).
comments = pd.concat((r1, r2_3, r4_7), axis=0)
comments.columns = [
    'Thread',
    'User',
    'Flair',
    'Comment',
    'Score',
    'Awards',
    'Time',
]
comments.head(2)

Unnamed: 0,Thread,User,Flair,Comment,Score,Awards,Time
0,2020 NFL Draft Day 1 Post-draft Discussion,T_Raycroft,Raiders,That Love pick was something else. Holy shit.\...,1218,0,1587701451
1,2020 NFL Draft Day 1 Post-draft Discussion,bears_34,Colts,“Keep the booze coming too” \n\n(I know it was...,316,0,1587701709


In [4]:
# Parse the epoch-second timestamps and shift them back four hours
# (presumably to express them in US Eastern Daylight Time -- confirm).
comments['Time'] = pd.to_datetime(comments['Time'], unit='s').sub(pd.Timedelta('4 hours'))

In [5]:
# Clean the comment text before it is scored: drop URLs and flatten newlines.
#
# regex=True is passed explicitly for the URL pattern. Since pandas 2.0,
# Series.str.replace treats the pattern as a literal string by default, so
# without the flag r'http\S+' would never match and URLs would be left in.
comments['Comment'] = comments['Comment'].str.replace(r'http\S+', '', regex=True)
# The newline replacement is a plain literal substitution.
comments['Comment'] = comments['Comment'].str.replace('\n', ' ', regex=False)

In [6]:
# Single VADER analyzer instance, reused to score every comment.
analyzer = SentimentIntensityAnalyzer()

In [7]:
# Score every comment and keep only VADER's 'compound' value as its sentiment.
comments['Sentiment'] = [
    analyzer.polarity_scores(text)['compound'] for text in comments['Comment']
]

In [8]:
# The first seven characters of a pick-thread title give its round label
# (e.g. 'Round 1'). Any title that does not start with 'Round' is labelled
# with the string 'None' instead.
comments['Round'] = comments['Thread'].str.slice(stop=7)
comments['Round'] = comments['Round'].where(
    comments['Round'].str.startswith('Round'), 'None'
)

In [9]:
# Take the last space-separated word of the thread title and drop its final
# character (assumes pick-thread titles end with the team name plus one
# trailing character -- confirm against the raw titles).
comments['TeamPick'] = comments['Thread'].str.rsplit(' ', n=1).str[-1].str[:-1]
# Discussion-thread titles yield the truncated words below; mark them as 'None'.
comments['TeamPick'] = comments['TeamPick'].mask(
    comments['TeamPick'].isin(['Discussio', 'Spoiler']), 'None'
)

In [10]:
# Distinct team names seen in pick-thread titles, in order of first
# appearance, without the 'None' placeholder.
team_names = comments['TeamPick'].drop_duplicates().tolist()
team_names.remove('None')

In [11]:
# Keep a flair only when it is one of the known team names; every other flair
# is collapsed into 'Other'.
comments['Flair'] = comments['Flair'].where(comments['Flair'].isin(team_names), 'Other')
comments.head()

Unnamed: 0,Thread,User,Flair,Comment,Score,Awards,Time,Sentiment,Round,TeamPick
0,2020 NFL Draft Day 1 Post-draft Discussion,T_Raycroft,Raiders,That Love pick was something else. Holy shit. ...,1218,0,2020-04-24 00:10:51,-0.0258,,
1,2020 NFL Draft Day 1 Post-draft Discussion,bears_34,Colts,“Keep the booze coming too” (I know it was b...,316,0,2020-04-24 00:15:09,0.0,,
2,2020 NFL Draft Day 1 Post-draft Discussion,Conglossian,Panthers,[Vrabel's draft room is the highlight. It has ...,545,0,2020-04-24 00:11:09,0.34,,
3,2020 NFL Draft Day 1 Post-draft Discussion,mostinterestingtroll,Patriots,RIP Rodgers.,227,0,2020-04-24 00:11:16,0.0,,
4,2020 NFL Draft Day 1 Post-draft Discussion,ucaliptastree,Ravens,First half: Predictable Second half: WTF,649,0,2020-04-24 00:11:29,-0.6739,,


In [12]:
# Comments posted in a specific pick thread already carry their team and round.
pick_threads = comments[comments['TeamPick'] != 'None']

# Comments from the general discussion threads: drop the placeholder columns
# and sort by time, which merge_asof requires.
discussion_thread = (
    comments[comments['TeamPick'] == 'None']
    .drop(columns=['TeamPick', 'Round'])
    .sort_values('Time')
)

# One row per pick thread, taken from its earliest comment; that timestamp is
# used as a proxy for the moment the pick was made.
first_comment = (
    pick_threads.sort_values(['Thread', 'Time'])
    .groupby('Thread')
    .first()
    .reset_index()[['Time', 'TeamPick', 'Round']]
    .sort_values('Time')
)

# Attach to each discussion comment the most recent pick that started no more
# than 8 minutes before it. Comments outside that window get missing values.
dicussion_w_team_pick = pd.merge_asof(
    discussion_thread,
    first_comment,
    on='Time',
    direction='backward',
    tolerance=pd.Timedelta(8, unit='min'),
)

In [13]:
# Discussion comments with no pick inside the merge window have missing
# TeamPick/Round values; label them with the string 'None'.
#
# The filled frame is assigned back rather than calling fillna(inplace=True)
# on a single-column selection. That chained in-place form emits a warning on
# pandas 2.x and leaves the frame unchanged once Copy-on-Write is enabled.
dicussion_w_team_pick = dicussion_w_team_pick.fillna({'TeamPick': 'None', 'Round': 'None'})
dicussion_w_team_pick.sample(5)

Unnamed: 0,Thread,User,Flair,Comment,Score,Awards,Time,Sentiment,TeamPick,Round
45123,2020 NFL Draft Day 3 Discussion thread - Spoilers,___Rand___,Other,"If you saw him in 2017 and 18, he gets 1st rou...",1,0,2020-04-25 15:10:13,0.8584,Bills,Round 5
6830,2020 NFL Draft Day 1 Discussion thread - Spoilers,Sweetness4455,Redskins,Fuccccccck yeeeeaaahhh!!!!!,1,0,2020-04-23 20:36:48,0.0,Redskins,Round 1
14703,2020 NFL Draft Day 1 Discussion thread - Spoilers,kickstandheadass,Giants,"Oh don't worry, I know it ain't, but I didn't ...",1,0,2020-04-23 21:23:35,-0.7001,Cardinals,Round 1
32126,2020 NFL Draft Day 2 Discussion thread - Spoilers,OrangeJr36,Dolphins,We've been completely Competent so far and I'm...,4,0,2020-04-24 18:45:30,-0.079,,
26907,2020 NFL Draft Day 1 Discussion thread - Spoilers,rondell_jones,Giants,“AIYUK AIYUK AIYUK” - Seth Rogen,10,0,2020-04-23 23:15:57,0.0,49ers,Round 1


In [14]:
# Recombine pick-thread comments with the discussion comments that now carry a
# TeamPick/Round label (columns are aligned by name).
comments = pd.concat((pick_threads, dicussion_w_team_pick), axis=0)
comments.shape

(105931, 10)

In [15]:
# might have to filter out short comments

In [16]:
# What I want to find:
# 1. which team was most positive about their draft overall, then by specific draft pick

###### Overall Sentiment

In [17]:
# Mean compound sentiment across every comment.
comments['Sentiment'].mean()

0.06758839055612274

###### Overall Sentiment by Flair

In [18]:
# Mean sentiment per flair, most positive first.
(
    comments.groupby('Flair')['Sentiment']
    .mean()
    .reset_index()
    .sort_values('Sentiment', ascending=False)
)

Unnamed: 0,Flair,Sentiment
4,Broncos,0.101639
7,Cardinals,0.098341
9,Chiefs,0.096979
27,Saints,0.084725
29,Steelers,0.081874
0,49ers,0.080789
8,Chargers,0.079863
10,Colts,0.07956
3,Bills,0.076754
15,Giants,0.076253


###### Sentiment by team for their draft picks (pick thread & time in discussion)

In [19]:
# Keep only comments whose author's flair is the team that made the pick.
fan_pick = comments[comments['Flair'] == comments['TeamPick']]

In [20]:
# Mean sentiment and comment count per team for the rows in fan_pick,
# most positive first.
(
    fan_pick.groupby('Flair')['Sentiment']
    .agg(['mean', 'count'])
    .reset_index()
    .sort_values('mean', ascending=False)
)

Unnamed: 0,Flair,mean,count
28,Steelers,0.231983,281
9,Chiefs,0.203963,361
8,Chargers,0.18834,270
4,Broncos,0.178922,689
2,Bengals,0.163319,370
10,Colts,0.162905,345
7,Cardinals,0.15696,401
18,Lions,0.14119,732
26,Saints,0.13759,344
12,Dolphins,0.133209,646


###### Sentiment by Flair & Round

In [21]:
# Mean sentiment and comment count for every (team, round) pair.
tm_round = (
    fan_pick.groupby(['Flair', 'Round'])['Sentiment']
    .agg(['mean', 'count'])
    .reset_index()
)
# Drop pairs with 30 or fewer comments, then rank by mean sentiment (highest first).
tm_round = tm_round[tm_round['count'] > 30].sort_values('mean', ascending=False)
tm_round.head(10)

Unnamed: 0,Flair,Round,mean,count
45,Chargers,Round 4,0.355225,40
3,49ers,Round 7,0.301883,36
43,Cardinals,Round 7,0.28147,47
10,Bengals,Round 4,0.254304,56
142,Ravens,Round 6,0.251185,33
49,Chiefs,Round 1,0.250506,205
159,Steelers,Round 2,0.243376,135
70,Dolphins,Round 5,0.232526,34
162,Steelers,Round 6,0.22614,40
88,Giants,Round 4,0.222273,49


In [22]:
# Last ten rows of tm_round; it is sorted by mean sentiment in descending
# order, so these are the least positive team/round pairs.
tm_round.tail(10)

Unnamed: 0,Flair,Round,mean,count
168,Titans,Round 1,0.030857,58
82,Falcons,Round 3,0.02902,51
61,Cowboys,Round 2,0.028312,174
65,Cowboys,Round 7,0.027686,63
114,Packers,Round 3,0.018685,163
92,Jaguars,Round 1,0.017899,150
127,Patriots,Round 6,0.010353,47
132,Rams,Round 2,0.004357,87
113,Packers,Round 2,-0.01107,348
74,Eagles,Round 2,-0.057193,684


###### Sentiment for Flair & Round 1

In [23]:
# First-round rows only (ordering of tm_round is preserved).
tm_round[tm_round['Round'] == 'Round 1']

Unnamed: 0,Flair,Round,mean,count
49,Chiefs,Round 1,0.250506,205
20,Broncos,Round 1,0.18952,261
39,Cardinals,Round 1,0.157681,221
149,Saints,Round 1,0.156838,170
44,Chargers,Round 1,0.15599,210
27,Browns,Round 1,0.153449,180
66,Dolphins,Round 1,0.129109,376
80,Falcons,Round 1,0.127761,174
173,Vikings,Round 1,0.122226,378
7,Bengals,Round 1,0.11935,151
