# Sentiment Analysis

## TextBlob

In [1]:
# Import pandas for data frame
import pandas as pd

In [2]:
# Load the cleaned script CSV and keep only its raw_text column as a one-column data frame
simpsons_script = (
    pd.read_csv("simpsons_df_fixed.csv")["raw_text"]
    .to_frame()
)
simpsons_script.head()

Unnamed: 0,raw_text
0,ooo careful homer
1,theres no time to be careful
2,were late
3,hushed voice sorry excuse us pardon me
4,simultaneously hey norman hows it going so yo...


In [3]:
# Import library for TextBlob sentiment calculator
import textblob
from textblob import TextBlob

In [4]:
# Score every script line with TextBlob and store its polarity in a new column
simpsons_script['TB_Sentiment'] = [
    TextBlob(line).sentiment.polarity
    for line in simpsons_script['raw_text']
]

In [5]:
# Show the five lines TextBlob scores lowest (ascending sort, so they come first)
simpsons_script.sort_values(by='TB_Sentiment', ascending=True).head(5)

Unnamed: 0,raw_text,TB_Sentiment
40560,hes planning something evil i know it it must...,-1.0
103899,confessing i didnt make this terrible model m...,-1.0
18563,you look terrible,-1.0
103835,wiseguy son you buy an awful lot of airplane ...,-1.0
71458,i come to deliver a terrifying message of hop...,-1.0


In [11]:
# View highest-scored sentiment lines (ascending sort, so the top scores sit at the tail)
simpsons_script.sort_values('TB_Sentiment').tail()

Unnamed: 0,raw_text,TB_Sentiment
42353,impressed whistle whaddya suppose the rent is...,1.0
55244,marge this is the greatest gift any wife has ...,1.0
20568,singing its the greatest show in townnnnn gabbo,1.0
107620,impressed you dont fight like a girl or even ...,1.0
103626,milhouse real best friends often dont speak f...,1.0


In [12]:
# Import statistics package for mean
import statistics

In [13]:
# Average the TextBlob polarity over every line in the data frame
statistics.mean(simpsons_script['TB_Sentiment'])

0.04613147576653909

## VADER

In [14]:
# Import library for VADER
import nltk
# Fetch the 'vader_lexicon' resource into the local nltk_data directory;
# nltk reports it as already up-to-date when it is present, so re-running is safe
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/AthenaXiourouppa/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [15]:
# Assign sentiment analyser as VADER
# One analyser instance is created here and reused for every line that is scored
vader = SentimentIntensityAnalyzer()

In [16]:
# Run VADER on every script line; each cell of the new column holds the full
# score dictionary ('neg', 'neu', 'pos', 'compound') returned for that line
simpsons_script['VADER_Sentiment'] = [
    vader.polarity_scores(line)
    for line in simpsons_script['raw_text']
]

In [17]:
# Isolate compound value
# Walk the column of score dictionaries directly: looking rows up one at a time
# with .iloc[i] builds a whole row Series per line just to read a single cell,
# which is needlessly slow on a frame of this size. Order and values are unchanged.
simpsons_script['VADER Compound Score'] = [
    scores['compound'] for scores in simpsons_script['VADER_Sentiment']
]

In [18]:
# Show the five lines with the lowest VADER compound scores (ascending sort, so they come first)
simpsons_script.sort_values(by='VADER Compound Score', ascending=True).head(5)

Unnamed: 0,raw_text,TB_Sentiment,VADER_Sentiment,VADER Compound Score
6643,singsongy hell hell hell hell hell hell hell ...,0.0,"{'neg': 0.984, 'neu': 0.016, 'pos': 0.0, 'comp...",-0.9966
84692,kill the rat kill the rat kill the rat kill t...,0.0,"{'neg': 0.701, 'neu': 0.299, 'pos': 0.0, 'comp...",-0.9933
95223,furious where the hell is abigail adams oh th...,0.285714,"{'neg': 0.525, 'neu': 0.475, 'pos': 0.0, 'comp...",-0.9884
69084,stubbornly dead dead dead dead dead dead,-0.2,"{'neg': 1.0, 'neu': 0.0, 'pos': 0.0, 'compound...",-0.9837
27570,fight fight fight fight fight fight fight fig...,0.0,"{'neg': 1.0, 'neu': 0.0, 'pos': 0.0, 'compound...",-0.9831


In [42]:
# Show the five lines with the highest VADER compound scores (ascending sort, so they sit at the tail)
simpsons_script.sort_values(by='VADER Compound Score', ascending=True).tail(5)

Unnamed: 0,raw_text,TB_Sentiment,VADER_Sentiment,VADER Compound Score
5651,singing they love they share they share and...,0.5,"{'neg': 0.046, 'neu': 0.199, 'pos': 0.755, 'co...",0.985
7433,youre my best friend laughs youre my best fri...,1.0,"{'neg': 0.0, 'neu': 0.159, 'pos': 0.841, 'comp...",0.9859
104521,sexily and sexy heroic heroic sexily sexy her...,0.609091,"{'neg': 0.0, 'neu': 0.201, 'pos': 0.799, 'comp...",0.9859
92871,laugh it up with christ and krusty ha ha ha ...,0.3,"{'neg': 0.0, 'neu': 0.217, 'pos': 0.783, 'comp...",0.9904
73584,vengeful god loving god vengeful god loving...,0.6,"{'neg': 0.0, 'neu': 0.216, 'pos': 0.784, 'comp...",0.9926
