In [1]:
import pandas as pd
import numpy as np
import nltk

from textblob import TextBlob
from textblob.classifiers import NaiveBayesClassifier
from textblob.sentiments import NaiveBayesAnalyzer

# Sentiment Analysis 1: Create own analyzer

In [2]:
# Word lexicons used by the hand-rolled analyzer. header=None makes pandas
# treat the first row as data rather than as column names.
positive_words = pd.read_csv("../words_positive.csv", header=None)
negative_words = pd.read_csv("../words_negative.csv", header=None)

# The script lines to analyse (the cells below read its 'line' and 'gender'
# columns, among others).
data = pd.read_csv("../Data/data.csv")

In [3]:
# Turn each lexicon frame into a plain list of rows (one list per CSV row);
# sentiment() reads the first element of every row.
negative, positive = (
    lexicon.values.tolist() for lexicon in (negative_words, positive_words)
)

In [4]:
def sentiment(text):
    """Score every sentence of ``text`` as positive (1), negative (-1) or neutral (0).

    ``text`` is split into sentences with ``nltk.sent_tokenize`` and each
    sentence into tokens with ``nltk.word_tokenize``. Every token is matched
    (exact, case-sensitive comparison) against the first element of each row
    of the module-level ``positive`` and ``negative`` lexicons.

    With ``p`` / ``n`` the number of positive / negative lexicon hits in a
    sentence, the score is:

    * ``n == 0`` and ``p > 0``  ->  1
    * ``n`` odd                 -> -1
    * ``n`` even and ``n > 0``  ->  1  (presumably pairs of negative words are
      meant to cancel out -- TODO confirm this is the intended heuristic)
    * no hits at all            ->  0

    Parameters
    ----------
    text : str
        Text to analyse.

    Returns
    -------
    list of int
        One score per sentence, in order; empty if no sentence is found.
    """
    from collections import Counter

    # Tally the lexicon entries once per call instead of rescanning both
    # lists for every token. A Counter (rather than a set) keeps the original
    # behaviour of counting a word once per matching lexicon row.
    positive_hits = Counter(item[0] for item in positive)
    negative_hits = Counter(item[0] for item in negative)

    scores = []
    for sentence in nltk.sent_tokenize(text):
        tokens = nltk.word_tokenize(sentence)
        # Counter returns 0 for tokens that are not in the lexicon.
        p_count = sum(positive_hits[token] for token in tokens)
        n_count = sum(negative_hits[token] for token in tokens)

        if n_count == 0:
            # No negative words: positive only if something positive was said.
            scores.append(1 if p_count > 0 else 0)
        elif n_count % 2:
            scores.append(-1)
        else:
            # Even, non-zero number of negative words.
            scores.append(1)
    return scores

In [5]:
# Select the 'line' column for each gender with a single .loc lookup
# (boolean row mask + column label) instead of chained indexing.
female_lines = data.loc[data.gender == 'female', 'line']
male_lines = data.loc[data.gender == 'male', 'line']

# Plain Python lists of the selected lines.
female = female_lines.tolist()
male = male_lines.tolist()

In [6]:
# Mean of the per-sentence scores that sentiment() returns for the female lines.
# NOTE(review): str(female) hands the tokenizer the Python repr of the list
# (surrounding brackets, quote characters and commas between lines), not the
# plain dialogue text. ' '.join(female) may be what was intended -- confirm,
# and change the male cell the same way so the two averages stay comparable.
np.average(sentiment(str(female)))

0.010581576572881633

In [7]:
# Mean of the per-sentence scores that sentiment() returns for the male lines.
# NOTE(review): str(male) hands the tokenizer the Python repr of the list
# (surrounding brackets, quote characters and commas between lines), not the
# plain dialogue text. ' '.join(male) may be what was intended -- confirm,
# and change the female cell the same way so the two averages stay comparable.
np.average(sentiment(str(male)))

0.028802199690668499

According to my sentiment function, male characters, on average, have a more positive sentiment.

# Sentiment Analysis 2: Using TextBlob 

In [8]:
# Run TextBlob on every line and store the (polarity, subjectivity) pair it
# reports as two new columns, built in one DataFrame that shares data's index.
data[['polarity', 'subjectivity']] = pd.DataFrame(
    [tuple(TextBlob(text).sentiment) for text in data['line']],
    index=data.index,
)

In [9]:
# Preview the first rows, including the new polarity / subjectivity columns.
data.head()

Unnamed: 0,line_no,character,line,gender,n_words,season,episode,season_episode,episode_no,polarity,subjectivity
0,1,ROYCE,What do you expect? They're savages. One lot s...,male,24,1,1,s01e01,1,-0.125,0.375
1,2,WILL,I've never seen wildlings do a thing like this...,male,21,1,1,s01e01,1,0.0,0.0
2,3,ROYCE,How close did you get?,male,5,1,1,s01e01,1,0.0,0.0
3,4,WILL,Close as any man would.,male,5,1,1,s01e01,1,0.0,0.0
4,5,GARED,We should head back to the wall.,male,7,1,1,s01e01,1,0.0,0.0


### Gender Comparison

In [10]:
# One row per gender: all of that gender's lines concatenated, space-separated,
# into a single string.
gender = (
    data.groupby(['gender'])['line']
    .apply(lambda spoken: ' '.join(spoken))
    .reset_index()
)
gender.head()

Unnamed: 0,gender,line
0,female,"Fine work, as always. Well done. Thank you. ..."
1,male,What do you expect? They're savages. One lot s...


In [11]:
# Score each gender's concatenated text with TextBlob and attach the
# (polarity, subjectivity) pair as two columns.
gender[['polarity', 'subjectivity']] = pd.DataFrame(
    [tuple(TextBlob(text).sentiment) for text in gender['line']],
    index=gender.index,
)
gender

Unnamed: 0,gender,line,polarity,subjectivity
0,female,"Fine work, as always. Well done. Thank you. ...",0.104083,0.545007
1,male,What do you expect? They're savages. One lot s...,0.093597,0.519262


### Season Comparison

In [12]:
# One row per (season, gender): that group's lines concatenated,
# space-separated, into a single string.
season = (
    data.groupby(['season', 'gender'])['line']
    .apply(lambda spoken: ' '.join(spoken))
    .reset_index()
)
season.head()

Unnamed: 0,season,gender,line
0,1,female,"Fine work, as always. Well done. Thank you. ..."
1,1,male,What do you expect? They're savages. One lot s...
2,2,female,"It was well struck, Your Grace. Yes, Your Grac..."
3,2,male,"Well struck… Well struck, Dog. Did you like th..."
4,3,female,"First time you've seen a giant, Jon Snow? Well..."


In [13]:
# Score each (season, gender) text with TextBlob and attach the
# (polarity, subjectivity) pair as two columns.
season[['polarity', 'subjectivity']] = pd.DataFrame(
    [tuple(TextBlob(text).sentiment) for text in season['line']],
    index=season.index,
)
season

Unnamed: 0,season,gender,line,polarity,subjectivity
0,1,female,"Fine work, as always. Well done. Thank you. ...",0.080377,0.54294
1,1,male,What do you expect? They're savages. One lot s...,0.071835,0.516616
2,2,female,"It was well struck, Your Grace. Yes, Your Grac...",0.091241,0.548872
3,2,male,"Well struck… Well struck, Dog. Did you like th...",0.102007,0.522084
4,3,female,"First time you've seen a giant, Jon Snow? Well...",0.136647,0.563371
5,3,male,"Brother? Did you send the ravens? Tarly, look ...",0.087357,0.524007
6,4,female,Beautiful. But pale. Timid. Timid bores me. Sh...,0.090225,0.54606
7,4,male,Magnificent. Mm-hmm. Looks fresh-forged. It is...,0.084159,0.514094
8,5,female,We shouldn’t be out here alone. Why not? If...,0.12362,0.5545
9,5,male,"Your grace, we are honored by your presence. ...",0.121507,0.527731


### Episode Comparison

In [14]:
# One row per (episode_no, gender): that group's lines concatenated,
# space-separated, into a single string.
episode = (
    data.groupby(['episode_no', 'gender'])['line']
    .apply(lambda spoken: ' '.join(spoken))
    .reset_index()
)
episode.head()

Unnamed: 0,episode_no,gender,line
0,1,female,"Fine work, as always. Well done. Thank you. ..."
1,1,male,What do you expect? They're savages. One lot s...
2,2,female,Isn't there anything else? Khaleesi! Your h...
3,2,male,"You need to drink, child... And eat. The Dot..."
4,3,female,"Please, it's nearly healed. A King should ha..."


In [15]:
# Score each (episode_no, gender) text with TextBlob and attach the
# (polarity, subjectivity) pair as two columns.
episode[['polarity', 'subjectivity']] = pd.DataFrame(
    [tuple(TextBlob(text).sentiment) for text in episode['line']],
    index=episode.index,
)

In [16]:
# gender.to_csv('../data/sentiment_gender.csv',index=False)
# season.to_csv('../data/sentiment_season.csv',index=False)
# episode.to_csv('../data/sentiment_episode.csv',index=False)

### Gender Comparison: Creator Perspective

s02e04, s02e06, s03e02 were written by a <b>female</b> writer.
<br>
The rest were written by <b>male</b> writers. 
<br>
s05e04, s06e02, and s07e05 were all written by the same <b>male</b> writer, and they are the only episodes he wrote. A fair comparison, perhaps!

In [17]:
# Lines from the three episodes in each comparison set, concatenated per
# gender (space-separated) into one string per row.
f_writer = (
    data[data.season_episode.isin(['s02e04', 's02e06', 's03e02'])]
    .groupby(['gender'])['line']
    .apply(lambda spoken: ' '.join(spoken))
    .reset_index()
)
m_writer = (
    data[data.season_episode.isin(['s05e04', 's06e02', 's07e05'])]
    .groupby(['gender'])['line']
    .apply(lambda spoken: ' '.join(spoken))
    .reset_index()
)

In [18]:
# Score each gender's text in the f_writer set with TextBlob and attach the
# (polarity, subjectivity) pair as two columns.
f_writer[['polarity', 'subjectivity']] = pd.DataFrame(
    [tuple(TextBlob(text).sentiment) for text in f_writer['line']],
    index=f_writer.index,
)
f_writer

Unnamed: 0,gender,line,polarity,subjectivity
0,female,The rot's set in. Shh. The rot will spread. If...,0.121766,0.6065
1,male,It's got to be the Mountain. He's the biggest....,0.078626,0.523495


In [19]:
# Score each gender's text in the m_writer set with TextBlob and attach the
# (polarity, subjectivity) pair as two columns.
m_writer[['polarity', 'subjectivity']] = pd.DataFrame(
    [tuple(TextBlob(text).sentiment) for text in m_writer['line']],
    index=m_writer.index,
)
m_writer

Unnamed: 0,gender,line,polarity,subjectivity
0,female,How much can the Crown afford? You're the Ma...,0.142752,0.566826
1,male,"Is that Estermont? Tarth, Ser Jaime. . The S...",0.071528,0.493794
