# Emotional Text Analysis
## Statistical text analysis for emotional extraction and some generalizations
#### Alberto Barradas
[@abcsds](http://github.com/abcsds)

## Emotion and behaviour

There are several theories of emotion; all of them indicate that emotions are a precursor of behaviour.

## Models of Emotion
- Affect
![](img/Circumplex.jpg)
- Bio-chemical
![](img/Loevheim.png)
- Evolutionary
![](img/Ekman.jpg)
- Functional
![](img/Plutchik.png)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import string

In [None]:
# Names of the ten emotion/sentiment columns selected from the lexicon table.
emotions = [
    "anger", "anticipation", "disgust", "fear", "joy",
    "negative", "positive", "sadness", "surprise", "trust",
]

# Load the word/emotion table and preview its first rows.
df = pd.read_csv("dict.csv")
df.head()

In [None]:
# Column totals for each emotion across the whole lexicon table.
df[emotions].sum()

In [None]:
from tweepy.streaming import StreamListener
from tweepy import Stream
from tweepy import OAuthHandler

import json
from csv import DictReader
import pandas as pd
import numpy as np
import seaborn as sns

%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
import myKeys

# Twitter API credentials, copied from the local `myKeys` module so the rest
# of the notebook can refer to them by short names.
api_key = myKeys.api_key
api_secret = myKeys.api_secret
access_token_key = myKeys.access_token_key
access_token_secret = myKeys.access_token_secret

In [None]:
# Emotion columns, in the order used for every per-word score vector.
cols = ['anger', 'anticipation', 'disgust', 'fear',
        'joy', 'negative', 'positive', 'sadness', 'surprise', 'trust']

dictFile  = 'dict.csv'

# Build the lookup table: word -> list of integer scores, one per entry of
# `cols`. The csv module asks for files to be opened with newline='' so that
# newlines embedded in quoted fields are parsed correctly.
with open(dictFile, newline='') as csvFile:
    mainDict = {row['Word']: [int(row[c]) for c in cols]
                for row in DictReader(csvFile)}

In [None]:
class ColorListener(StreamListener):
    """Stream listener that scores every incoming tweet against ``mainDict``.

    Each stream message that carries a ``text`` field is scored with
    :meth:`score` and stored as one row of ``self.tweets``: the tweet text
    followed by the ten emotion totals.
    """

    # Emotion columns, in the same order as the per-word vectors in mainDict.
    EMOTIONS = ['anger', 'anticipation', 'disgust', 'fear', 'joy',
                'negative', 'positive', 'sadness', 'surprise', 'trust']
    COLUMNS = ['tweet'] + EMOTIONS

    # Deletes . , ; : and turns tabs/newlines into spaces in a single pass.
    _PUNCTUATION = str.maketrans('\t\n', '  ', '.,;:')

    def __init__(self):
        """Initialise the base listener and an empty result table."""
        super().__init__()
        self.tweets = pd.DataFrame(columns=self.COLUMNS)

    def on_data(self, data):
        """Score one raw stream message and append it to ``self.tweets``.

        Messages that are not valid JSON, or that have no usable ``text``
        field, are skipped. Other exceptions (including ``KeyboardInterrupt``)
        are no longer swallowed, so the stream can be interrupted and real
        errors stay visible.

        Args:
            data: Raw JSON string delivered by the stream.

        Returns:
            Always ``True``.
        """
        try:
            tweet = json.loads(data)
            vector = self.score(tweet)
        except (ValueError, KeyError, TypeError, AttributeError):
            # Malformed payload or a message without tweet text: ignore it.
            return True

        print(vector)
        # DataFrame.append was removed in pandas 2.0; assigning to the next
        # integer label grows the table by one row instead.
        self.tweets.loc[len(self.tweets)] = [tweet['text']] + vector
        return True

    def score(self, tweet):
        """Return the summed emotion vector for the words of ``tweet['text']``.

        The text is stripped of ``. , ; :``, tabs and newlines become spaces,
        and the result is split on single spaces. Each word found in
        ``mainDict`` (exact, case-sensitive match) adds its vector to the
        running totals.

        Args:
            tweet: Mapping with a ``text`` entry holding the tweet string.

        Returns:
            A list of integers, one total per entry of ``EMOTIONS``.

        Raises:
            KeyError: If ``tweet`` has no ``text`` entry.
        """
        words = tweet['text'].translate(self._PUNCTUATION).split(' ')
        totals = [0] * len(self.EMOTIONS)
        for word in words:
            weights = mainDict.get(word)
            if weights is not None:
                totals = [t + w for t, w in zip(totals, weights)]
        return totals

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print("Error: ", status)

In [None]:
# Listener that will accumulate the scored tweets in `cListener.tweets`.
cListener = ColorListener()
# Authenticate with the credentials loaded earlier in the notebook.
auth = OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token_key, access_token_secret)

stream = Stream(auth, cListener)

# Start reading stream for english tweets with the color words
# NOTE(review): this call presumably blocks until the stream is stopped or the
# kernel is interrupted -- confirm against the installed tweepy version.
stream.filter(languages=['en'], track=['red', 'green','blue'])

In [None]:
# Rebind `df` (until now the lexicon table) to the tweets collected by the listener.
df = cListener.tweets
print(len(df.index)) # Number of rows

In [None]:
df.head(10) # What the data looks like: the first 10 collected rows

In [None]:
# Rows are stored in arrival order, so the row index acts as a rough time axis.
df.plot(figsize=(16, 6)) # Plot the sentiment as a time series

In [None]:
# Same plot restricted to the 'trust' column.
df['trust'].plot(figsize=(16, 6))

In [None]:
# One subplot per column instead of overlaying all lines on a single axis.
df.plot(subplots=True, figsize=(16, 10))

In [None]:
# Kernel density estimate of the per-tweet 'trust' score.
df['trust'].plot.kde(figsize=(16, 6))

In [None]:
# Pairwise scatter plots of the score columns, with a KDE on the diagonal.
# `pd.tools.plotting` has been removed from pandas; `pd.plotting` is the
# public home of scatter_matrix.
pd.plotting.scatter_matrix(df, alpha=0.2, figsize=(16, 10), diagonal='kde')

In [None]:
# Column-wise totals over all collected rows.
df.sum()

In [None]:
# Pairwise correlation matrix of the score columns; kept in `cor` for the plots below.
cor = df.corr()
cor

In [None]:
# Heatmap of the correlation matrix.
sns.heatmap(cor)

In [None]:
# Same correlation matrix, drawn as a seaborn cluster map.
sns.clustermap(cor)

In [None]:
from matplotlib.pyplot import figure, show, rc

# Polar bar chart: one coloured wedge per emotion, whose length is that
# emotion's column total in `df`.
wheel_emotions = ['fear','trust','joy','anticipation','anger','disgust','sadness','surprise']
colors = ['#007C37','#79BF2A','#EBC527','#E66F11','#DB1245','#7D4CA1','#296CAB','#1781AA']

fig = figure(figsize=(10, 10))
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)

# Number of wedges follows the emotion list instead of a separate constant.
N = len(wheel_emotions)
width = np.pi/N
# Evenly spaced angles around the circle, shifted back by a quarter of a wedge width.
theta = np.arange(0, 2*np.pi, 2*np.pi/N)-(np.pi/(2*N))
radii = df[wheel_emotions].sum()

bars = ax.bar(theta, radii, width=width, bottom=0.0)
for bar, c in zip(bars, colors):
    bar.set_facecolor(c)
    bar.set_alpha(1)

show()