# Election Tweets Analysis

In [1]:
import graphlab as gl
import re
from os import path
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
from scipy.misc import imread

%matplotlib inline

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1482445502.log


This non-commercial license of GraphLab Create for academic use is assigned to epigos@gmail.com and will expire on September 25, 2017.


### Load tweets

In [2]:
raw_tweets = gl.SFrame('data/2016_tweets.csv')

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,str,str,int,int,str,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [3]:
# cleanup scripts
def cleanup_text(value):
    """Cleanup tweets by removing 'retweeted', stopwords and other characters"""
    output = []
    value = re.sub(r'((retweeted))', '', value, flags=re.I)
    value = re.sub(r'[^\w\s]+', ' ', value)
    for word in value.split():
        if len(word) > 2 and word not in gl.text_analytics.stopwords():
            output.append(word.strip())
    return ' '.join(output)

In [4]:
raw_tweets['clean_text'] = raw_tweets['text'].apply(cleanup_text)

In [5]:
tweets = raw_tweets[raw_tweets['clean_text'] != '']

In [6]:
len(tweets)

65491

In [None]:
ghana_map_mask = imread('./assets/ghana_map.png')
text = ' '.join(tweets['clean_text'])
wordcloud = WordCloud(
    font_path='./assets/RobotoDraft-Regular.ttf',
    stopwords=STOPWORDS,
    background_color='white',
    max_words=1000,
    mask=ghana_map_mask
).generate(text)

In [None]:
plt.imshow(wordcloud)`1
plt.axis('off')
plt.savefig('./tweet_cloud.png', dpi=300)
plt.show()

In [8]:
model = gl.sentiment_analysis.create(tweets, features=['clean_text'])

In [13]:
tweets['sentiment'] = model.predict(tweets)

In [26]:
lexicons = gl.SFrame('../text-analysis/lexicons/lexicons_compiled.csv')

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,str,str,str,str,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [32]:
lexicons['emotion'] = lexicons['emotion'].apply(lambda x: x if x else 'irrelevant')

In [33]:
lexicons

word,emotion,color,orientation,sentiment,subjectivity,source
2-faced,irrelevant,,,negative,,opinion
2-faces,irrelevant,,,negative,,opinion
a+,irrelevant,,,positive,,opinion
aback,irrelevant,brown,,,,colour
abacus,trust,,,,,emolex
abandon,fear,,,negative,weak,emolex
abandoned,anger,black,,negative,weak,emolex
abandonment,anger,black,,negative,weak,emolex
abase,irrelevant,,,negative,strong,mpqa
abasement,irrelevant,,,negative,strong,mpqa


In [34]:
associations = gl.SFrame({'word': lexicons['word'], 'topic': lexicons['emotion']})

In [36]:
topics = associations['topic'].unique()

In [39]:
tweets['word_count'] = gl.text_analytics.count_words(tweets['clean_text'])

In [48]:
topics.sort()
int_topics = {}
for k, v in enumerate(topics.sort()):
    int_topics[v] = k
int_topics

{'anger': 0,
 'anticipation': 1,
 'disgust': 2,
 'fear': 3,
 'irrelevant': 4,
 'joy': 5,
 'sadness': 6,
 'surprise': 7,
 'trust': 8}

In [49]:
associations['topic'] = associations['topic'].apply(lambda k: int_topics[k])

In [50]:
associations

topic,word
4,2-faced
4,2-faces
4,a+
4,aback
8,abacus
3,abandon
0,abandoned
0,abandonment
4,abase
4,abasement


In [51]:
topic_model = gl.topic_model.create(tweets['word_count'], num_topics=9, num_iterations=50, associations=associations)

In [54]:
topic_model.get_topics(num_words=10, output_type='topic_words')

words
"[vote, ghana, voteakufoaddo, the, ..."
"[changinglives, the, continue, jdmahama, ..."
"[voteakufoaddo, mahama, nana, nakufoaddo, ..."
"[change, kalyppo, iqrtg, elections, the, polling, ..."
"[election, make, work, day, status, great, win, ..."
"[voteforjmnumber3, voteforchange, toabapa, ..."
"[choosechange, changinglives, ..."
"[more, votejm2016, ndc, jdmahama, voting, ..."
"[jmtoaso, ghana, president, www, don, ..."


In [62]:
def get_emotion(value):
    for k, v in int_topics.items():
        if v == value:
            return k
tweets['emotion'] = topic_model.predict(tweets['word_count'])

In [64]:
tweets['emotion'] = tweets['emotion'].apply(get_emotion)

In [66]:
tweets['emotion'].show()

Canvas is accessible via web browser at the URL: http://localhost:52164/index.html
Opening Canvas in default web browser.


In [70]:
def get_sentiment_from_score(value):
    if value > 0.60:
        return 'positive'
    elif value < 0.40:
        return 'negative'
    return 'neutral'
tweets['sentiment_label'] = tweets['sentiment'].apply(get_sentiment_from_score)

In [71]:
tweets['sentiment_label'].show()

Canvas is updated and available in a tab in the default browser.


In [None]:
sentiments = 