# Election Tweets Analysis

## Load Python Dependencies

In [None]:
import graphlab as gl
import re
from os import path
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud, STOPWORDS
from scipy.misc import imread
import pandas as pd

%matplotlib inline
pd.set_option('display.max_colwidth', -1)

### Load and clean tweets

In [None]:
# Read the raw tweet export into an SFrame; later cells read its 'text' column.
raw_tweets = gl.SFrame('data/tweets.csv')

In [None]:
# cleanup scripts
def cleanup_text(value, stopwords=None):
    """Clean a tweet for text analysis.

    The word 'retweeted' (any letter case) is removed, every run of characters
    that is neither a word character nor whitespace is replaced by a space,
    and the remaining tokens are filtered: tokens of one or two characters
    and stopwords are dropped.

    Args:
        value: Raw tweet text.
        stopwords: Optional collection of lower-case words to drop. When
            omitted, ``gl.text_analytics.stopwords()`` is used (presumably a
            lower-case word list -- confirm against the GraphLab docs).

    Returns:
        The surviving tokens joined by single spaces, or an empty string when
        no token survives.
    """
    if stopwords is None:
        # Fetch the stopword collection once per call, not once per token.
        stopwords = gl.text_analytics.stopwords()
    value = re.sub(r'((retweeted))', '', value, flags=re.I)
    value = re.sub(r'[^\w\s]+', ' ', value)
    # split() already strips whitespace from each token. Tokens are
    # lower-cased for the stopword test only, so capitalised stopwords such
    # as 'The' are dropped while kept tokens retain their original case.
    return ' '.join(
        word for word in value.split()
        if len(word) > 2 and word.lower() not in stopwords
    )

In [None]:
# Add a 'clean_text' column by running cleanup_text over every tweet's text.
raw_tweets['clean_text'] = raw_tweets['text'].apply(cleanup_text)

In [None]:
# Keep only the rows whose cleaned text is non-empty.
tweets = raw_tweets[raw_tweets['clean_text'] != '']

In [None]:
# Inspect the filtered tweets.
tweets.show()

# Wordcloud

In [None]:
# Build a word cloud from every cleaned tweet, using the Ghana map image as
# the mask. random_state is fixed so repeated runs give the same layout.
ghana_map_mask = imread('./assets/ghana_map.png')
text = ' '.join(tweets['clean_text'])
cloud_builder = WordCloud(font_path='./assets/RobotoDraft-Regular.ttf',
                          stopwords=STOPWORDS,
                          background_color='white',
                          max_words=500,
                          mask=ghana_map_mask,
                          random_state=90)
wordcloud = cloud_builder.generate(text)

## Save word cloud as image

In [None]:
# Render the word cloud without axes, save it as a 600-dpi PNG, then display it.
plt.imshow(wordcloud)
plt.axis('off')
plt.savefig('./tweet_cloud.png', dpi=600)
plt.show()

# Predict sentiments of tweets

In [None]:
# Create a sentiment model that uses the 'clean_text' column as its only feature.
sentiment_model = gl.sentiment_analysis.create(tweets, features=['clean_text'])

In [None]:
# Store the model's prediction for each tweet in 'sentiment_score'.
tweets['sentiment_score'] = sentiment_model.predict(tweets)

In [None]:
def get_sentiment_from_score(value):
    """Translate a numeric sentiment score into a label.

    Scores above 0.65 are 'positive', scores below 0.45 are 'negative', and
    everything in between (both bounds included) is 'neutral'.
    """
    if value > 0.65:
        label = 'positive'
    else:
        label = 'negative' if value < 0.45 else 'neutral'
    return label

In [None]:
# Label every tweet 'positive', 'negative' or 'neutral' from its sentiment score.
tweets['sentiment'] = tweets['sentiment_score'].apply(get_sentiment_from_score)

# Detect emotions in tweets

### Load NRC emotion lexicons

In [None]:
# Load the emotion lexicon table and collect the distinct values of its
# 'emotion' column.
lexicons = gl.SFrame('data/emotion_lexicons.csv')
topics = lexicons['emotion'].unique()

In [None]:
# Display the lexicon table.
lexicons

### Convert lexicons from str to int type

In [None]:
# Map each emotion label to an integer code: its position among the sorted
# distinct labels. SArray.sort() returns a new sorted SArray instead of
# sorting in place, so it is called once, right where the order is consumed.
map_topics = {topic: code for code, topic in enumerate(topics.sort())}


def get_emotion(value):
    """Return the emotion label whose code in ``map_topics`` equals ``value``.

    Returns None when no label carries that code.
    """
    matching_labels = (label for label, code in map_topics.items() if code == value)
    return next(matching_labels, None)

In [None]:
# Replace each emotion label with its integer code from map_topics.
lexicons['emotion'] = lexicons['emotion'].apply(lambda k: map_topics[k])

### Perform word count on tweets

In [None]:
# Store the count_words result for each cleaned tweet in 'word_count'.
tweets['word_count'] = gl.text_analytics.count_words(tweets['clean_text'])

### Load emotion model

In [None]:
# Load the previously saved emotion model from disk.
emotion_model = gl.load_model('models/emotion_model')

In [None]:
# Predict a value per tweet from its word counts (presumably the integer
# emotion codes from map_topics -- confirm against the saved model), then
# translate each value back to its label with get_emotion.
tweets['emotion_score'] = emotion_model.predict(tweets['word_count'])
tweets['emotion'] = tweets['emotion_score'].apply(get_emotion)
tweets['emotion'].show()

# Visualization of Analysis

## Top tweets

In [None]:
# Show the NDC emotion counts, highest first.
# NOTE(review): ndc_emotions is assigned in a cell further down this file, so
# that cell has to run before this one.
ndc_emotions.sort('NDC', ascending=False)

In [None]:
# Show the NPP emotion counts, highest first.
# NOTE(review): npp_emotions is assigned in a cell further down this file, so
# that cell has to run before this one.
npp_emotions.sort('NPP', ascending=False)

In [None]:
# Combine the per-party counts by matching rows on the emotion label.
# add_columns pairs rows purely by position, which mislabels the counts (or
# fails) whenever the two group-by results differ in row order or in which
# emotions they contain. Emotions missing for one party are counted as 0.
party_emotions = ndc_emotions.join(npp_emotions, on='emotion', how='outer')
party_emotions = party_emotions.fillna('NDC', 0).fillna('NPP', 0)
# Fixed row order so the bars appear in the same order on every run.
party_emotions = party_emotions.sort('emotion')

In [None]:
# Convert to a pandas DataFrame for plotting and display it.
df_party_emotions = party_emotions.to_dataframe()
df_party_emotions

In [None]:
# Grouped bar chart with one bar per party for every emotion. `x` takes the
# column label (as the all-tweets emotion chart does), not the column's
# values; the remaining numeric columns become the bar series.
ax = df_party_emotions.plot.bar(x='emotion',
                                color=['#60BD68', '#5DA5DA'],
                                sharex=True, rot=0,
                                title='Emotion Displayed In Tweets By Political Party',
                                )

In [None]:
# Label the axes and save the current chart as a 200-dpi PNG.
ax.set_ylabel("Count")
ax.set_xlabel("Emotion")
fig = ax.get_figure()
fig.tight_layout()
fig.savefig('plots/tweets_emotions_party.png', dpi=200)

### NPP emotions

In [None]:
# NPP tweets sorted by score (descending) and then restricted to rows whose
# emotion is 'fear', as a DataFrame.
# NOTE(review): the variable is named npp_surprise but the filter value is
# 'fear' -- confirm which one is intended.
npp_surprise = npp.sort('score', ascending=False).filter_by('fear', 'emotion').to_dataframe()

In [None]:
# Show the emotion, text and permalink columns of those tweets.
npp_surprise[['emotion', 'text', 'permalink']]

### NDC emotions

In [None]:
# NDC tweets sorted by score (descending) and then restricted to rows whose
# emotion is 'anticipation', as a DataFrame; the score, text and permalink
# columns are displayed.
# NOTE(review): the variable is named ndc_surprise but the filter value is
# 'anticipation' -- confirm which one is intended.
ndc_surprise = ndc.sort('score', ascending=False).filter_by('anticipation', 'emotion').to_dataframe()
ndc_surprise[['score', 'text', 'permalink']]

# Save final tweets data as csv

In [None]:
# Save the figure that `ax` belongs to as plots/tweets_users_count.png.
# NOTE(review): `ax` is whichever chart was created last; presumably the
# 'Tweet Count of Users' chart -- confirm the cell execution order.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig('plots/tweets_users_count.png', dpi=200)

### Based on Tweet Score

In [None]:
# Rank users by total tweet score; the index becomes a 1-based rank.
top_users_df = top_users_df.sort_values(by=['Tweet Score'], ascending=False)
top_users_df.reset_index(drop=True, inplace=True)
top_users_df.index += 1
# Take the ten best, then order them ascending for the horizontal bar chart.
users_score = top_users_df[:10].sort_values('Tweet Score')
# Title spelling corrected ("Cummulative" -> "Cumulative").
ax = users_score.plot(x='Twitter Id', y='Tweet Score', kind='barh', colormap='Accent',
                      title='Cumulative Tweet Score of Users', figsize=(6, 4))

In [None]:
# Save the current chart as plots/tweets_users_score.png at 200 dpi.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig('plots/tweets_users_score.png', dpi=200)

### Based on Avg Score Per Tweet

In [None]:
# Rank users by average score per tweet (index = 1-based rank), then chart
# the ten best as horizontal bars.
top_users_df = (top_users_df
                .sort_values('Avg Score Per Tweet', ascending=False)
                .reset_index(drop=True))
top_users_df.index = top_users_df.index + 1
users_avg_score = top_users_df.head(10).sort_values('Avg Score Per Tweet')
ax = users_avg_score.plot.barh(x='Twitter Id', y='Avg Score Per Tweet',
                               colormap='Accent',
                               title='Avg Score Per Tweet of Users',
                               figsize=(6, 4))

In [None]:
# Save the current chart as plots/tweets_users_avg_score.png at 200 dpi.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig('plots/tweets_users_avg_score.png', dpi=200)

## Top hashtags

In [None]:
# Per-hashtag statistics: number of tweets, total score and mean score.
top_hashtags = tweets.groupby(key_columns='hashtag',
                              operations={'Count': gl.aggregate.COUNT(),
                                          'Score': gl.aggregate.SUM('score'),
                                          'Avg Score': gl.aggregate.MEAN('score')
                                          })
# Keep the value returned by rename so the new column name is picked up
# whether rename works in place or hands back a renamed copy.
top_hashtags = top_hashtags.rename({'hashtag': 'Hashtag'})
top_hashtags_df = top_hashtags.to_dataframe()
# Ascending order for the horizontal bar chart of average scores.
top_hashtags_df.sort_values('Avg Score', ascending=True, inplace=True)

In [None]:
# Horizontal bar chart of the average tweet score per hashtag.
ax = top_hashtags_df.plot(x='Hashtag', y='Avg Score', kind='barh', colormap='Pastel1',
                    title='Avg Score of Tweets in Hashtags', figsize=(6, 4))

In [None]:
# Save the current chart as plots/tweets_hashtag_score.png at 200 dpi.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig('plots/tweets_hashtag_score.png', dpi=200)

## Sentiment Polarity

In [None]:
# Count tweets per emotion label and convert the result to a DataFrame.
emotions = tweets[['emotion']].groupby(['emotion'], [gl.aggregate.COUNT()])
df_emotions = emotions.to_dataframe()

In [None]:
# Bar chart of tweet counts per emotion with a palette of eight colours,
# then label the axes and save the chart as a 200-dpi PNG.
ax = df_emotions.plot.bar(x='emotion', color=['#60BD68', '#DECF3F', '#5DA5DA', '#F15854', '#FAA43A',
                                              '#B276B2', '#F17CB0', '#4D4D4D'
                                             ],
                          title='Emotion Displayed In All Tweets',
                          legend=False,rot=0
                        )
ax.set_ylabel("Count")
ax.set_xlabel("Emotion")
fig = ax.get_figure()
fig.tight_layout()
fig.savefig('plots/tweets_emotions.png', dpi=200)

### Emotion displayed by Political Party

In [None]:
# Split tweets by campaign hashtag: rows whose 'hashtag' is in the first list
# form the `ndc` subset, rows whose 'hashtag' is in the second list form the
# `npp` subset.
ndc = tweets.filter_by(['VoteforJMnumber3', 'Vote4JM', 'JMToaso', 'VoteJM2016', 'TransformingGhana', 'ChangingLives'], 
                       column_name='hashtag')
npp = tweets.filter_by(['VoteAkufoAddo', 'VoteNPP', 'VoteForChange', 'ChooseChange', 'Kalyppo', 'Vote4Change'], column_name='hashtag')

In [None]:
# Tweets per emotion within each party subset; the count column is named
# after the party ('NDC' / 'NPP'). A pandas copy of each table is kept too.
ndc_emotions = ndc[['emotion']].groupby(['emotion'], {'NDC': gl.aggregate.COUNT()})
df_ndc_emotions = ndc_emotions.to_dataframe()

npp_emotions = npp[['emotion']].groupby(['emotion'], {'NPP': gl.aggregate.COUNT()})
df_npp_emotions = npp_emotions.to_dataframe()

In [None]:
# The ten highest-scoring tweets, shown as a table whose index is a 1-based rank.
top_tweets = tweets.sort('score', ascending=False)[:10]
ts = top_tweets[['timestamp', 'text', 'username', 'hashtag', 'score']]
top_tweets_df = ts.to_dataframe().reset_index(drop=True)
top_tweets_df.index = top_tweets_df.index + 1
top_tweets_df

## Top users

In [None]:
# Per-user statistics: number of tweets and total tweet score.
top_users = tweets.groupby(key_columns='username',
                           operations={'Tweet Count': gl.aggregate.COUNT(),
                                       'Tweet Score': gl.aggregate.SUM('score')}
                           )
# Keep the value returned by rename so the new column name is picked up
# whether rename works in place or hands back a renamed copy.
top_users = top_users.rename({'username': 'Twitter Id'})
# Keep only users who tweeted more often than the average user.
top_users = top_users[top_users['Tweet Count'] > top_users['Tweet Count'].mean()]

In [None]:
# Average score per tweet = total score / tweet count; then convert to a
# pandas DataFrame and display it.
top_users['Avg Score Per Tweet'] = top_users['Tweet Score'] / top_users['Tweet Count']
top_users_df = top_users.to_dataframe()
top_users_df

### Based on Tweet Count

In [None]:
# Rank users by number of tweets (index = 1-based rank), then chart the ten
# most active as horizontal bars.
top_users_df = (top_users_df
                .sort_values('Tweet Count', ascending=False)
                .reset_index(drop=True))
top_users_df.index = top_users_df.index + 1
users_count = top_users_df.head(10).sort_values('Tweet Count')
ax = users_count.plot.barh(x='Twitter Id', y='Tweet Count',
                           colormap='Accent',
                           title='Tweet Count of Users',
                           figsize=(6, 4))

In [None]:
# Count tweets per sentiment label.
sentiments = tweets[['sentiment']].groupby(['sentiment'], [gl.aggregate.COUNT()])

In [None]:
# Pie chart of the sentiment split. Colour and emphasis are looked up per
# label rather than by row position, because the group-by result has no
# guaranteed row order and may hold fewer than three labels. The colours
# match the hashtag sentiment chart (positive green, neutral yellow,
# negative red); the 'positive' slice is pulled out.
sentiment_colors = {'neutral': '#DECF3F', 'positive': '#60BD68', 'negative': '#F15854'}
sentiment_labels = list(sentiments['sentiment'])
plt.pie(sentiments['Count'], labels=sentiment_labels,
        colors=[sentiment_colors[label] for label in sentiment_labels],
        explode=[0.1 if label == 'positive' else 0 for label in sentiment_labels],
        autopct='%1.1f%%'
       )
plt.axis('off')
plt.savefig('plots/tweets_sentiment.png', dpi=300)
plt.show()

### Sentiment by hashtags

In [None]:
# pandas copy of the full tweets table.
df_tweets = tweets.to_dataframe()

In [None]:
# Number of 'negative' tweets per hashtag.
neg_tweets = tweets.filter_by('negative', column_name='sentiment')
neg_tweets = neg_tweets.groupby(['hashtag'], {'Negative Tweets': gl.aggregate.COUNT()})

In [None]:
# Number of 'positive' tweets per hashtag.
pos_tweets = tweets.filter_by('positive', column_name='sentiment')
pos_tweets = pos_tweets.groupby(['hashtag'], {'Positive Tweets': gl.aggregate.COUNT()})

In [None]:
# Number of 'neutral' tweets per hashtag.
neu_tweets = tweets.filter_by('neutral', column_name='sentiment')
neu_tweets = neu_tweets.groupby(['hashtag'], {'Neutral Tweets': gl.aggregate.COUNT()})

In [None]:
# Combine the three per-hashtag counts by matching rows on the hashtag.
# add_columns pairs rows purely by position, which attaches counts to the
# wrong hashtag (or fails) whenever the group-by results differ in row order
# or in which hashtags they contain. Missing combinations are counted as 0.
pos_tweets = pos_tweets.join(neu_tweets, on='hashtag', how='outer')
pos_tweets = pos_tweets.join(neg_tweets, on='hashtag', how='outer')
for count_column in ('Positive Tweets', 'Neutral Tweets', 'Negative Tweets'):
    pos_tweets = pos_tweets.fillna(count_column, 0)
pos_tweets

In [None]:
# pandas copy of the combined per-hashtag sentiment counts, used for plotting.
hashtag_sentiment = pos_tweets.to_dataframe()

In [None]:
# Stacked horizontal bars: one bar per hashtag, split by sentiment count.
# `stacked` is passed as a real boolean; the previous string 'True' only
# worked because any non-empty string is truthy.
ax = hashtag_sentiment.plot.barh(x='hashtag', stacked=True,
                                 color=['#60BD68', '#DECF3F', '#F15854'],
                                 title='Sentiment Polarity For Hashtags',
                                 )
fig = ax.get_figure()
fig.tight_layout()
fig.savefig('plots/tweets_hashtag_sentiment.png', dpi=200)

## Emotion displayed