### Twitter Sentiment Analysis - Gov CDMX

In [None]:
from datalab.context import Context
import google.datalab.storage as storage
import google.datalab.bigquery as bq
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import pandas as pd
import numpy as np
import shutil
import time
from tensorflow.python.lib.io import file_io

In [None]:
import os

# Read the Cloud API key from the environment so the real key is never saved
# inside the notebook; "XXXXX" stays as the placeholder when the variable is unset.
APIKEY = os.environ.get("GOOGLE_API_KEY", "XXXXX")

In [None]:
# Upgrade the Google API client library; the sentiment cell further down
# imports `googleapiclient.discovery`, which this package is expected to provide.
!pip install --upgrade google-api-python-client

In [None]:
# Describe the BigQuery table `twitter.best`.
# NOTE(review): presumably this is the table the tweets were loaded into — confirm.
%bq tables describe --name twitter.best

Now that the data is in BigQuery, we can either export a section of it to Cloud Storage or read it directly from BigQuery. For this experiment we just put a small sample into Google Cloud Storage.

In [None]:
# Load the exported tweet sample from Cloud Storage into a DataFrame.
TWEETS_CSV = 'gs://nlpstorage/results-20181102-141401.csv'
with file_io.FileIO(TWEETS_CSV, 'r') as csv_file:
  df = pd.read_csv(csv_file)

In [None]:
# Preview the first rows of the loaded tweets.
df.head()

In [None]:
# Show the first tweet and report how many tweets were loaded.
tweet_texts = df['tweet_text']
print(tweet_texts[0])
print(len(tweet_texts))

In [None]:
# Tweets whose text starts with `"RT` (retweets). `na=False` treats missing
# text as "no match", so the boolean mask never contains NaN.
rts = df[df['tweet_text'].str.match('"RT', na=False)]
print("Retweets ", len(rts))

# Tweets mentioning #CDMX. `.copy()` makes this an independent frame, so the
# columns added to it in later cells do not write into a slice of `df`
# (which would raise pandas' SettingWithCopyWarning).
# The name `google` is kept because every later cell refers to it.
google = df[df['tweet_text'].str.contains('#CDMX', na=False)].copy()
print("Google ", len(google))

In [None]:
from googleapiclient.discovery import build

# Sentiment scores, collected in the same order as the tweets in `google`.
polarity = []
magnitude = []

# Client for the 'language' API (v1beta1), authenticated with the API key.
lservice = build('language', 'v1beta1', developerKey=APIKEY)

# One analyzeSentiment request per tweet.
for tweet in google['tweet_text']:
  request_body = {'document': {'type': 'PLAIN_TEXT', 'content': tweet}}
  response = lservice.documents().analyzeSentiment(body=request_body).execute()
  sentiment = response['documentSentiment']
  polarity.append(sentiment['polarity'])
  magnitude.append(sentiment['magnitude'])

# Number of tweets that were scored.
print(len(polarity))

In [None]:
# Attach the sentiment scores (collected in tweet order) as new columns.
# `assign` returns a new frame instead of writing into a filtered slice of
# `df`, which avoids pandas' SettingWithCopyWarning and keeps the cell
# safe to re-run.
google = google.assign(polarity=polarity, magnitude=magnitude)
google.head()

In [None]:
# Parse the timestamp strings into datetimes. `yearfirst` expects a real
# boolean; a string such as 'TRUE' only works by being truthy.
# `assign` builds a new frame rather than writing into a filtered slice.
google = google.assign(
  datef=pd.to_datetime(google['tweet_timestamp'], yearfirst=True)
)
google['datef'].head()

In [None]:
# Mean polarity and magnitude over all #CDMX tweets; the timeline plots
# draw these as horizontal reference lines.
polavg = google['polarity'].mean()
magavg = google['magnitude'].mean()
print('polarity', polavg, 'magnitude', magavg)

In [None]:
# Magnitude of every #CDMX tweet over time, with the mean as a red line.
data = google[['datef', 'magnitude']].set_index('datef')
fig, ax = plt.subplots(figsize=(15,7))
data.plot(ax=ax, legend=False)
ax.axhline(y=magavg, linewidth=4, color='r')

# One tick every five minutes, labelled with the minute of the hour.
five_minute_ticks = mdates.MinuteLocator(interval=5)
minute_labels = mdates.DateFormatter('%M')
ax.xaxis.set_major_locator(five_minute_ticks)
ax.xaxis.set_major_formatter(minute_labels)
ax.set_xlabel('Date (minutes from 23rd hour on Nov 1 2018)')
ax.set_ylabel('Magnitude')

In [None]:
# Polarity of every #CDMX tweet over time, with the mean as a red line.
data2 = google[['datef', 'polarity']].set_index('datef')
fig, ax2 = plt.subplots(figsize=(15,7))
data2.plot(ax=ax2, legend=False)
ax2.axhline(y=polavg, linewidth=4, color='r')

# One tick every five minutes, labelled with the minute of the hour.
five_minute_ticks = mdates.MinuteLocator(interval=5)
minute_labels = mdates.DateFormatter('%M')
ax2.xaxis.set_major_locator(five_minute_ticks)
ax2.xaxis.set_major_formatter(minute_labels)
ax2.set_xlabel('Date (minutes from 23rd hour on Nov 1 2018)')
ax2.set_ylabel('Polarity')

High-magnitude tweets are more impactful than low-magnitude tweets; a weak statement doesn't say much. We will therefore concentrate only on statements with a magnitude of 0.5 or higher.

In [None]:
# Polarity against magnitude for every #CDMX tweet (no magnitude filter applied).
fig, ax = plt.subplots(figsize=(10, 4))
ax.grid(False)
ax.scatter(google['magnitude'], google['polarity'], s=120, c='black', alpha=0.5)
ax.set_xlabel('magnitude')
ax.set_ylabel('polarity')
plt.show()

We can also see that tweets with a polarity near zero (neither very positive nor very negative) are not interesting to flag.

In [None]:
 # Keep only strong statements (magnitude of at least 0.5) and split them into
 # clearly positive (`love`) and clearly negative (`hate`) tweets.
 strong = google['magnitude'] >= 0.5
 positive = google['polarity'] >= 0.5
 negative = google['polarity'] <= -0.5
 love = google[strong & positive]
 hate = google[strong & negative]

In [None]:
# Bucket sizes: all tweets, love, hate, and whatever is left over ("weak").
n_total = len(google)
n_love = len(love)
n_hate = len(hate)
print(n_total, n_love, n_hate)
print("weak", n_total - (n_love + n_hate))

In [None]:
# Share of love, hate and remaining ("weak") tweets.
# The slice sizes are computed from the frames rather than typed in by hand,
# so the chart always follows the data that was actually loaded.
labels = ['loves', 'hates', 'weak']
n_love, n_hate = len(love), len(hate)
sizes = [n_love, n_hate, len(google) - n_love - n_hate]
colors = ['gold', 'lightskyblue', 'lightcoral']
patches, texts = plt.pie(sizes, colors=colors, shadow=True, startangle=90)
plt.legend(patches, labels, loc="best")
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.tight_layout()
plt.show()

In [None]:
# Polarity against magnitude for the strong tweets only:
# purple = love, red = hate.
fig, ax = plt.subplots()
for frame, colour in ((love, 'purple'), (hate, 'red')):
  ax.scatter(frame['magnitude'], frame['polarity'], s=120, c=colour, alpha=0.5)
ax.figure.set_size_inches(10, 4)
ax.grid(False)
ax.set(xlabel='magnitude >0.5', ylabel='abs(polarity) >=0.5')
plt.show()

In [None]:
# Strong tweets over time on two y-axes: magnitude as stars on the left axis,
# polarity as dots on the right axis; green = love, red = hate.
fig, ax1 = plt.subplots()
ax1.set_xlabel('Date (minutes from 23rd hour on Nov 1 2018)')
ax1.set_ylabel('magnitude')
ax1.grid(False)
# One tick every five minutes, labelled with the minute of the hour.
ax1.xaxis.set_major_locator(mdates.MinuteLocator(interval=5))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%M'))
ax1.figure.set_size_inches(10, 4)
for frame, star_style in ((love, 'g*'), (hate, 'r*')):
  ax1.plot(frame['datef'], frame['magnitude'], star_style, markersize=20, alpha=0.5)

# Second y-axis sharing the same time axis, used for polarity.
ax2 = ax1.twinx()
ax2.grid(False)
for frame, dot_style in ((love, 'g.'), (hate, 'r.')):
  ax2.plot(frame['datef'], frame['polarity'], dot_style, markersize=20, alpha=0.5)
ax2.set_ylabel('polarity')
plt.show()

In [None]:
# Magnitude of the strong tweets over time as dotted lines:
# green stars = love, black dots = hate.
fig, ax = plt.subplots()
ax.set_xlabel('Date (minutes from 23rd hour on Nov 1 2018)')
ax.grid(False)
# One tick every five minutes, labelled with the minute of the hour.
ax.xaxis.set_major_locator(mdates.MinuteLocator(interval=5))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%M'))
ax.figure.set_size_inches(10, 4)
for frame, colour, symbol in ((love, 'g', '*'), (hate, 'k', '.')):
  ax.plot(frame['datef'], frame['magnitude'], colour, linestyle=':',
          marker=symbol, linewidth=2, markersize=20, alpha=0.5)
ax.set_ylabel('magnitude')
plt.show()

In [None]:
# Polarity of the strong tweets over time as dotted lines:
# blue = love, red = hate.
fig, ax = plt.subplots()
ax.set_xlabel('Date (minutes from 23rd hour on Nov 1 2018)')
ax.set_ylabel('polarity')
ax.grid(False)
# One tick every five minutes, labelled with the minute of the hour.
ax.xaxis.set_major_locator(mdates.MinuteLocator(interval=5))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%M'))
ax.figure.set_size_inches(10, 4)
for frame, colour in ((love, 'b'), (hate, 'r')):
  ax.plot(frame['datef'], frame['polarity'], colour, linestyle=':',
          marker='.', linewidth=2, markersize=20, alpha=0.5)
plt.show()