<h1>Geographical Analysis of Tweets</h1>

<b>Tasks:</b>
<ol>
<li>Import May 5th tweets</li>
<li>Carry out sentiment analysis of the tweets</li>
<li>Geocode the tweets based on user location</li>
<li>Plot them on a map based on Sentiment</li>
</ol>

In [1]:
import pandas as pd
# Load the previously built DataFrame of May 5th tweets and preview the first rows.
TWEETS_CSV = 'may_5_tweets.csv'
df = pd.read_csv(TWEETS_CSV)
df.head()

Unnamed: 0,created_at,text,lang,quoted_status-text,retweeted_status-text,all_text,determined_location,deaths_mention,recovery_mention
0,2020-05-05 11:14:28+00:00,RT @ASlavitt: BREAKING: The organization Trump...,en,,,RT @ASlavitt: BREAKING: The organization Trump...,"Cocoa, Florida",True,False
1,2020-05-05 11:14:29+00:00,"RT @Therinesky: 11,000 ABS-CBN employees are b...",en,,,"RT @Therinesky: 11,000 ABS-CBN employees are b...",Manila,False,False
2,2020-05-05 11:14:29+00:00,RT @MidwivesRCM: Midwives &amp; UK maternity ...,en,,,RT @MidwivesRCM: Midwives &amp; UK maternity ...,"Leeds, England",False,False
3,2020-05-05 11:14:29+00:00,A Storm Is Brewing\n\n#QAnon #WWG1WGA #TheGrea...,en,,,A Storm Is Brewing\n\n#QAnon #WWG1WGA #TheGrea...,"Zachary, LA",False,False
4,2020-05-05 11:14:29+00:00,RT @gtconway3d: I guess we know what keeps the...,en,A group of RINO Republicans who failed badly 1...,,RT @gtconway3d: I guess we know what keeps the...,"Pittsburgh, PA",False,False


Perform sentiment analysis

In [2]:
# Build the VADER analyser used to score the sentiment of each COVID tweet.
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

try:
    sid = SentimentIntensityAnalyzer()
except LookupError:
    # The VADER lexicon is a separate nltk resource; on a fresh environment it
    # has to be downloaded once before the analyser can be constructed.
    nltk.download('vader_lexicon')
    sid = SentimentIntensityAnalyzer()

In [3]:
# Score every tweet with VADER (one dict of scores per tweet), then keep only
# the 'compound' entry of each dict as the tweet's overall sentiment value.
sentiment_score_text = df['text'].map(sid.polarity_scores)
sentiment = sentiment_score_text.map(lambda scores: scores['compound'])

In [4]:
# Attach the compound score to the tweets as a 'sentiment' column and preview.
df = df.assign(sentiment=sentiment)
df.head()

Unnamed: 0,created_at,text,lang,quoted_status-text,retweeted_status-text,all_text,determined_location,deaths_mention,recovery_mention,sentiment
0,2020-05-05 11:14:28+00:00,RT @ASlavitt: BREAKING: The organization Trump...,en,,,RT @ASlavitt: BREAKING: The organization Trump...,"Cocoa, Florida",True,False,0.0
1,2020-05-05 11:14:29+00:00,"RT @Therinesky: 11,000 ABS-CBN employees are b...",en,,,"RT @Therinesky: 11,000 ABS-CBN employees are b...",Manila,False,False,-0.6597
2,2020-05-05 11:14:29+00:00,RT @MidwivesRCM: Midwives &amp; UK maternity ...,en,,,RT @MidwivesRCM: Midwives &amp; UK maternity ...,"Leeds, England",False,False,0.0
3,2020-05-05 11:14:29+00:00,A Storm Is Brewing\n\n#QAnon #WWG1WGA #TheGrea...,en,,,A Storm Is Brewing\n\n#QAnon #WWG1WGA #TheGrea...,"Zachary, LA",False,False,0.0
4,2020-05-05 11:14:29+00:00,RT @gtconway3d: I guess we know what keeps the...,en,A group of RINO Republicans who failed badly 1...,,RT @gtconway3d: I guess we know what keeps the...,"Pittsburgh, PA",False,False,0.4215


In [5]:
# Show the most positive and the most negative sentiment score in the data.
sentiment_col = df['sentiment']
print(sentiment_col.max(), sentiment_col.min())

0.9834 -0.9756


Geocode the tweets based on user location description

In [6]:
import math
import geopandas as gpd
from geopandas.tools import geocode

import folium 
from folium import Marker
from folium.plugins import MarkerCluster

In [7]:
# Geocoding issues one request per tweet, so work on a reproducible random
# subset. Cap the sample size at the number of available rows so the cell
# does not raise ValueError on a smaller input file.
N_SAMPLES = 5000
df = df.sample(n=min(N_SAMPLES, len(df)), random_state=1)

In [8]:
def my_geocoder(row):
    """Geocode one location via Nominatim and return its coordinates.

    Parameters
    ----------
    row
        The location to look up; it is passed unchanged to ``geocode``.

    Returns
    -------
    pd.Series
        A Series labelled 'Latitude', 'Longitude' and 'geometry'. When the
        lookup fails for any reason, the same three labels are returned with
        missing values, so every call yields a result of the same shape.
    """
    try:
        point = geocode(row, provider='nominatim').geometry.iloc[0]
        return pd.Series({'Latitude': point.y, 'Longitude': point.x, 'geometry': point})
    except Exception:
        # Best-effort lookup: any geocoding failure becomes a row of missing
        # values. `Exception` (not a bare `except`) keeps KeyboardInterrupt
        # working, so a long geocoding run can still be stopped by the user.
        return pd.Series({'Latitude': float('nan'), 'Longitude': float('nan'), 'geometry': None})

# Geocode each tweet's user location, one row at a time.
location_column = df[['determined_location']]
locations_decoded = location_column.apply(my_geocoder, axis=1)


In [9]:
import numpy as np
# Share of sampled tweets whose location was geocoded (non-missing latitude).
geocoded_mask = ~np.isnan(locations_decoded['Latitude'])
pct_geocoded = geocoded_mask.sum() * 100 / len(df)
print('The percentage of places geocoded is {}'.format(pct_geocoded))

The percentage of places geocoded is 90.42


This is a good share of tweets to plot on the map, although the geocoded locations are not 100% reliable.

In [10]:
# Keep only the successfully geocoded rows and attach their coordinates to the
# tweets. The two frames are joined by row position: both are given a fresh
# 0..n-1 index first, and the original tweet index survives as the 'index_x'
# and 'index_y' columns.
# `locations_decoded` itself is left untouched so that re-running this cell
# always starts from the same input and cannot misalign the positional join.
geocoded = locations_decoded.reset_index()
geocoded = geocoded[geocoded['Latitude'].notnull()]
df_with_location = pd.merge(df.reset_index(), geocoded, how='right', left_index=True, right_index=True)
# Coordinates are latitude/longitude, i.e. WGS84. The authority string replaces
# the deprecated {'init': 'epsg:4326'} dictionary form.
df_with_location = gpd.GeoDataFrame(df_with_location, geometry=df_with_location.geometry, crs='EPSG:4326')
df_with_location.head()

Unnamed: 0,index_x,created_at,text,lang,quoted_status-text,retweeted_status-text,all_text,determined_location,deaths_mention,recovery_mention,sentiment,index_y,Latitude,Longitude,geometry
0,5633,2020-05-05 11:18:11+00:00,RT @JOYNEWSONTV: (((LIVE: Ministry of Informat...,en,,,RT @JOYNEWSONTV: (((LIVE: Ministry of Informat...,accra,False,False,0.0,5633,5.560014,-0.205744,POINT (-0.20574 5.56001)
1,5769,2020-05-05 11:18:16+00:00,"RT @nspector4: Basically, the @JustinTrudeau g...",en,Most of Canada’s cases in late February and ea...,,"RT @nspector4: Basically, the @JustinTrudeau g...","Barrie, Ontario",False,False,0.0,5769,44.389311,-79.690174,POINT (-79.69017 44.38931)
2,2625,2020-05-05 11:16:12+00:00,"COVID-10 cases nearing 100,000 in Iran https:/...",en,,,"COVID-10 cases nearing 100,000 in Iran https:/...",Azerbaijan,False,False,0.0,2625,40.393629,47.787251,POINT (47.78725 40.39363)
3,2615,2020-05-05 11:16:12+00:00,RT @TeaPainUSA: Tea’s gonna be straight with y...,en,BREAKING: The organization Trump used to proje...,,RT @TeaPainUSA: Tea’s gonna be straight with y...,"Florida, USA",True,False,-0.4588,2615,27.756767,-81.463983,POINT (-81.46398 27.75677)
4,6434,2020-05-05 11:18:43+00:00,RT @ashindestad: You won Britain. Enjoy the de...,en,UK now has highest coronavirus death toll in E...,,RT @ashindestad: You won Britain. Enjoy the de...,"London, England",True,False,0.8648,6434,51.507322,-0.127647,POINT (-0.12765 51.50732)


Here is a map with the tweets plotted at their geocoded locations. 
We can then analyse the sentiment and see how it matches policy stringency data. 

In [11]:
#m = folium.Map(location=[30, 0], zoom_start=2)

#for idx, row in df_with_location.iterrows():
#    Marker([row.Latitude, row.Longitude], popup=row['text']).add_to(m)   
#m
#import branca
#import branca.colormap as cm
#colormap = cm.LinearColormap(colors=['red','green'], index=[-1,1],vmin=-1,vmax=1)

In [12]:
def color_producer(val, negative_threshold=-0.6, positive_threshold=0.6):
    """Map a sentiment score to a marker colour.

    Parameters
    ----------
    val
        Sentiment score to classify.
    negative_threshold
        Scores strictly below this value are coloured 'red'.
    positive_threshold
        Scores strictly above this value are coloured 'green'.

    Returns
    -------
    str
        'red', 'green', or 'yellow' for everything in between (including
        values exactly on a threshold and NaN, for which both comparisons
        are false).
    """
    if val < negative_threshold:
        return 'red'
    if val > positive_threshold:
        return 'green'
    return 'yellow'
 
# Base map centred at latitude 30, longitude 0, at a low initial zoom level.
m_2 = folium.Map(location=[30, 0], zoom_start=2)

# Draw one circle per geocoded tweet, coloured by its sentiment bucket.
# NOTE(review): folium.Circle takes its radius in metres, so 10 is very small
# at this zoom level - consider folium.CircleMarker (pixel radius) if the
# points are hard to see.
for idx, row in df_with_location.iterrows():
    folium.Circle(
        location=[row['Latitude'], row['Longitude']],
        radius=10,
        color=color_producer(row['sentiment']),
        # Tweet text is untrusted input: parse_html=True makes folium treat it
        # as plain text instead of injecting it into the map as raw HTML.
        popup=folium.Popup(row['text'], parse_html=True),
    ).add_to(m_2)
m_2