In [1]:
import pandas as pd
import numpy as np

import folium
from folium.plugins import HeatMap

import countries

# Data Processing

### read data

In [2]:
# Load the tweet export (UTF-8 encoded CSV) and preview its first rows.
TWEETS_CSV = "./25th-3rd_Tweets.csv"
rd = pd.read_csv(TWEETS_CSV, encoding="utf-8")
rd.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,ID,Name,Created_at,Location,Lat,Long,Text,Sentiment,Toxic,severe_toxicity,Obscene,Threat,Insult,identity_attack,sexual_explicit,Adjusted_Toxic,Adjusted_Sentiment
0,0,0,1.242895e+18,JakeLX,2020-03-25 19:22:15,"Nashville, TN",-87.022482,35.994568,"If I see a senior citizen walking outside, am ...",0.383545,0.02433,0.001378,0.001525,0.014227,0.007907,0.004199,0.00132,-0.0,0.23291
1,1,1,1.242895e+18,UNSHAKABLE_Sal,2020-03-25 19:22:11,UK,-0.444459,51.632236,Stay home. Stay safe. We’ll celebrate soon. To...,0.928223,0.004102,0.000605,0.001057,0.001796,0.001789,0.000997,0.000849,-0.0,-0.0
2,2,2,1.242895e+18,OwlWitch13,2020-03-25 19:22:10,"New Orleans, LA",-90.237612,29.968359,Anyone voting for the ignorant piece of shit @...,0.81543,0.882833,0.099645,0.736021,0.006798,0.826151,0.018405,0.00822,0.765665,-0.0
3,3,3,1.242895e+18,LiveBoston617,2020-03-25 19:22:08,"Boston, MA",-71.191421,42.227797,🔘BOSTON COVID-19🔘\n\nSpotted earlier today on ...,0.088501,0.0039,0.000593,0.001315,0.001519,0.001707,0.001012,0.001065,-0.0,0.822998
4,4,4,1.242895e+18,islamboghdady1,2020-03-25 19:22:06,"England, United Kingdom",-0.549769,51.870646,If anyone needs help or knows anyone that need...,0.328369,0.003736,0.000598,0.001269,0.001188,0.001661,0.001112,0.000853,-0.0,0.343262


In [3]:
# Keep the timestamp, the two coordinate columns and the score columns.
wanted_columns = [
    'Created_at', 'Long', 'Lat', 'Sentiment',
    'Toxic', 'severe_toxicity', 'Obscene', 'Threat',
    'Insult', 'identity_attack', 'sexual_explicit',
]

# The rename is deliberately crossed: in this CSV the 'Long' column carries
# latitude values and 'Lat' carries longitude values (the preview row for
# Nashville, TN shows Lat=-87.02 and Long=35.99).
data = (
    rd.reindex(columns=wanted_columns)
      .rename(columns={'Long': 'lat', 'Lat': 'lon'})
)
data.head()

Unnamed: 0,Created_at,lat,lon,Sentiment,Toxic,severe_toxicity,Obscene,Threat,Insult,identity_attack,sexual_explicit
0,2020-03-25 19:22:15,35.994568,-87.022482,0.383545,0.02433,0.001378,0.001525,0.014227,0.007907,0.004199,0.00132
1,2020-03-25 19:22:11,51.632236,-0.444459,0.928223,0.004102,0.000605,0.001057,0.001796,0.001789,0.000997,0.000849
2,2020-03-25 19:22:10,29.968359,-90.237612,0.81543,0.882833,0.099645,0.736021,0.006798,0.826151,0.018405,0.00822
3,2020-03-25 19:22:08,42.227797,-71.191421,0.088501,0.0039,0.000593,0.001315,0.001519,0.001707,0.001012,0.001065
4,2020-03-25 19:22:06,51.870646,-0.549769,0.328369,0.003736,0.000598,0.001269,0.001188,0.001661,0.001112,0.000853


### Heatmap

In [4]:
# Linearly rescale Sentiment with (x - 0.5) * 2, so 0.5 maps to 0 and a
# [0, 1] score lands in [-1, 1].
# The column is addressed by name (not position 3) and recomputed from the
# untouched `rd` frame, so re-running this cell does not rescale twice.
# `data` keeps the row index of `rd`, so the assignment aligns row by row.
data['Sentiment'] = (rd['Sentiment'] - 0.5) * 2
data.head()

Unnamed: 0,Created_at,lat,lon,Sentiment,Toxic,severe_toxicity,Obscene,Threat,Insult,identity_attack,sexual_explicit
0,2020-03-25 19:22:15,35.994568,-87.022482,-0.23291,0.02433,0.001378,0.001525,0.014227,0.007907,0.004199,0.00132
1,2020-03-25 19:22:11,51.632236,-0.444459,0.856445,0.004102,0.000605,0.001057,0.001796,0.001789,0.000997,0.000849
2,2020-03-25 19:22:10,29.968359,-90.237612,0.630859,0.882833,0.099645,0.736021,0.006798,0.826151,0.018405,0.00822
3,2020-03-25 19:22:08,42.227797,-71.191421,-0.822998,0.0039,0.000593,0.001315,0.001519,0.001707,0.001012,0.001065
4,2020-03-25 19:22:06,51.870646,-0.549769,-0.343262,0.003736,0.000598,0.001269,0.001188,0.001661,0.001112,0.000853


In [5]:
# Rows of [lat, lon, Sentiment]; HeatMap reads the third value as the weight.
# NOTE(review): Sentiment was rescaled above and can be negative - confirm
# that signed weights are what the heat layer should receive.
heat_columns = ['lat', 'lon', 'Sentiment']
data_h = data.reindex(columns=heat_columns).to_numpy().tolist()

# Draw the heat layer on a default map and write it to disk.
hmap = folium.Map()
HeatMap(data_h, radius=18).add_to(hmap)
hmap.save(r'visu/heatmap_sentiment_25.html')

### Distribution

In [6]:
# Feature group that collects one circle marker per row of `data`
dis = folium.map.FeatureGroup()

# Rows of [lat, lon, Sentiment]; only the two coordinates are used below
data_d = data.reindex(columns=['lat', 'lon', 'Sentiment']).values.tolist()

for lat, lon, _sentiment in data_d:
    folium.CircleMarker(
        location=[lat, lon],
        radius=1,  # marker radius
        color='red',
    ).add_to(dis)

# Attach the marker layer to a fresh map and write it to disk.
# NOTE(review): the file name says "neg", but no filtering on Sentiment
# happens here - every row is plotted. Confirm that is intended.
dis_map = folium.Map()
dis.add_to(dis_map)
dis_map.save(r'visu/distribution_neg_25.html')

### Find country name

In [None]:
# Border shapes used to resolve a coordinate to a country
cc = countries.CountryChecker('TM_WORLD_BORDERS-0.3.shp')

# Lookup result (as a string) -> name stored in `country_name`.
# Anything not listed is kept unchanged. The 'None' key is the string form
# of the lookup result (presumably str(None) when no country matched) and
# is folded into the United States, exactly like the original chain did.
name_fixes = {
    'United States': 'United States of America',
    'None': 'United States of America',
    'Hong Kong': 'China',
    'Taiwan': 'China',
    'Korea, Republic of': 'South Korea',
    'Korea': 'South Korea',
    'Viet Nam': 'Vietnam',
    'Iran (Islamic Republic of)': 'Iran',
    'Singapore': 'Malaysia',
    'Serbia': 'Republic of Serbia',
    'Burma': 'Myanmar',
    'Isle of Man': 'United Kingdom',
    'Palestine': 'Israel',
    'Guernsey': 'France',
    'Malta': 'Italy',
    'The former Yugoslav Republic of Macedonia': 'Macedonia',
    'Andorra': 'Spain',
    'Libyan Arab Jamahiriya': 'Libya',
}

total = len(data)
country_name = list()
for done, (lat, lon) in enumerate(zip(data['lat'], data['lon'])):
    # Progress in percent, rewritten on the same output line
    print('\r' + '%.3f' % (done/total*100), end = '')
    found = str(cc.getCountry(countries.Point(lat, lon)))
    country_name.append(name_fixes.get(found, found))

### merge data, insert country name

In [None]:
# Place the resolved country names as a new column at position 3
data.insert(loc=3, column='country', value=country_name)

# The raw coordinates are dropped from `data` in place
for coord in ('lat', 'lon'):
    del data[coord]
data.head()

In [None]:
 # backup data
backup = data.copy(deep=True)  # independent snapshot of `data` at this point

# Plot 1

### group data by date

In [None]:
# Reduce each timestamp to its day of month, stored as a string ('25', '3', ...)
# so it can be compared against str(day) further down.
# The timestamps in this file look like '2020-03-25 19:22:15'; splitting on
# '/' and taking element 1 raises IndexError on that format, so the value
# is parsed as a datetime instead (slash-separated month/day/year strings
# parse as well and yield the same day).
created = pd.to_datetime(data['Created_at'])
# Int64 keeps the days integral even if a timestamp is missing.
data['Created_at'] = created.dt.day.astype('Int64').astype(str)
data.head()

In [None]:
# Group by day and country.
# `data_d` must be rebound here: until this line it still holds the plain
# list built for the distribution map, which has no `.groups`.
# Several grouping keys have to be passed as a list; a tuple is treated as
# one single key.
data_d = data.groupby(['Created_at', 'country'])
data_d.groups

In [None]:
# Mean of every numeric column per group.
# GroupBy.mean replaces aggregate(np.mean), which newer pandas warns about.
mean1 = data_d.mean(numeric_only=True)
attr = 'Sentiment'

# Stretch the group means of `attr` so that they span [-1, 1].
# The column is addressed by name throughout instead of by position 0.
scores = mean1[attr]

# NOTE(review): this step was labelled "softmax", but it divides the raw
# mean (not its exponential) by the sum of exponentials. That factor is a
# positive constant, so the rescaling below cancels it and the final values
# are a plain min-max stretch. Confirm whether a true softmax was intended.
scores = 100 * (scores / np.exp(scores).sum())

# Divide by half the spread so that max - min becomes 2.
# A zero spread (one group, or all means equal) yields non-finite values.
spread = scores.max() - scores.min()
scores = scores / (spread / 2)

# Shift so that the maximum lands on +1 (and the minimum on -1).
scores = scores - (scores.max() - 2) - 1

mean1[attr] = scores
mean1

In [None]:
# data to plot: index entries are (day, country) pairs
index = mean1.index.tolist()
value = mean1[attr].tolist()

country_geo = 'world-countries.json' # read geo file

# One map per day that actually occurs in the grouped data, in order of
# first appearance (previously a fixed range(7, 14)).
days = list(dict.fromkeys(key[0] for key in index))

for d in days:
    # Rows are rebuilt for every day; accumulating them across iterations
    # would leak earlier days into later maps.
    rows = [(key[1], val) for key, val in zip(index, value) if key[0] == d]
    data_to_plot = pd.DataFrame(rows, columns=['country', attr])

    # plot
    # folium.Choropleth replaces Map.choropleth, which newer folium
    # releases no longer provide; the map is not named `map` so the
    # builtin stays usable.
    day_map = folium.Map() # create a new map
    folium.Choropleth(geo_data=country_geo, data=data_to_plot,
                      columns=['country', attr],
                      key_on='feature.properties.name',
                      fill_color='YlOrRd', nan_fill_color='white',
                      fill_opacity=0.7, line_opacity=0.2,
                      legend_name=attr).add_to(day_map)
    day_map.save('plot_data_'+str(d)+'.html')

# Plot 2

### group data by country name

In [None]:
# Bucket the rows by country, then show one bucket as a spot check
data_c = data.groupby(by='country')
data_c.get_group('Canada')

In [None]:
# Mean of every numeric column per country.
# numeric_only=True skips the string-valued 'Created_at' column, which
# aggregate(np.mean) cannot average (a TypeError on current pandas).
mean2 = data_c.mean(numeric_only=True)
attr = 'Sentiment'

# Stretch the country means of `attr` so that they span [-1, 1].
# The column is addressed by name throughout instead of by position 0.
scores = mean2[attr]

# NOTE(review): this step was labelled "softmax", but it divides the raw
# mean (not its exponential) by the sum of exponentials. That factor is a
# positive constant, so the rescaling below cancels it and the final values
# are a plain min-max stretch. Confirm whether a true softmax was intended.
scores = 100 * (scores / np.exp(scores).sum())

# Divide by half the spread so that max - min becomes 2.
# A zero spread (one country, or all means equal) yields non-finite values.
spread = scores.max() - scores.min()
scores = scores / (spread / 2)

# Shift so that the maximum lands on +1 (and the minimum on -1).
scores = scores - (scores.max() - 2) - 1

mean2[attr] = scores
mean2

In [None]:
# Disabled experiment: everything below sits inside a string literal, so
# none of it runs. It refers to a frame named `mean`; the visible cells only
# define `mean1` and `mean2`.
"""
# adding positive or negative
mean.pop('pos_or_neg')
pos_or_neg = list()
for i in range(len(mean)):
    if mean.iloc[i, 0] > 0:
        pos_or_neg.append(1)
    else:
        pos_or_neg.append(-1)
mean.insert(1, 'pos_or_neg', pos_or_neg)
mean
"""

In [None]:
# Flatten the per-country means into a two-column frame for plotting
index = mean2.index.tolist()
value = mean2[attr].tolist()

data_to_plot = pd.DataFrame(list(zip(index, value)), columns=['country', attr])
data_to_plot.head()

In [None]:
# plot
country_geo = 'world-countries.json' # read geo file

# folium.Choropleth replaces Map.choropleth, which newer folium releases no
# longer provide; the map is not named `map` so the builtin stays usable.
sentiment_map = folium.Map() # create a new map
folium.Choropleth(geo_data=country_geo, data=data_to_plot,
                  columns=['country', attr],
                  key_on='feature.properties.name',
                  fill_color='YlOrRd', nan_fill_color='white',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name=attr).add_to(sentiment_map)
sentiment_map.save(r'visu/plot_sentiment.html')
sentiment_map

# Test

In [None]:
# Disabled scratch code: everything below sits inside a string literal, so
# none of it runs. It refers to `tw`, which no visible cell defines.
"""
time = list()
for i in range(len(tw)):
    tmp = tw['Created_at'][i].split('/')
    time .append([tmp[0], tmp[1]])

time[:5]
"""