# Creation of an interactive map

The objective of this section is to create a concluding visualisation in the form of an interactive map. It will show, per country and per year: the sentiment towards the US, the difference between politicians and non-politicians, and the main positive and negative topics.

In [None]:
import folium 
import pandas as pd
import numpy as np 
import json

### Sentiment and topics  

We first need to get the sentiment and the main topics per country

In [576]:
# Load the per-quote table and preview its first rows
topic_csv = "../Data - Project/topic_nationality_total.csv"
topic = pd.read_csv(topic_csv)
topic.head()

  elif async_ :


Unnamed: 0.1,Unnamed: 0,nationality,sentiment,topic,quoteID
0,0,United States of America,-0.228475,36,
1,1,United States of America,0.398312,102,
2,3,United States of America,-0.933972,131,
3,4,United States of America,0.598767,102,
4,5,United States of America,0.975977,72,


The topics are integers, which carry no information by themselves. We need a dictionary that maps each topic number to a topic name (built manually).

In [659]:
# Read the hand-made topic dictionary and index it by (float) topic number
dico = (
    pd.read_csv("../Data - Project/dict.csv", sep=";")
    .astype({"Topic Number": float})
    .set_index("Topic Number")
)
# Nested mapping: {column name: {topic number: value}}
dicti = dico.to_dict()

We now need to clean the data a bit to make it compatible with the interactive map. To do so, we need to make the names of the countries the same as in the geo data

In [660]:
#Dictionary making the names compatible
country_name_map = {
    "People's Republic of China": "China",
    "Kingdom of the Netherlands": "Netherlands",
    "Republic of the Congo": "Congo",
    "South Sudan": "S. Sudan",
    "Democratic Republic of the Congo": "Dem. Rep. Congo",
    "Bosnia and Herzegovina": "Bosnia and Herz.",
    "Czech Republic": "Czechia",
    "North Macedonia": "Macedonia",
    "The Bahamas": "Bahamas",
    "Dominican Republic": "Dominican Rep.",
    "Solomon Islands": "Solomon Is.",
    "Central African Republic": "Central African Rep."}

In [627]:
# Drop topic 131 and take an explicit copy, so the column assignments below
# write to an independent frame instead of a slice (avoids SettingWithCopyWarning).
# NOTE(review): why topic 131 is excluded is not visible here - confirm.
topic = topic[topic["topic"] != 131].copy()

# Keep only the part of quoteID before the first "-".
# NOTE(review): presumably the year prefix of the quote id - confirm.
# `n` is passed by keyword: positional use is rejected by pandas >= 2.0.
topic["quoteID"] = topic["quoteID"].str.split("-", n=1).str[0]

# Rows without a quoteID are assigned to 2015. The integer 2015 is kept on
# purpose: later cells select the columns as 2015 (int) and "2016".."2020" (str).
# NOTE(review): confirm that quotes with a missing id really belong to 2015.
topic["quoteID"] = topic["quoteID"].replace(np.nan, 2015)

# Align nationality labels with the geo data names. Assign the result back
# rather than using inplace=True on a column selection, which may act on a
# temporary copy and leave `topic` unchanged.
topic["nationality"] = topic["nationality"].replace(country_name_map)

In [628]:
# Preview the first rows of the topic table
topic.head()

Unnamed: 0.1,Unnamed: 0,nationality,sentiment,topic,quoteID
0,0,United States of America,-0.228475,36,2015
1,1,United States of America,0.398312,102,2015
3,4,United States of America,0.598767,102,2015
4,5,United States of America,0.975977,72,2015
5,7,United States of America,-0.892552,3,2015


We want to separate good and bad topics, so we need to filter on the sentiment scores to obtain these two groups.

In [629]:
# Mean sentiment for every (year, country) pair
sent = (
    topic.groupby(["quoteID", "nationality"])["sentiment"]
    .mean()
    .to_frame()
    .reset_index()
)

# Make the df have countries as rows and years as columns.
# Keyword arguments are required: positional arguments to DataFrame.pivot
# are rejected by pandas >= 2.0.
sent = sent.pivot(index="nationality", columns="quoteID", values="sentiment")

# Expose the country name as a regular column as well; the choropleth layers
# read it through columns=['nationality', <year>].
sent["nationality"] = sent.index
sent.columns

Index([2015, '2016', '2017', '2018', '2019', '2020', 'nationality'], dtype='object', name='quoteID')

In [649]:
# Split the quotes by sentiment: clearly positive vs clearly negative
sentiment_score = topic["sentiment"]
good = topic[sentiment_score.gt(0.3)]
bad = topic[sentiment_score.lt(-0.3)]


The data is ready. We can now apply a groupby and take the mode to get the most common topic per year and country; we then pivot the dataframe to have countries as rows and years as columns.

In [650]:
def most_common_topic(quotes):
    """Return the most frequent topic for each (quoteID, nationality) pair.

    Parameters
    ----------
    quotes : pandas.DataFrame
        Frame with "quoteID", "nationality" and "topic" columns.

    Returns
    -------
    pandas.DataFrame
        Long table with columns "quoteID", "nationality" and "topic".
    """
    return (
        quotes.groupby(["quoteID", "nationality"])["topic"]
        # value_counts() sorts by descending frequency, so index[0] is the mode
        .agg(lambda topics: topics.value_counts().index[0])
        .to_frame()
        .reset_index()
    )


good_gb = most_common_topic(good)
bad_gb = most_common_topic(bad)

# `ok_gb` is used by the following cells but was not defined anywhere in the
# notebook, so a fresh kernel failed with NameError.
# NOTE(review): it is rebuilt here as the neutral quotes (sentiment between
# -0.3 and 0.3 inclusive), by analogy with the good/bad thresholds - confirm
# this matches the original intent.
ok = topic[topic["sentiment"].between(-0.3, 0.3)]
ok_gb = most_common_topic(ok)


Index(['quoteID', 'nationality', 'topic'], dtype='object')

In [652]:
def to_labelled_year_table(per_year):
    """Pivot a long (quoteID, nationality, topic) table into countries x years.

    Missing (country, year) cells become "NA" and topic numbers are replaced
    by the names found in dicti["Topic Label"].
    """
    # Keyword arguments: positional arguments to pivot are rejected by pandas >= 2.0
    wide = per_year.pivot(index="nationality", columns="quoteID", values="topic")
    return wide.replace(np.nan, "NA").replace(dicti["Topic Label"])


# Each table is built from its OWN data. The previous version assigned
# ok_gb.replace(...) to good_gb and bad_gb, so the "positive" and "negative"
# topics shown on the map were both the neutral ones.
good_gb = to_labelled_year_table(good_gb)
bad_gb = to_labelled_year_table(bad_gb)
ok_gb = to_labelled_year_table(ok_gb)


In [665]:
# Copy the index (country names) into a regular "country" column of each table
for topic_table in (good_gb, ok_gb, bad_gb):
    topic_table["country"] = topic_table.index

### Politicians and non-politicians

We load the data and apply some preprocessing

In [661]:
# Load the politician vs non-politician statistics
df = pd.read_csv("../Data - Project/stats_politicians.csv.bz2")

# Countries as rows, years as columns, abs_diff as values.
# Keyword arguments: positional arguments to pivot are rejected by pandas >= 2.0.
# (The former `df["Country"] = df.index` before the pivot was removed: it only
# stored row numbers in a column that the pivot discarded.)
df = df.pivot(index="country", columns="year", values="abs_diff")

# Country names aligned with the geo data, kept as a regular column.
# Assign the result back rather than using inplace=True on a column selection,
# which may act on a temporary copy and leave `df` unchanged.
df["Country"] = df.index
df["Country"] = df["Country"].replace(country_name_map)


### Matching with the geo data 

For the map to function, we need to make our data match with the geo data. To do so, we keep only the countries available in both datasets

In [637]:
# Load the world GeoJSON. The context manager closes the file handle (it was
# previously left open), and the encoding is explicit so the result does not
# depend on the platform default.
# NOTE(review): the path now points to the same relative data folder as the
# CSV files above instead of a user-specific absolute Windows path - confirm
# that world-countries.json lives in that folder.
with open("../Data - Project/world-countries.json", encoding="utf-8") as f:
    country_shape = json.load(f)

In [666]:
# Countries for which we have topic data
countries = good_gb["country"].to_list()

# Keep the GeoJSON features whose name is one of our countries (original order)
output_dict = [
    feature
    for feature in country_shape['features']
    if feature['properties']["name"] in countries
]
# Names of the retained features, in the same order
common_countries = [feature["properties"]["name"] for feature in output_dict]

In [667]:
# Number of countries present in both our data and the geo data
len(common_countries)

144

144 countries are common. This is good. We now need to sort them alphabetically to match the JSON file.

In [641]:
# Alphabetically sorted names of the retained GeoJSON features
ct = sorted(feature["properties"]["name"] for feature in output_dict)


In [653]:
# Apply the country filtering on the JSON: keep the retained features, ordered
# alphabetically by country name. A single sort replaces the previous nested
# scan, which was quadratic and appended a feature several times whenever a
# country name occurred more than once.
out = sorted(output_dict, key=lambda feature: feature["properties"]["name"])

country_shape["features"] = out

To display our data on hover, we need to insert it into the JSON file.

In [670]:
# Prepare the customised hover text and store it in the GeoJSON properties.
HOVER_YEARS = range(2015, 2021)


def _year_label(table, year):
    """Return the column label `table` uses for `year`, or None if absent.

    The topic tables label 2015 with the integer 2015 and the other years with
    strings ("2016", ...), so both spellings are tried.
    """
    for label in (year, str(year)):
        if label in table.columns:
            return label
    return None


def _values_by_country(table, country_col, year):
    """Map country name -> str(value) for the `year` column of `table`."""
    label = _year_label(table, year)
    if label is None:
        return {}
    return {
        country: str(value)
        for country, value in zip(table[country_col], table[label])
    }


# Keep only the countries that also exist in the geo data.
# .copy() so the normalisation below writes to an independent frame.
df = df[df["Country"].isin(common_countries)].copy()
good_gb = good_gb[good_gb["country"].isin(common_countries)]
ok_gb = ok_gb[ok_gb["country"].isin(common_countries)]
# Fixed typo: the filtered frame used to be stored in `badd_gb`, leaving
# `bad_gb` unfiltered.
bad_gb = bad_gb[bad_gb["country"].isin(common_countries)]

# Normalize the politician versus non politician score (min-max per year)
for year in HOVER_YEARS:
    lowest, highest = df[year].min(), df[year].max()
    df[year] = (df[year] - lowest) / (highest - lowest)
df = df.round(2)

# Property-name prefix -> (table, name of its country column)
hover_sources = {
    "good_": (good_gb, "country"),
    "ok_": (ok_gb, "country"),
    "bad_": (bad_gb, "country"),
    "": (df, "Country"),
}

# Append the data to the JSON file. Values are looked up by country NAME, not
# by row position, so each feature gets its own country's data even when the
# tables differ in row order or row set. Every feature receives every
# property; a country missing from a table gets "NA".
for year in HOVER_YEARS:
    for prefix, (table, country_col) in hover_sources.items():
        values = _values_by_country(table, country_col, year)
        property_name = f"{prefix}{year}"
        for feature in country_shape['features']:
            properties = feature['properties']
            properties[property_name] = values.get(properties["name"], "NA")


144


## Create the map 

Finally, everything is ready for us to create the interactive map.

In [658]:
MAP_YEARS = range(2015, 2021)

m = folium.Map()

# Create basemap
base_map = folium.FeatureGroup(name='Basemap', overlay=True, control=False)
folium.TileLayer(tiles='OpenStreetMap').add_to(base_map)
base_map.add_to(m)


def add_sentiment_layer(folium_map, year):
    """Add the sentiment choropleth of `year` to `folium_map` and return it."""
    # `sent` labels 2015 with an int and the other years with strings
    year_column = year if year in sent.columns else str(year)
    return folium.Choropleth(
        # The GeoJSON data representing the world countries
        geo_data=country_shape,
        name=str(year),
        data=sent,
        # Two columns: the country name and the numerical value
        columns=['nationality', year_column],
        key_on='feature.properties.name',
        legend_name='Sentiment towards the US per year',
        fill_color='RdYlGn',
        fill_opacity=0.7,
        line_opacity=0.2,
        missing_kwds={'color': 'white'},
        overlay=False,
        nan_fill_color="White",
    ).add_to(folium_map)


def add_hover_tooltip(layer, year):
    """Attach the hover tooltip of `year` to the GeoJSON of `layer`."""
    layer.geojson.add_child(
        folium.features.GeoJsonTooltip(
            fields=['name', f'{year}', f'good_{year}', f'bad_{year}'],
            aliases=['Country', 'Politic vs non politic',
                     'Most positive topic : ', "Negative topics : "],
            style=('background-color: grey; color: white;'),
            localize=True,
        )
    )


def drop_legend(layer):
    """Remove the colour-scale legend attached to `layer`."""
    # Iterate over a snapshot of the keys: deleting from a mapping while
    # iterating over it directly is unsafe.
    for key in list(layer._children):
        if key.startswith("color_map"):
            del layer._children[key]


# Add the sentiment for all the years
layers = {year: add_sentiment_layer(m, year) for year in MAP_YEARS}

# Add the hover informations for all the years
folium.LayerControl().add_to(m)
for year, layer in layers.items():
    add_hover_tooltip(layer, year)

# Keep only one legend (the one of the first year)
for year, layer in layers.items():
    if year != MAP_YEARS[0]:
        drop_legend(layer)

# Keep the per-year variable names that the previous version of this cell defined
(chloropeth_2015, chloropeth_2016, chloropeth_2017,
 chloropeth_2018, chloropeth_2019, chloropeth_2020) = (
    layers[year] for year in MAP_YEARS
)

# Save the map
m.save('final_map.html')
