In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geojson
import json

#### Load countries data and keep features with least number of NaNs

In [2]:
country_data = pd.read_pickle("../../milestone1/data_final.pkl")
country_data_flt = country_data[['dt_year', 'country', 'Annual CO₂ emissions (zero filled)','PM2.5 air pollution, population exposed to levels exceeding WHO guideline value (% of total)', 'mortality_rate_perc', 'GDP (constant 2015 US$)','annual_growth_rate_perc', 'fdi_inflows_millionusd', 'total_government_revenue_proportion_of_gdp_perc']]
country_data_flt = country_data_flt.rename(columns={
    'dt_year': 'year',
    'Annual CO₂ emissions (zero filled)': 'co2_emissions',
    'PM2.5 air pollution, population exposed to levels exceeding WHO guideline value (% of total)': 'air_pollution',
    'annual_growth_rate_perc': 'growth',
    'fdi_inflows_millionusd': 'fdi',
    'total_government_revenue_proportion_of_gdp_perc': 'revenue_proportion',
    'mortality_rate_perc': 'mortality',
    'GDP (constant 2015 US$)': 'gdp'
})

#### Load geoJSON map data

In [3]:
# load world map data
with open('custom.geo.json') as geo:
    gj = geojson.load(geo)
features = gj['features']

# keep only useful properties
keep_properties = ['name','name_long','iso_a3','continent','label_x','label_y']

for k in reversed(features):
    properties = {}
    for label in keep_properties:
        if not np.isnan(k["properties"][label]): 
            properties[label] = k["properties"][label]
    k["properties"] = properties
    

#### Change countries name to match geoJSON data

In [4]:
oldNames = [
    'Bolivia (Plurinational State of)',
    'Bosnia and Herzegovina',
    'Brunei Darussalam',
    'Central African Republic',
    'Democratic Republic of the Congo',
    'Dominican Republic',
    'Equatorial Guinea',
    'Iran (Islamic Republic of)',
    'Lao People\'s Democratic Republic',
    'Democratic People\'s Republic of Korea',
    'Republic of Moldova',
    'Russian Federation',
    'Solomon Islands',
    'South Sudan',
    'Republic of Korea',
    'State of Palestine',
    'Syrian Arab Republic',
    'Türkiye',
    'United Kingdom of Great Britain and Northern Ireland',
    'United Republic of Tanzania',
    'Venezuela (Bolivarian Republic of)',
    'Viet Nam',
    'ESwatini'
]
newNames = [
    'Bolivia',
    'Bosnia and Herz.',
    'Brunei',
    'Central African Rep.',
    'Dem. Rep. Congo',
    'Dominican Rep.',
    'Eq. Guinea',
    'Iran',
    'Laos',
    'North Korea',
    'Moldova',
    'Russia',
    'Solomon Is.',
    'S. Sudan',
    'South Korea',
    'Palestine',
    'Syria',
    'Turkey',
    'United Kingdom',
    'Tanzania',
    'Venezuela',
    'Vietnam',
    'eSwatini'
]

df = country_data_flt.replace(oldNames,newNames)

#### Join data

In [5]:
feats = ['co2_emissions',
    'air_pollution',
    'growth',
    'fdi',
    'revenue_proportion',
    'mortality',
    'gdp'
]

for f in feats:
    for k in reversed(features):

        country_name = k["properties"]["name"]
        #print(country_name)
        country_data = df.loc[df['country'] == country_name]
        if country_data.empty and f == 'Annual CO₂ emissions':
            print(country_name)
        else:
            # create dict {year: emission}
            emissions = dict(zip(country_data["year"], country_data[f]))
            k["properties"][f] = emissions

In [6]:
df_countries = df['country'].unique().tolist()
map_countries = [k["properties"]["name"] for k in features]

print('### Countries in map but not in dataset:')
for c in map_countries:
    if c not in df_countries:
        print(c)
print('')

print('### Countries in dataset but not in map:')
for c in df_countries:
    if c not in map_countries:
        print(c)

### Countries in map but not in dataset:
Falkland Is.
N. Cyprus
Taiwan
Somaliland
W. Sahara
eSwatini

### Countries in dataset but not in map:
American Samoa
Andorra
Anguilla
Antigua and Barbuda
Aruba
Bahrain
Barbados
Bermuda
British Virgin Islands
Cabo Verde
Cayman Islands
China, Hong Kong Special Administrative Region
China, Macao Special Administrative Region
Comoros
Cook Islands
Curaçao
Dominica
Eswatini
French Polynesia
Grenada
Guadeloupe
Isle of Man
Kiribati
Liechtenstein
Maldives
Malta
Marshall Islands
Martinique
Mauritius
Mayotte
Micronesia (Federated States of)
Monaco
Montserrat
Nauru
Niue
Palau
Saint Helena
Saint Kitts and Nevis
Saint Lucia
Saint Vincent and the Grenadines
Samoa
San Marino
Sao Tome and Principe
Seychelles
Singapore
Sint Maarten (Dutch part)
Tonga
Turks and Caicos Islands
Tuvalu
United States Virgin Islands


#### Save file to json format and csv


In [7]:
with open('mapdata.json', 'w') as outfile:
    json.dump(gj, outfile)