In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geojson
import json

#### Load countries data and keep features with least number of NaNs

In [107]:
country_data = pd.read_pickle("../../milestone1/data_viz.pkl")

In [108]:
country_data_flt = country_data[['Year', 'country', 'Revenue', 'FDI','Growth_rate', 'CO2_emission', 'GDP','Mortality_rate','Air pollution','Literacy rate','Renewable_energy']]
country_data_flt = country_data_flt.rename(columns={
    'Year': 'year',
    'Revenue': 'revenue',
    'FDI': 'fdi',
    'Growth_rate': 'growth_rate',
    'CO2_emission': 'co2_emissions',
    'GDP': 'gdp',
    'Mortality_rate':'mortality_rate',
    'Air pollution':'air_pollution',
    'Literacy rate':'literacy_rate',
    'Renewable_energy':'renewable'
})
country_data_flt = country_data_flt[country_data_flt['year'] >= 1950]

#### Load geoJSON map data

In [109]:
# load world map data
with open('custom.geo.json') as geo:
    gj = geojson.load(geo)
features = gj['features']

# keep only useful properties
keep_properties = ['name','name_long','iso_a3','continent','label_x','label_y']

for k in reversed(features):
    properties = {}
    for label in keep_properties: 
        properties[label] = k["properties"][label]
    k["properties"] = properties
    

In [110]:
with open('empty_map.geo.json', 'w') as outfile:
    json.dump(gj, outfile)

#### Change countries name to match geoJSON data

In [111]:
oldNames = [
    'Bolivia (Plurinational State of)',
    'Bosnia and Herzegovina',
    'Brunei Darussalam',
    'Central African Republic',
    'Democratic Republic of the Congo',
    'Dominican Republic',
    'Equatorial Guinea',
    'Iran (Islamic Republic of)',
    'Lao People\'s Democratic Republic',
    'Democratic People\'s Republic of Korea',
    'Republic of Moldova',
    'Russian Federation',
    'Solomon Islands',
    'South Sudan',
    'Republic of Korea',
    'State of Palestine',
    'Syrian Arab Republic',
    'Türkiye',
    'United Kingdom of Great Britain and Northern Ireland',
    'United Republic of Tanzania',
    'Venezuela (Bolivarian Republic of)',
    'Viet Nam',
    'ESwatini',
    'United States',
    'Cote d\'Ivoire'
]
newNames = [
    'Bolivia',
    'Bosnia and Herz.',
    'Brunei',
    'Central African Rep.',
    'Dem. Rep. Congo',
    'Dominican Rep.',
    'Eq. Guinea',
    'Iran',
    'Laos',
    'North Korea',
    'Moldova',
    'Russia',
    'Solomon Is.',
    'S. Sudan',
    'South Korea',
    'Palestine',
    'Syria',
    'Turkey',
    'United Kingdom',
    'Tanzania',
    'Venezuela',
    'Vietnam',
    'eSwatini',
    'United States of America',
    'Côte d\'Ivoire'
]

df = country_data_flt.replace(oldNames,newNames)

#### Join data

In [112]:
feats = [
    'revenue',
    'fdi',
    'growth_rate',
    'co2_emissions',
    'gdp',
    'mortality_rate',
    'air_pollution',
    'literacy_rate',
    'renewable'
]

for f in feats:
    for k in reversed(features):

        country_name = k["properties"]["name"]
        country_data = df.loc[df['country'] == country_name]

        if country_data.empty and f == 'co2_emissions':
            print(country_name)
        else:
            # create dict {year: value}
            val = dict(zip(country_data["year"], country_data[f]))
            k["properties"][f] = val

eSwatini
Dem. Rep. Congo
W. Sahara
Somaliland
N. Cyprus
Timor-Leste
Falkland Is.


In [113]:
df_countries = df['country'].unique().tolist()
map_countries = [k["properties"]["name"] for k in features]

print('### Countries in map but not in dataset:')
for c in map_countries:
    if c not in df_countries:
        print(c)
print('')

print('### Countries in dataset but not in map:')
for c in df_countries:
    if c not in map_countries:
        print(c)

### Countries in map but not in dataset:
Falkland Is.
Timor-Leste
N. Cyprus
Somaliland
W. Sahara
Dem. Rep. Congo
eSwatini

### Countries in dataset but not in map:
Africa
Africa (GCP)
Aland Islands
American Samoa
Andorra
Anguilla
Antarctica
Antigua and Barbuda
Aruba
Asia
Asia (GCP)
Asia (excl. China and India)
Bahrain
Barbados
Bermuda
Bonaire Sint Eustatius and Saba
British Virgin Islands
Cape Verde
Central America (GCP)
Christmas Island
Comoros
Cook Islands
Curacao
Democratic Republic of Congo
Dominica
Eswatini
Europe
Europe (GCP)
Europe (excl. EU-27)
Europe (excl. EU-28)
European Union (27)
European Union (27) (GCP)
European Union (28)
Faeroe Islands
Falkland Islands
French Equatorial Africa (GCP)
French Guiana
French Polynesia
French West Africa (GCP)
Grenada
Guadeloupe
Guernsey
High-income countries
Hong Kong
International transport
Isle of Man
Jersey
Kiribati
Kuwaiti Oil Fires (GCP)
Leeward Islands (GCP)
Liechtenstein
Low-income countries
Lower-middle-income countries
Macao
Maldiv

#### Save file to json format and csv


In [114]:
with open('mapdata.json', 'w') as outfile:
    json.dump(gj, outfile)

### NOTE remember to change NaN to "" ###

In [115]:
df.to_csv('worldmap_csv.csv')

In [116]:
maxs = df.groupby('country').max()[feats]
mins = df.groupby('country').min()[feats]

In [117]:
maxs.to_csv('countries_max.csv')
mins.to_csv('countries_min.csv')