In [1]:
# Dependencies and Setup
import pandas as pd

In [2]:
# Location of the global MMR csv, relative to this notebook
global_filepath = '../Resources/mmr_global.csv'

# Load the csv, supplying the column labels explicitly via `names`
mmr_global_df = pd.read_csv(
    filepath_or_buffer=global_filepath,
    names=['name', 'mmr', 'category', 'location', 'country'],
)

# Preview the first rows
mmr_global_df.head()

Unnamed: 0,name,mmr,category,location,country
0,Belarus,2,Very Low (<100),53.744 27.947,Belarus
1,Italy,2,Very Low (<100),45.3889 10.7776,Italy
2,Norway,2,Very Low (<100),60.5366 7.4496,Norway
3,Poland,2,Very Low (<100),52.1484 19.311,Poland
4,Czechia,3,Very Low (<100),49.836 15.5465,Czechia


In [3]:
# Split the "lat lng" text in 'location' into separate latitude/longitude columns.
# One vectorized .str.split replaces the row-by-row .loc loop; splitting on any
# run of whitespace also tolerates repeated spaces between the two values.
coordinates = mmr_global_df['location'].str.split(expand=True)

# The pieces are kept as text (no numeric cast), exactly as the loop stored them
mmr_global_df['latitude'] = coordinates[0]
mmr_global_df['longitude'] = coordinates[1]

mmr_global_df.head()

Unnamed: 0,name,mmr,category,location,country,latitude,longitude
0,Belarus,2,Very Low (<100),53.744 27.947,Belarus,53.744,27.947
1,Italy,2,Very Low (<100),45.3889 10.7776,Italy,45.3889,10.7776
2,Norway,2,Very Low (<100),60.5366 7.4496,Norway,60.5366,7.4496
3,Poland,2,Very Low (<100),52.1484 19.311,Poland,52.1484,19.311
4,Czechia,3,Very Low (<100),49.836 15.5465,Czechia,49.836,15.5465


In [4]:
# Rank rows by 'mmr' in ascending order; method='min' gives tied values the
# lowest rank of their group, and the result is stored as integers
mmr_rank = mmr_global_df['mmr'].rank(method='min')
mmr_global_df['ranking'] = mmr_rank.astype(int)

mmr_global_df.head()

Unnamed: 0,name,mmr,category,location,country,latitude,longitude,ranking
0,Belarus,2,Very Low (<100),53.744 27.947,Belarus,53.744,27.947,1
1,Italy,2,Very Low (<100),45.3889 10.7776,Italy,45.3889,10.7776,1
2,Norway,2,Very Low (<100),60.5366 7.4496,Norway,60.5366,7.4496,1
3,Poland,2,Very Low (<100),52.1484 19.311,Poland,52.1484,19.311,1
4,Czechia,3,Very Low (<100),49.836 15.5465,Czechia,49.836,15.5465,5


In [5]:
# Load the JSON data published at this URL into a dataframe
url = 'https://cdn.anychart.com/samples/maps-general-features/world-choropleth-map/data.json'
global_data = pd.read_json(url)

# Preview the first rows
global_data.head()

Unnamed: 0,name,id,area,density,population
0,Monaco,MC,2.0,18900.0,37800
1,Singapore,SG,719.0,7697.0,5535000
2,Hong Kong,HK,1104.0,6571.0,7234800
3,Vatican City,VA,0.44,1818.0,800
4,Bahrain,BH,757.0,1631.0,1234571


In [6]:
# Left-join the reference data onto the MMR rows by country name to pick up the
# 'id' column; rows whose name has no match get NaN in the joined columns
mmr_global_data_merged = pd.merge(mmr_global_df, global_data, how='left', on='name')

mmr_global_data_merged.head()

Unnamed: 0,name,mmr,category,location,country,latitude,longitude,ranking,id,area,density,population
0,Belarus,2,Very Low (<100),53.744 27.947,Belarus,53.744,27.947,1,BY,207600.0,46.0,9469200.0
1,Italy,2,Very Low (<100),45.3889 10.7776,Italy,45.3889,10.7776,1,IT,301308.0,202.0,60762320.0
2,Norway,2,Very Low (<100),60.5366 7.4496,Norway,60.5366,7.4496,1,NO,323782.0,15.6,5063709.0
3,Poland,2,Very Low (<100),52.1484 19.311,Poland,52.1484,19.311,1,PL,312685.0,123.0,38496000.0
4,Czechia,3,Very Low (<100),49.836 15.5465,Czechia,49.836,15.5465,5,,,,


In [7]:
# Keep a subset of the merged columns, in this order, as an independent copy
selected_columns = ['name', 'id', 'mmr', 'ranking', 'category', 'latitude', 'longitude']
mmr_global_id_df = mmr_global_data_merged.loc[:, selected_columns].copy()

# Preview the first rows
mmr_global_id_df.head()

Unnamed: 0,name,id,mmr,ranking,category,latitude,longitude
0,Belarus,BY,2,1,Very Low (<100),53.744,27.947
1,Italy,IT,2,1,Very Low (<100),45.3889,10.7776
2,Norway,NO,2,1,Very Low (<100),60.5366,7.4496
3,Poland,PL,2,1,Very Low (<100),52.1484,19.311
4,Czechia,,3,5,Very Low (<100),49.836,15.5465


In [8]:
# Count the non-null values per column; a count below the row total means
# that column contains missing values
mmr_global_id_df.notna().sum()

name         185
id           166
mmr          185
ranking      185
category     185
latitude     185
longitude    185
dtype: int64

In [9]:
# Boolean mask: True for every row whose 'id' is missing
id_none = mmr_global_id_df['id'].isna()

# Index labels of the rows with a missing id. The mask is used directly as the
# selector, with no `== True` comparison.
index_id_none = id_none[id_none].index

In [10]:
# Names of the countries whose id is missing, in row order
list_id_none = mmr_global_id_df.loc[index_id_none, 'name'].tolist()

list_id_none

['Czechia',
 'North Macedonia',
 'Republic of Korea',
 'Russian Federation',
 'United States of America',
 'Puerto Rico (USA)',
 'State of Palestine',
 'Brunei Darussalam',
 'Syrian Arab Republic',
 'Viet Nam',
 'Cabo Verde',
 'Micronesia (Federated States of)',
 'Democratic People’s Republic of Korea',
 'Solomon islands',
 'Sao Tome and Principe',
 'Lao People’s Democratic Republic',
 'Congo',
 'Eswatini',
 'Côte d’Ivoire']

In [11]:
# Spelling used in the MMR data -> spelling used in `global_data`.
# An explicit mapping replaces two parallel lists that were paired purely by
# position, so each fix stays attached to its country even if the set or order
# of unmatched rows changes.
country_name_fixes = {
    'Czechia': 'Czech Republic',
    'North Macedonia': 'Macedonia',
    'Republic of Korea': 'South Korea',
    'Russian Federation': 'Russia',
    'United States of America': 'United States',
    'Puerto Rico (USA)': 'Puerto Rico',
    'State of Palestine': 'Palestine',
    'Brunei Darussalam': 'Brunei',
    'Syrian Arab Republic': 'Syria',
    'Viet Nam': 'Vietnam',
    'Cabo Verde': 'Cape Verde',
    'Micronesia (Federated States of)': 'Federated States of Micronesia',
    'Democratic People’s Republic of Korea': 'North Korea',
    'Solomon islands': 'Solomon Islands',
    'Sao Tome and Principe': 'São Tomé and Príncipe',
    'Lao People’s Democratic Republic': 'Laos',
    'Congo': 'Republic of the Congo',
    'Eswatini': 'Swaziland',
    'Côte d’Ivoire': 'Ivory Coast',
}

# Kept so anything that still refers to the original list keeps working
country_name = list(country_name_fixes.values())

# Rows that still lack an id, and the replacement name for each of them.
# Re-running the cell is harmless: once every id is filled the mask is empty.
missing_id_rows = mmr_global_id_df['id'].isna()
renamed = mmr_global_id_df.loc[missing_id_rows, 'name'].map(country_name_fixes)

# Stop with a readable message rather than silently mislabelling a country
unmapped = mmr_global_id_df.loc[renamed.index[renamed.isna()], 'name'].tolist()
if unmapped:
    raise ValueError('No replacement name defined for: {}'.format(unmapped))

# Lookup of name -> id, keeping the first id listed for each name
id_by_name = global_data.drop_duplicates(subset='name').set_index('name')['id']

absent = sorted(set(renamed) - set(id_by_name.index))
if absent:
    raise ValueError('Replacement names not found in global_data: {}'.format(absent))

# Write the corrected names and their ids back (aligned on the row index)
mmr_global_id_df.loc[missing_id_rows, 'name'] = renamed
mmr_global_id_df.loc[missing_id_rows, 'id'] = renamed.map(id_by_name)

In [12]:
# Recount the non-null values per column; 'id' should now equal the row total
mmr_global_id_df.notna().sum()

name         185
id           185
mmr          185
ranking      185
category     185
latitude     185
longitude    185
dtype: int64

In [13]:
# Show name and id for the rows that originally had no id, to eyeball the result
mmr_global_id_df[['name', 'id']].loc[index_id_none]

Unnamed: 0,name,id
4,Czech Republic,CZ
27,Macedonia,MK
42,South Korea,KR
50,Russia,RU
60,United States,US
61,Puerto Rico,PR
68,Palestine,PS
73,Brunei,BN
74,Syria,SY
86,Vietnam,VN


In [14]:
# Destination of the exported csv, relative to this notebook
global_csv = '../output_file/mmr_global.csv'

# Write the frame to disk without the index column
mmr_global_id_df.to_csv(path_or_buf=global_csv, index=False)