# Process Worldwide Data from WHO

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import pytz
import geopandas as gpd
import mapclassify as mc

Step0:  
Read Data and get basic country groups

0.0 Report Data

In [2]:
# Read updated JSON file
import json
with open("global-covid19-who-gis.json") as f:
    df_world_all = json.load(f)
df_world = df_world_all['result']['pageContext']['rawDataSets']['countryGroups']
df_world_original = pd.DataFrame()
for i in range (0,len(df_world)):
    df_world_per_line = pd.DataFrame(df_world[i]['data']['rows'], columns = ['Date_reported','WHO_region',
                                                                          'New_deaths','Cumulative_deaths',
                                                                          'Weekly_deaths','Weekly_deaths_change',
                                                                          'Deaths_per_100k',
                                                                          'New_cases','Cumulative_cases',
                                                                          'Weekly_cases','Weekly_cases_change',
                                                                          'Cases_per_100k',
                                                                          'WkCasePop','WkDeathPop'])
    df_world_per_line['Country_code'] = df_world[i]['value']
    df_world_original = df_world_original.append(df_world_per_line)
df_world_original = df_world_original.reset_index()
df_world_original

Unnamed: 0,index,Date_reported,WHO_region,New_deaths,Cumulative_deaths,Weekly_deaths,Weekly_deaths_change,Deaths_per_100k,New_cases,Cumulative_cases,Weekly_cases,Weekly_cases_change,Cases_per_100k,WkCasePop,WkDeathPop,Country_code
0,0,1578009600000,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
1,1,1578096000000,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
2,2,1578182400000,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
3,3,1578268800000,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
4,4,1578355200000,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97639,407,1613174400000,WPRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,TV
97640,408,1613260800000,WPRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,TV
97641,409,1613347200000,WPRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,TV
97642,410,1613433600000,WPRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,TV


In [3]:
# Convert the time by using the UTC timezone
def time_conversion(in_time):
    out_time = datetime.fromtimestamp(int(in_time)/1000,pytz.utc)
    return out_time.strftime("%Y-%m-%dT%H:%M:%SZ")
df_world_original['Date_reported']=df_world_original['Date_reported'].apply(time_conversion)
df_world_original

Unnamed: 0,index,Date_reported,WHO_region,New_deaths,Cumulative_deaths,Weekly_deaths,Weekly_deaths_change,Deaths_per_100k,New_cases,Cumulative_cases,Weekly_cases,Weekly_cases_change,Cases_per_100k,WkCasePop,WkDeathPop,Country_code
0,0,2020-01-03T00:00:00Z,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
1,1,2020-01-04T00:00:00Z,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
2,2,2020-01-05T00:00:00Z,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
3,3,2020-01-06T00:00:00Z,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
4,4,2020-01-07T00:00:00Z,AMRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,US
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97639,407,2021-02-13T00:00:00Z,WPRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,TV
97640,408,2021-02-14T00:00:00Z,WPRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,TV
97641,409,2021-02-15T00:00:00Z,WPRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,TV
97642,410,2021-02-16T00:00:00Z,WPRO,0,0,0,0.0,0,0,0,0,0.0,0,0,0,TV


In [4]:
# Only keep the necessary columns and rename them
df_world_original = df_world_original[['Date_reported','Country_code','Cumulative_deaths','Cumulative_cases']]
df_world_original = df_world_original.rename(columns={"Date_reported": "date", "Country_code": "country_code", 
                                                      "Cumulative_cases": "cases", "Cumulative_deaths": "deaths"})
df_world_original

Unnamed: 0,date,country_code,deaths,cases
0,2020-01-03T00:00:00Z,US,0,0
1,2020-01-04T00:00:00Z,US,0,0
2,2020-01-05T00:00:00Z,US,0,0
3,2020-01-06T00:00:00Z,US,0,0
4,2020-01-07T00:00:00Z,US,0,0
...,...,...,...,...
97639,2021-02-13T00:00:00Z,TV,0,0
97640,2021-02-14T00:00:00Z,TV,0,0
97641,2021-02-15T00:00:00Z,TV,0,0
97642,2021-02-16T00:00:00Z,TV,0,0


In [5]:
# Only keep the date
df_world_original['date'] = df_world_original['date'].str[0:10]
df_world_original

Unnamed: 0,date,country_code,deaths,cases
0,2020-01-03,US,0,0
1,2020-01-04,US,0,0
2,2020-01-05,US,0,0
3,2020-01-06,US,0,0
4,2020-01-07,US,0,0
...,...,...,...,...
97639,2021-02-13,TV,0,0
97640,2021-02-14,TV,0,0
97641,2021-02-15,TV,0,0
97642,2021-02-16,TV,0,0


0.1 GeoJSON Data

In [6]:
# Read GeoJson data
old_world_geojson_df = gpd.read_file("World_Countries_Boundaries_new.geojson")
old_world_geojson_df

Unnamed: 0,OBJECTID,FIPS_CNTRY,ISO_2DIGIT,ISO_3DIGIT,NAME,LONG_NAME,LOCSHRTNAM,LOCLNGNAM,CAPITAL,COUNTRYAFF,CONTINENT,COLORMAP,Shape_Leng,Shape_Area,geometry
0,1,AQ,AS,ASM,American Samoa,Territory of American Samoa,,,Pago Pago,United States,Oceania,2,0.600124,0.013720,"POLYGON ((-170.744 -14.376, -170.823 -14.324, ..."
1,2,WQ,UM,UMI,United States Minor Outlying Islands,,,,,United States,Oceania,7,0.028875,0.000034,"MULTIPOLYGON (((-160.021 -0.398, -160.043 -0.3..."
2,3,CW,CK,COK,Cook Islands,Cook Islands,,,Avarua,New Zealand,Oceania,6,0.980664,0.013073,"MULTIPOLYGON (((-159.747 -21.257, -159.833 -21..."
3,4,FP,PF,PYF,French Polynesia,Overseas Lands of French Polynesia,Polynesie Francaise,Pays d'outre-mer de la Polynesie Francaise,Papeete,France,Oceania,4,3.930211,0.175332,"MULTIPOLYGON (((-149.179 -17.871, -149.276 -17..."
4,7,NE,NU,NIU,Niue,Niue,,,Alofi,New Zealand,Oceania,2,0.541413,0.021414,"POLYGON ((-169.894 -19.146, -169.931 -19.124, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,249,FM,FM,FSM,Micronesia,Federated States of Micronesia,,,Palikir,Micronesia,Oceania,2,1.042421,0.042206,"MULTIPOLYGON (((158.228 6.781, 158.155 6.819, ..."
240,250,CQ,MP,MNP,Northern Mariana Islands,Commonwealth of the Northern Mariana Islands,,,Saipan,United States,Oceania,1,0.908853,0.019927,"MULTIPOLYGON (((145.735 15.087, 145.685 15.101..."
241,251,PS,PW,PLW,Palau,Republic of Palau,Belau,Beluu er a Belau,Melekeok,Palau,Oceania,1,1.105323,0.031136,"MULTIPOLYGON (((134.531 7.354, 134.485 7.438, ..."
242,253,RS,RU,RUS,Russian Federation,Russian Federation,Rossiya,Rossiyskaya Federatsiya,Moscow,Russian Federation,Asia,1,1526.025072,2929.641772,"MULTIPOLYGON (((131.873 42.957, 131.824 42.953..."


In [7]:
# Read population data
pop_df = pd.read_csv("world_population.csv")
pop_df

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,Unnamed: 64
0,Aruba,ABW,"Population, total",SP.POP.TOTL,54211.0,55438.0,56225.0,56695.0,57032.0,57360.0,...,102046.0,102560.0,103159.0,103774.0,104341.0,104872.0,105366.0,105845.0,106314.0,
1,Afghanistan,AFG,"Population, total",SP.POP.TOTL,8996973.0,9169410.0,9351441.0,9543205.0,9744781.0,9956320.0,...,30117413.0,31161376.0,32269589.0,33370794.0,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0,
2,Angola,AGO,"Population, total",SP.POP.TOTL,5454933.0,5531472.0,5608539.0,5679458.0,5735044.0,5770570.0,...,24220661.0,25107931.0,26015780.0,26941779.0,27884381.0,28842484.0,29816748.0,30809762.0,31825295.0,
3,Albania,ALB,"Population, total",SP.POP.TOTL,1608800.0,1659800.0,1711319.0,1762621.0,1814135.0,1864791.0,...,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,
4,Andorra,AND,"Population, total",SP.POP.TOTL,13411.0,14375.0,15370.0,16412.0,17469.0,18549.0,...,83747.0,82427.0,80774.0,79213.0,78011.0,77297.0,77001.0,77006.0,77142.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,Kosovo,XKX,"Population, total",SP.POP.TOTL,947000.0,966000.0,994000.0,1022000.0,1050000.0,1078000.0,...,1791000.0,1807106.0,1818117.0,1812771.0,1788196.0,1777557.0,1791003.0,1797085.0,1794248.0,
260,"Yemen, Rep.",YEM,"Population, total",SP.POP.TOTL,5315355.0,5393036.0,5473671.0,5556766.0,5641597.0,5727751.0,...,23807588.0,24473178.0,25147109.0,25823485.0,26497889.0,27168210.0,27834821.0,28498687.0,29161922.0,
261,South Africa,ZAF,"Population, total",SP.POP.TOTL,17099840.0,17524533.0,17965725.0,18423161.0,18896307.0,19384841.0,...,52004172.0,52834005.0,53689236.0,54545991.0,55386367.0,56203654.0,57000451.0,57779622.0,58558270.0,
262,Zambia,ZMB,"Population, total",SP.POP.TOTL,3070776.0,3164329.0,3260650.0,3360104.0,3463213.0,3570464.0,...,14023193.0,14465121.0,14926504.0,15399753.0,15879361.0,16363507.0,16853688.0,17351822.0,17861030.0,


In [8]:
# Only keep the necessary columns
pop_df = pop_df[["Country Code","2019"]]
pop_df

Unnamed: 0,Country Code,2019
0,ABW,106314.0
1,AFG,38041754.0
2,AGO,31825295.0
3,ALB,2854191.0
4,AND,77142.0
...,...,...
259,XKX,1794248.0
260,YEM,29161922.0
261,ZAF,58558270.0
262,ZMB,17861030.0


In [9]:
# Merge geoJSON with population data
old_world_geojson_df = pd.merge(old_world_geojson_df, pop_df, how='left', left_on=['ISO_3DIGIT'], right_on = ['Country Code'])
old_world_geojson_df = old_world_geojson_df.rename(columns={"2019": "population"})
old_world_geojson_df

Unnamed: 0,OBJECTID,FIPS_CNTRY,ISO_2DIGIT,ISO_3DIGIT,NAME,LONG_NAME,LOCSHRTNAM,LOCLNGNAM,CAPITAL,COUNTRYAFF,CONTINENT,COLORMAP,Shape_Leng,Shape_Area,geometry,Country Code,population
0,1,AQ,AS,ASM,American Samoa,Territory of American Samoa,,,Pago Pago,United States,Oceania,2,0.600124,0.013720,"POLYGON ((-170.744 -14.376, -170.823 -14.324, ...",ASM,55312.0
1,2,WQ,UM,UMI,United States Minor Outlying Islands,,,,,United States,Oceania,7,0.028875,0.000034,"MULTIPOLYGON (((-160.021 -0.398, -160.043 -0.3...",,
2,3,CW,CK,COK,Cook Islands,Cook Islands,,,Avarua,New Zealand,Oceania,6,0.980664,0.013073,"MULTIPOLYGON (((-159.747 -21.257, -159.833 -21...",,
3,4,FP,PF,PYF,French Polynesia,Overseas Lands of French Polynesia,Polynesie Francaise,Pays d'outre-mer de la Polynesie Francaise,Papeete,France,Oceania,4,3.930211,0.175332,"MULTIPOLYGON (((-149.179 -17.871, -149.276 -17...",PYF,279287.0
4,7,NE,NU,NIU,Niue,Niue,,,Alofi,New Zealand,Oceania,2,0.541413,0.021414,"POLYGON ((-169.894 -19.146, -169.931 -19.124, ...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,249,FM,FM,FSM,Micronesia,Federated States of Micronesia,,,Palikir,Micronesia,Oceania,2,1.042421,0.042206,"MULTIPOLYGON (((158.228 6.781, 158.155 6.819, ...",FSM,113815.0
240,250,CQ,MP,MNP,Northern Mariana Islands,Commonwealth of the Northern Mariana Islands,,,Saipan,United States,Oceania,1,0.908853,0.019927,"MULTIPOLYGON (((145.735 15.087, 145.685 15.101...",MNP,57216.0
241,251,PS,PW,PLW,Palau,Republic of Palau,Belau,Beluu er a Belau,Melekeok,Palau,Oceania,1,1.105323,0.031136,"MULTIPOLYGON (((134.531 7.354, 134.485 7.438, ...",PLW,18008.0
242,253,RS,RU,RUS,Russian Federation,Russian Federation,Rossiya,Rossiyskaya Federatsiya,Moscow,Russian Federation,Asia,1,1526.025072,2929.641772,"MULTIPOLYGON (((131.873 42.957, 131.824 42.953...",RUS,144373535.0


In [10]:
# Only keep the necessary columns
old_world_geojson_df = old_world_geojson_df[['ISO_2DIGIT','NAME','population','geometry']]
old_world_geojson_df

Unnamed: 0,ISO_2DIGIT,NAME,population,geometry
0,AS,American Samoa,55312.0,"POLYGON ((-170.744 -14.376, -170.823 -14.324, ..."
1,UM,United States Minor Outlying Islands,,"MULTIPOLYGON (((-160.021 -0.398, -160.043 -0.3..."
2,CK,Cook Islands,,"MULTIPOLYGON (((-159.747 -21.257, -159.833 -21..."
3,PF,French Polynesia,279287.0,"MULTIPOLYGON (((-149.179 -17.871, -149.276 -17..."
4,NU,Niue,,"POLYGON ((-169.894 -19.146, -169.931 -19.124, ..."
...,...,...,...,...
239,FM,Micronesia,113815.0,"MULTIPOLYGON (((158.228 6.781, 158.155 6.819, ..."
240,MP,Northern Mariana Islands,57216.0,"MULTIPOLYGON (((145.735 15.087, 145.685 15.101..."
241,PW,Palau,18008.0,"MULTIPOLYGON (((134.531 7.354, 134.485 7.438, ..."
242,RU,Russian Federation,144373535.0,"MULTIPOLYGON (((131.873 42.957, 131.824 42.953..."


In [11]:
# Add Kosovo geometry
import shapely.wkt
polygon1 = shapely.wkt.loads('POLYGON ((21.160269147746988 42.66353984291385, 21.160269147746988 42.6571648755645, 21.169109708660073 42.6571648755645, 21.169109708660073 42.66353984291385, 21.160269147746988 42.66353984291385))')
old_world_geojson_df = old_world_geojson_df.append({'ISO_2DIGIT':'XK', 'NAME':'Kosovo', 'geometry': polygon1}, ignore_index=True)

In [12]:
# Add OTHERS geometry
import shapely.wkt
polygon2 = shapely.wkt.loads('POLYGON ((6.124162972753766 46.22138299367113, 6.124162972753766 46.20594098233243, 6.1473372586424375 46.20594098233243, 6.1473372586424375 46.22138299367113, 6.124162972753766 46.22138299367113))')
# Country code of OTHER in JSON file is blank space
old_world_geojson_df = old_world_geojson_df.append({'ISO_2DIGIT':' ', 'NAME':'Others', 'geometry': polygon2}, ignore_index=True)

In [13]:
# Fill NA values in population
old_world_geojson_df["population"] = old_world_geojson_df["population"].fillna(0).astype(int)
old_world_geojson_df

Unnamed: 0,ISO_2DIGIT,NAME,population,geometry
0,AS,American Samoa,55312,"POLYGON ((-170.744 -14.376, -170.823 -14.324, ..."
1,UM,United States Minor Outlying Islands,0,"MULTIPOLYGON (((-160.021 -0.398, -160.043 -0.3..."
2,CK,Cook Islands,0,"MULTIPOLYGON (((-159.747 -21.257, -159.833 -21..."
3,PF,French Polynesia,279287,"MULTIPOLYGON (((-149.179 -17.871, -149.276 -17..."
4,NU,Niue,0,"POLYGON ((-169.894 -19.146, -169.931 -19.124, ..."
...,...,...,...,...
241,PW,Palau,18008,"MULTIPOLYGON (((134.531 7.354, 134.485 7.438, ..."
242,RU,Russian Federation,144373535,"MULTIPOLYGON (((131.873 42.957, 131.824 42.953..."
243,ES,Spain,47076781,"MULTIPOLYGON (((-17.911 27.774, -17.983 27.638..."
244,XK,Kosovo,0,"POLYGON ((21.160 42.664, 21.160 42.657, 21.169..."


0.2 Group Data to world

In [14]:
# Transform cases/deaths data table to pivot table using country code as main index
pivot_world = pd.pivot_table(df_world_original , index=['country_code'], columns=['date'])
pivot_world

Unnamed: 0_level_0,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,...,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
date,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,2020-01-09,2020-01-10,2020-01-11,2020-01-12,...,2021-02-08,2021-02-09,2021-02-10,2021-02-11,2021-02-12,2021-02-13,2021-02-14,2021-02-15,2021-02-16,2021-02-17
country_code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
,0,0,0,0,0,0,0,0,0,0,...,13,13,13,13,13,13,13,13,13,13
AD,0,0,0,0,0,0,0,0,0,0,...,106,106,106,106,106,106,107,107,107,107
AE,0,0,0,0,0,0,0,0,0,0,...,921,930,947,956,974,986,1001,1014,1027,1041
AF,0,0,0,0,0,0,0,0,0,0,...,2413,2414,2418,2419,2424,2427,2427,2427,2428,2428
AG,0,0,0,0,0,0,0,0,0,0,...,7,7,8,9,9,9,9,9,9,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YE,0,0,0,0,0,0,0,0,0,0,...,616,616,616,617,617,617,617,618,618,619
YT,0,0,0,0,0,0,0,0,0,0,...,64,65,68,69,72,72,78,78,81,85
ZA,0,0,0,0,0,0,0,0,0,0,...,46290,46473,46869,47145,47382,47670,47821,47899,48094,48313
ZM,0,0,0,0,0,0,0,0,0,0,...,853,853,881,901,914,931,940,951,959,974


In [15]:
# Get dates from data table
date= pivot_world['cases'].columns
date

Index(['2020-01-03', '2020-01-04', '2020-01-05', '2020-01-06', '2020-01-07',
       '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
       ...
       '2021-02-08', '2021-02-09', '2021-02-10', '2021-02-11', '2021-02-12',
       '2021-02-13', '2021-02-14', '2021-02-15', '2021-02-16', '2021-02-17'],
      dtype='object', name='date', length=412)

In [16]:
# Fix the bug when WHO only updates part of the data for the lastest date
lastColumn = pivot_world['cases'][date[-1]]
for i in range(0, len(lastColumn)):
    if lastColumn.isna()[i] == True:
        lastColumn[i] = pivot_world['cases'][date[-2]][i]
pivot_world['cases'][date[-1]] = lastColumn
# pivot_world

lastColumn = pivot_world['deaths'][date[-1]]
for i in range(0, len(lastColumn)):
    if lastColumn.isna()[i] == True:
        lastColumn[i] = pivot_world['deaths'][date[-2]][i]
pivot_world['deaths'][date[-1]] = lastColumn
pivot_world

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


Unnamed: 0_level_0,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,...,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
date,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,2020-01-09,2020-01-10,2020-01-11,2020-01-12,...,2021-02-08,2021-02-09,2021-02-10,2021-02-11,2021-02-12,2021-02-13,2021-02-14,2021-02-15,2021-02-16,2021-02-17
country_code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
,0,0,0,0,0,0,0,0,0,0,...,13,13,13,13,13,13,13,13,13,13
AD,0,0,0,0,0,0,0,0,0,0,...,106,106,106,106,106,106,107,107,107,107
AE,0,0,0,0,0,0,0,0,0,0,...,921,930,947,956,974,986,1001,1014,1027,1041
AF,0,0,0,0,0,0,0,0,0,0,...,2413,2414,2418,2419,2424,2427,2427,2427,2428,2428
AG,0,0,0,0,0,0,0,0,0,0,...,7,7,8,9,9,9,9,9,9,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YE,0,0,0,0,0,0,0,0,0,0,...,616,616,616,617,617,617,617,618,618,619
YT,0,0,0,0,0,0,0,0,0,0,...,64,65,68,69,72,72,78,78,81,85
ZA,0,0,0,0,0,0,0,0,0,0,...,46290,46473,46869,47145,47382,47670,47821,47899,48094,48313
ZM,0,0,0,0,0,0,0,0,0,0,...,853,853,881,901,914,931,940,951,959,974


In [17]:
# Fill NA values with 0
pivot_world = pivot_world.replace(np.nan, 0)
pivot_world

Unnamed: 0_level_0,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,...,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
date,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,2020-01-09,2020-01-10,2020-01-11,2020-01-12,...,2021-02-08,2021-02-09,2021-02-10,2021-02-11,2021-02-12,2021-02-13,2021-02-14,2021-02-15,2021-02-16,2021-02-17
country_code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
,0,0,0,0,0,0,0,0,0,0,...,13,13,13,13,13,13,13,13,13,13
AD,0,0,0,0,0,0,0,0,0,0,...,106,106,106,106,106,106,107,107,107,107
AE,0,0,0,0,0,0,0,0,0,0,...,921,930,947,956,974,986,1001,1014,1027,1041
AF,0,0,0,0,0,0,0,0,0,0,...,2413,2414,2418,2419,2424,2427,2427,2427,2428,2428
AG,0,0,0,0,0,0,0,0,0,0,...,7,7,8,9,9,9,9,9,9,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YE,0,0,0,0,0,0,0,0,0,0,...,616,616,616,617,617,617,617,618,618,619
YT,0,0,0,0,0,0,0,0,0,0,...,64,65,68,69,72,72,78,78,81,85
ZA,0,0,0,0,0,0,0,0,0,0,...,46290,46473,46869,47145,47382,47670,47821,47899,48094,48313
ZM,0,0,0,0,0,0,0,0,0,0,...,853,853,881,901,914,931,940,951,959,974


0.3 Get Date Info

In [18]:
# Get dates from data table
date= pivot_world['cases'].columns
date

Index(['2020-01-03', '2020-01-04', '2020-01-05', '2020-01-06', '2020-01-07',
       '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
       ...
       '2021-02-08', '2021-02-09', '2021-02-10', '2021-02-11', '2021-02-12',
       '2021-02-13', '2021-02-14', '2021-02-15', '2021-02-16', '2021-02-17'],
      dtype='object', name='date', length=412)

In [19]:
# Sort dates and get the start/end
date_str = np.sort(date)
dt_str_start=np.min(date_str)
dt_str_end=np.max(date_str)

In [20]:
# Format dates and create a list
start = datetime.strptime(dt_str_start, "%Y-%m-%d")
end = datetime.strptime(dt_str_end, "%Y-%m-%d")
dt_range = pd.date_range(start=start,end=end)
print(len(dt_range), dt_range)
dt_range_str = list(map(lambda x: x.strftime("%Y-%m-%d"), dt_range.tolist()))
print(len(dt_range_str), dt_range_str)

412 DatetimeIndex(['2020-01-03', '2020-01-04', '2020-01-05', '2020-01-06',
               '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10',
               '2020-01-11', '2020-01-12',
               ...
               '2021-02-08', '2021-02-09', '2021-02-10', '2021-02-11',
               '2021-02-12', '2021-02-13', '2021-02-14', '2021-02-15',
               '2021-02-16', '2021-02-17'],
              dtype='datetime64[ns]', length=412, freq='D')
412 ['2020-01-03', '2020-01-04', '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12', '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16', '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20', '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24', '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28', '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-01', '2020-02-02', '2020-02-03', '2020-02-04', '2020-02-05', '2020-02-06', '2020-02-07', '2020-02-08', '2020-02-09', '2020-02-

Step1:  
Add Cases Time Series, First Case Date, Death Time Series, First Death Date

In [21]:
# import json
# pivot_world['cases_ts'] = json.dumps({"values": pivot_world['cases'].values.tolist()[0]})
# pivot_world['deaths_ts'] =  json.dumps({"values": pivot_world['deaths'].values.tolist()[0]})

pivot_world['cases_ts'] =  pivot_world['cases'].values.tolist()
pivot_world['deaths_ts'] =  pivot_world['deaths'].values.tolist()

In [22]:
# Get the date of first case/death
pivot_world['dt_first_case'] = (pivot_world['cases'] > 0).idxmax(axis=1)
pivot_world['dt_first_death'] = (pivot_world['deaths'] > 0).idxmax(axis=1)
#For death reports, deal with no deaths world
pivot_world.loc[pivot_world['deaths'].iloc[:, -1] <= 0, 'dt_first_death'] = np.nan
pivot_world.head(5)

Unnamed: 0_level_0,cases,cases,cases,cases,cases,cases,cases,cases,cases,cases,...,deaths,deaths,deaths,deaths,deaths,deaths,cases_ts,deaths_ts,dt_first_case,dt_first_death
date,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,2020-01-09,2020-01-10,2020-01-11,2020-01-12,...,2021-02-12,2021-02-13,2021-02-14,2021-02-15,2021-02-16,2021-02-17,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
country_code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
,0,0,0,0,0,0,0,0,0,0,...,13,13,13,13,13,13,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-02-05,2020-02-20
AD,0,0,0,0,0,0,0,0,0,0,...,106,106,107,107,107,107,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-03-03,2020-03-22
AE,0,0,0,0,0,0,0,0,0,0,...,974,986,1001,1014,1027,1041,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-01-29,2020-03-21
AF,0,0,0,0,0,0,0,0,0,0,...,2424,2427,2427,2427,2428,2428,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-02-24,2020-03-23
AG,0,0,0,0,0,0,0,0,0,0,...,9,9,9,9,9,9,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-03-13,2020-04-09


Step2:  
Add today_case, today_new_case, today_death, today_new_death

In [23]:
# Get the case/death numbers of today and yesterday
yesterday = date.values[-2]
day_before_yes = date.values[-3]
pivot_world['yesterday_case'] = pivot_world['cases'][yesterday]
pivot_world['yesterday_new_case'] = pivot_world['cases'][yesterday] - pivot_world['cases'][day_before_yes]
pivot_world['yesterday_death'] = pivot_world['deaths'][yesterday]
pivot_world['yesterday_new_death'] = pivot_world['deaths'][yesterday] - pivot_world['deaths'][day_before_yes]
pivot_world['today_case'] = pivot_world['cases'][dt_str_end]
pivot_world['today_new_case'] = pivot_world['cases'][dt_str_end] - pivot_world['cases'][yesterday]
pivot_world['today_death'] = pivot_world['deaths'][dt_str_end]
pivot_world['today_new_death'] = pivot_world['deaths'][dt_str_end] - pivot_world['deaths'][yesterday]

Step3:  
Extract necessary columns

In [24]:
# Only keep the necessary columns
report_df = pivot_world[['cases_ts','deaths_ts','dt_first_case','dt_first_death','today_case','today_new_case', 'today_death','today_new_death','yesterday_case','yesterday_new_case','yesterday_death','yesterday_new_death']]
report_df = report_df.reset_index()
report_df.columns = ['country_code','cases_ts','deaths_ts','dt_first_case','dt_first_death','today_case','today_new_case', 'today_death','today_new_death','yesterday_case','yesterday_new_case','yesterday_death','yesterday_new_death']
report_df.head(5)

Unnamed: 0,country_code,cases_ts,deaths_ts,dt_first_case,dt_first_death,today_case,today_new_case,today_death,today_new_death,yesterday_case,yesterday_new_case,yesterday_death,yesterday_new_death
0,,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-02-05,2020-02-20,745,0,13,0,745,0,13,0
1,AD,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-03-03,2020-03-22,10555,17,107,0,10538,35,107,0
2,AE,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-01-29,2020-03-21,355131,3236,1041,14,351895,3123,1027,13
3,AF,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-02-24,2020-03-23,55540,22,2428,0,55518,4,2428,1
4,AG,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2020-03-13,2020-04-09,443,16,9,0,427,0,9,0


Step4:  
Assign Geometries to world

In [25]:
# Merge case dataframe and geometry
final_df = pd.merge(old_world_geojson_df, report_df, how='left', left_on=['ISO_2DIGIT'], right_on = ['country_code'])
final_df.columns

Index(['ISO_2DIGIT', 'NAME', 'population', 'geometry', 'country_code',
       'cases_ts', 'deaths_ts', 'dt_first_case', 'dt_first_death',
       'today_case', 'today_new_case', 'today_death', 'today_new_death',
       'yesterday_case', 'yesterday_new_case', 'yesterday_death',
       'yesterday_new_death'],
      dtype='object')

Step5:  
Deal with nan values

In [26]:
# Replace NAN values with 0
final_df[['today_case','today_new_case','today_death','today_new_death','yesterday_case','yesterday_new_case','yesterday_death','yesterday_new_death']] = final_df[['today_case','today_new_case','today_death','today_new_death','yesterday_case','yesterday_new_case','yesterday_death','yesterday_new_death']].replace(np.nan,0)

In [27]:
# Set up template
template =final_df[~final_df['cases_ts'].isna()]['cases_ts']
template = template.iloc[0]

In [28]:
for x in range(0,len(template)):
    template[x] = 0
template = ','.join(map(str, template))

In [29]:
# Change list format to string with commas
final_df['cases_ts'] = final_df['cases_ts'].apply(lambda x: ','.join(map(str, x)) if type(x) is list else template)
final_df['deaths_ts'] = final_df['deaths_ts'].apply(lambda x: ','.join(map(str, x)) if type(x) is list else template)
final_df.head(5)

Unnamed: 0,ISO_2DIGIT,NAME,population,geometry,country_code,cases_ts,deaths_ts,dt_first_case,dt_first_death,today_case,today_new_case,today_death,today_new_death,yesterday_case,yesterday_new_case,yesterday_death,yesterday_new_death
0,AS,American Samoa,55312,"POLYGON ((-170.74390 -14.37555, -170.82323 -14...",AS,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,UM,United States Minor Outlying Islands,0,"MULTIPOLYGON (((-160.02114 -0.39805, -160.0434...",,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,CK,Cook Islands,0,"MULTIPOLYGON (((-159.74698 -21.25667, -159.832...",CK,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,PF,French Polynesia,279287,"MULTIPOLYGON (((-149.17920 -17.87084, -149.275...",PF,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-12,2020-09-11,18299.0,6.0,136.0,1.0,18293.0,30.0,135.0,0.0
4,NU,Niue,0,"POLYGON ((-169.89389 -19.14556, -169.93088 -19...",NU,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Step6:  
Finalize the Dataframe

In [30]:
# Change column names
final_df['start'] = dt_str_start
final_df['end'] = dt_str_end
final_df['dt_unit'] = "day"
final_df

Unnamed: 0,ISO_2DIGIT,NAME,population,geometry,country_code,cases_ts,deaths_ts,dt_first_case,dt_first_death,today_case,today_new_case,today_death,today_new_death,yesterday_case,yesterday_new_case,yesterday_death,yesterday_new_death,start,end,dt_unit
0,AS,American Samoa,55312,"POLYGON ((-170.744 -14.376, -170.823 -14.324, ...",AS,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day
1,UM,United States Minor Outlying Islands,0,"MULTIPOLYGON (((-160.021 -0.398, -160.043 -0.3...",,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day
2,CK,Cook Islands,0,"MULTIPOLYGON (((-159.747 -21.257, -159.833 -21...",CK,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day
3,PF,French Polynesia,279287,"MULTIPOLYGON (((-149.179 -17.871, -149.276 -17...",PF,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-12,2020-09-11,18299.0,6.0,136.0,1.0,18293.0,30.0,135.0,0.0,2020-01-03,2021-02-17,day
4,NU,Niue,0,"POLYGON ((-169.894 -19.146, -169.931 -19.124, ...",NU,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
241,PW,Palau,18008,"MULTIPOLYGON (((134.531 7.354, 134.485 7.438, ...",PW,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day
242,RU,Russian Federation,144373535,"MULTIPOLYGON (((131.873 42.957, 131.824 42.953...",RU,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-31,2020-03-26,4112151.0,12828.0,81446.0,467.0,4099323.0,13233.0,80979.0,459.0,2020-01-03,2021-02-17,day
243,ES,Spain,47076781,"MULTIPOLYGON (((-17.911 27.774, -17.983 27.638...",ES,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-02-02,2020-02-13,3096343.0,3857.0,65979.0,54.0,3092486.0,3078.0,65925.0,134.0,2020-01-03,2021-02-17,day
244,XK,Kosovo,0,"POLYGON ((21.160 42.664, 21.160 42.657, 21.169...",XK,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-13,2020-03-22,64725.0,249.0,1538.0,3.0,64476.0,178.0,1535.0,1.0,2020-01-03,2021-02-17,day


In [31]:
# Find the countries/regions with 0 population
final_df.loc[final_df['population'] == 0, 'NAME'].unique()

array(['United States Minor Outlying Islands', 'Cook Islands', 'Niue',
       'Pitcairn', 'Tokelau', 'Wallis and Futuna', 'Falkland Islands',
       'South Georgia and South Sandwich Islands', 'Antarctica',
       'Saint Helena', 'Anguilla', 'Bonaire', 'French Guiana',
       'Guadeloupe', 'Martinique', 'Montserrat', 'Saint Barthelemy',
       'Saint Pierre and Miquelon', 'Guernsey', 'Jersey', 'Jan Mayen',
       'Bouvet Island', 'British Indian Ocean Territory',
       'French Southern Territories', 'Heard Island and McDonald Islands',
       'Mayotte', 'Réunion', 'Eritrea', 'Vatican City',
       'Christmas Island', 'Cocos Islands', 'Norfolk Island', 'Kosovo',
       'Others'], dtype=object)

In [32]:
# Manually update the population
final_df.loc[final_df['NAME'] == 'United States Minor Outlying Islands', 'population'] = 300
final_df.loc[final_df['NAME'] == 'Cook Islands', 'population'] = 15200
final_df.loc[final_df['NAME'] == 'Niue', 'population'] = 1624
final_df.loc[final_df['NAME'] == 'Pitcairn', 'population'] = 67
final_df.loc[final_df['NAME'] == 'Tokelau', 'population'] = 1411
final_df.loc[final_df['NAME'] == 'Wallis and Futuna', 'population'] = 15289
final_df.loc[final_df['NAME'] == 'Falkland Islands', 'population'] = 2840
final_df.loc[final_df['NAME'] == 'South Georgia and South Sandwich Islands', 'population'] = 30
final_df.loc[final_df['NAME'] == 'Antarctica', 'population'] = 4490
final_df.loc[final_df['NAME'] == 'Saint Helena', 'population'] = 6600
final_df.loc[final_df['NAME'] == 'Anguilla', 'population'] = 15094
final_df.loc[final_df['NAME'] == 'Bonaire', 'population'] = 20104
final_df.loc[final_df['NAME'] == 'French Guiana', 'population'] = 290691
final_df.loc[final_df['NAME'] == 'Guadeloupe', 'population'] = 395700
final_df.loc[final_df['NAME'] == 'Martinique', 'population'] = 376480
final_df.loc[final_df['NAME'] == 'Montserrat', 'population'] = 5900
final_df.loc[final_df['NAME'] == 'Saint Barthelemy', 'population'] = 9131
final_df.loc[final_df['NAME'] == 'Saint Pierre and Miquelon', 'population'] = 5888
final_df.loc[final_df['NAME'] == 'Guernsey', 'population'] = 67052
final_df.loc[final_df['NAME'] == 'Jersey', 'population'] = 97857
final_df.loc[final_df['NAME'] == 'Jan Mayen', 'population'] = 10
final_df.loc[final_df['NAME'] == 'Bouvet Island', 'population'] = 10
final_df.loc[final_df['NAME'] == 'British Indian Ocean Territory', 'population'] = 3000
final_df.loc[final_df['NAME'] == 'French Southern Territories', 'population'] = 145
final_df.loc[final_df['NAME'] == 'Heard Island and McDonald Islands', 'population'] = 10
final_df.loc[final_df['NAME'] == 'Mayotte', 'population'] = 270372
final_df.loc[final_df['NAME'] == 'Réunion', 'population'] = 859959
final_df.loc[final_df['NAME'] == 'Eritrea', 'population'] = 3546421
final_df.loc[final_df['NAME'] == 'Vatican City', 'population'] = 825
final_df.loc[final_df['NAME'] == 'Christmas Island', 'population'] = 1402
final_df.loc[final_df['NAME'] == 'Cocos Islands', 'population'] = 596
final_df.loc[final_df['NAME'] == 'Norfolk Island', 'population'] = 2169
final_df.loc[final_df['NAME'] == 'Kosovo', 'population'] = 1810366
final_df.loc[final_df['NAME'] == 'Kosovo', 'population'] = 1810366
final_df.loc[final_df['NAME'] == 'Others', 'population'] = 10000000

In [33]:
# Calculate weekly change rates of cases and form a time-series list
final_df['change_ts'] = ""
for x in range(0, len(final_df)):
    newCaseList = [0]
    changeRateList = []
    caseStrList = final_df.loc[x,'cases_ts'].split(",")
    for i in range(1, len(caseStrList)):
        dailyNewCase = float(caseStrList[i]) - float(caseStrList[i-1])
        newCaseList.append(dailyNewCase)
    for i in range(0, len(newCaseList)):
        if i < 13:
            changeRate = 0
        else:
            currentWeekSum = 0
            previousWeekSum = 0
            for j in range(0,7):
                currentWeekSum = currentWeekSum + newCaseList[i-j]
            for k in range(7,14):
                previousWeekSum = previousWeekSum + newCaseList[i-k]
            if previousWeekSum == 0 and currentWeekSum == 0:
                changeRate = 0
            elif previousWeekSum == 0 and currentWeekSum != 0:
                changeRate = currentWeekSum
            else:
                changeRate = round(currentWeekSum/previousWeekSum-1,2)
        changeRateList.append(changeRate)
        changeRateStrList = list(map(str, changeRateList))
#     print(','.join(changeRateStrList))
    final_df.loc[x,'change_ts'] = ','.join(changeRateStrList)

final_df

Unnamed: 0,ISO_2DIGIT,NAME,population,geometry,country_code,cases_ts,deaths_ts,dt_first_case,dt_first_death,today_case,...,today_death,today_new_death,yesterday_case,yesterday_new_case,yesterday_death,yesterday_new_death,start,end,dt_unit,change_ts
0,AS,American Samoa,55312,"POLYGON ((-170.744 -14.376, -170.823 -14.324, ...",AS,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
1,UM,United States Minor Outlying Islands,300,"MULTIPOLYGON (((-160.021 -0.398, -160.043 -0.3...",,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
2,CK,Cook Islands,15200,"MULTIPOLYGON (((-159.747 -21.257, -159.833 -21...",CK,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
3,PF,French Polynesia,279287,"MULTIPOLYGON (((-149.179 -17.871, -149.276 -17...",PF,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-12,2020-09-11,18299.0,...,136.0,1.0,18293.0,30.0,135.0,0.0,2020-01-03,2021-02-17,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
4,NU,Niue,1624,"POLYGON ((-169.894 -19.146, -169.931 -19.124, ...",NU,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
241,PW,Palau,18008,"MULTIPOLYGON (((134.531 7.354, 134.485 7.438, ...",PW,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-03,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2020-01-03,2021-02-17,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
242,RU,Russian Federation,144373535,"MULTIPOLYGON (((131.873 42.957, 131.824 42.953...",RU,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-01-31,2020-03-26,4112151.0,...,81446.0,467.0,4099323.0,13233.0,80979.0,459.0,2020-01-03,2021-02-17,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
243,ES,Spain,47076781,"MULTIPOLYGON (((-17.911 27.774, -17.983 27.638...",ES,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-02-02,2020-02-13,3096343.0,...,65979.0,54.0,3092486.0,3078.0,65925.0,134.0,2020-01-03,2021-02-17,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
244,XK,Kosovo,1810366,"POLYGON ((21.160 42.664, 21.160 42.657, 21.169...",XK,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-13,2020-03-22,64725.0,...,1538.0,3.0,64476.0,178.0,1535.0,1.0,2020-01-03,2021-02-17,day,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."


Step 7:  
Output file

In [34]:
# Save file
final_df.to_file("who_world_data.geojson", driver='GeoJSON', encoding='utf-8')
print("done")

done
