In [6]:
# Dependencies
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import io
import time
from datetime import datetime, timedelta
import json
import pprint
# Pretty-printer for ad-hoc inspection, and show every column when a frame is displayed.
pp = pprint.PrettyPrinter(indent=4)
pd.set_option('display.max_columns', None)

# Folder configuration. These are absolute, machine-specific paths, so they
# must be edited before the notebook can run elsewhere.
# NOTE(review): rFolder is not referenced by any cell visible in this notebook - confirm whether it is still needed.
rFolder = "C:/Users/janin/Downloads/"
dataFolder = "D:/Repositories/Global-COVID-Surveillance/data/"
# All inputs except Starter.xlsx are read from here, and both output workbooks are written here.
cleanedFolder = dataFolder + "cleaned/"

# Suffix appended to each region name to build its R results CSV file name
# (the embedded spaces are part of the file name).
file_end = " -Results- 2020-12-21 .csv"

In [7]:
# Read starter
# Starter.xlsx provides the initial frame that the per-region R results are
# appended to; its columns are printed for reference.
pgmm = pd.read_excel(dataFolder + "Starter.xlsx")
print(pgmm.columns)

# Read R results
# One results CSV per entry; the file name is cleanedFolder + <entry> + file_end.
pgmm_files = [
    'Central Asia',
    'East Asia and Pacific',
    'Europe',
    'Latin America',
    'Middle East and North Africa',
    'United States',
    'Canada',
    'South Asia',
    'Sub-Saharan Africa'
]

# Descriptive names for the positional columns of the R output. V2 is not
# listed because it is renamed inside the loop: it becomes "State/Province" for
# the Canada / United States files and "Country" for every other file.
# (An earlier revision also renamed Level/Region/Country/State/Province to
# "Statistics ..." names; that mapping is disabled.)
pgmm_column_names = {
    "V1": 'Last Day of Week Excel Date',
    "V3": 'Cases Daily Last Day of Week',
    "V4": 'Total Cases Last Day of Week',
    "V5": 'Cases 7D Moving Average',
    "V6": 'Cases Last Day of Week Rate 100K',
    "V7": 'Deaths Daily Last Day of Week',
    "V8": 'Total Deaths Last Day of Week',
    "V9": 'Deaths 7D Moving Average',
    "V10": 'Deaths Last Day of Week Rate 100K',
    "V11": 'Speed',  # Cases Last Day of Week Rate 100K 7D Moving Average
    "V12": 'Acceleration',  # Average Daily Change in Speed
    "V13": 'Jerk',  # Average Daily Change in Acceleration
    "V14": '1-Day Persistence',
    "V15": '7-Day Persistence',
}

# Collect every frame first and concatenate once after the loop; concatenating
# inside the loop re-copies the accumulated rows on every iteration.
pgmm_frames = [pgmm]
for r in pgmm_files:
    pgmm_read_file = cleanedFolder + r + file_end
    df = pd.read_csv(pgmm_read_file)
    df = df.drop(["Unnamed: 0"], axis=1)
    # Rows with V1 == 0 are discarded. The explicit copy makes the column
    # assignments below write to an independent frame rather than a filtered
    # slice of the CSV data (avoids SettingWithCopyWarning).
    df = df[df["V1"] != 0].copy()
    if r in ("Canada", "United States"):
        # These two files are filed under the "North America" region with the
        # file name as the country. A V2 value of "Region" is labelled as the
        # country-level row and gets a blank State/Province.
        df["Region"] = "North America"
        df = df.rename(columns={"V2": "State/Province"})
        df["Country"] = r
        is_region_row = df["State/Province"] == "Region"
        df["Level"] = np.where(is_region_row, "Country", "State/Province")
        df["State/Province"] = df["State/Province"].where(~is_region_row, "")
    else:
        # Every other file is a region whose V2 holds country names. A V2 value
        # of "Region" is labelled as the region-level row and gets a blank Country.
        df["Region"] = r
        df = df.rename(columns={"V2": "Country"})
        is_region_row = df["Country"] == "Region"
        df["Level"] = np.where(is_region_row, "Region", "Country")
        df["Country"] = df["Country"].where(~is_region_row, "")
        df["State/Province"] = ""
    df = df.rename(columns=pgmm_column_names)
    pgmm_frames.append(df)

pgmm = pd.concat(pgmm_frames, ignore_index=True, sort=False)

# Create Time Variables
# Excel serial day numbers count from 1899-12-30 (the same base as the
# "1900-01-01 ordinal + x - 2" arithmetic), so the whole column can be
# converted in one vectorised call.
pgmm["Last Day of Week Excel Date"] = pgmm["Last Day of Week Excel Date"].astype(int)
pgmm["Time"] = pd.to_datetime(
    pgmm["Last Day of Week Excel Date"], unit="D", origin="1899-12-30"
)
# NOTE(review): '%Y W%V' pairs the calendar year with the ISO week number, so
# dates near a year boundary can get a label such as "2021 W53". The format is
# left unchanged because "Week" is one of the merge keys used against input_df.
pgmm["Week"] = pgmm["Time"].dt.strftime('%Y W%V')
# Constant flag set on every row of the R results.
pgmm["Output"] = True

# Keep only the columns listed here, in this order (identifiers, time keys,
# the computed statistics, then the case and death figures).
pgmm_order = [
    'Region', 'Country', 'State/Province',
    'Last Day of Week Excel Date', 'Time', 'Week',
    'Speed', 'Acceleration', 'Jerk', '7-Day Persistence', '1-Day Persistence',
    'Cases Daily Last Day of Week', 'Total Cases Last Day of Week',
    'Cases 7D Moving Average', 'Cases Last Day of Week Rate 100K',
    'Deaths Daily Last Day of Week', 'Total Deaths Last Day of Week',
    'Deaths 7D Moving Average', 'Deaths Last Day of Week Rate 100K', 'Output'
]
pgmm = pgmm[pgmm_order]
pgmm.head()

Index(['Level', 'Region', 'Country', 'State/Province', 'Excel Date'], dtype='object')


Unnamed: 0,Region,Country,State/Province,Last Day of Week Excel Date,Time,Week,Speed,Acceleration,Jerk,7-Day Persistence,1-Day Persistence,Cases Daily Last Day of Week,Total Cases Last Day of Week,Cases 7D Moving Average,Cases Last Day of Week Rate 100K,Deaths Daily Last Day of Week,Total Deaths Last Day of Week,Deaths 7D Moving Average,Deaths Last Day of Week Rate 100K,Output
0,Central Asia,Armenia,,44141,2020-11-06,2020 W45,69.137573,-0.906343,-1.050972,62.714082,8.172587,2210.0,101773.0,2048.714286,74.580451,30.0,1506.0,29.428571,1.012404,True
1,Central Asia,Armenia,,44148,2020-11-13,2020 W46,60.792469,-2.444233,-1.08954,63.686635,7.378334,1703.0,114383.0,1801.428571,57.470818,29.0,1697.0,27.285714,0.978658,True
2,Central Asia,Armenia,,44155,2020-11-20,2020 W47,44.656672,-0.173555,2.77688,55.999474,5.230703,1667.0,123646.0,1323.285714,56.255933,30.0,1900.0,29.0,1.012404,True
3,Central Asia,Armenia,,44162,2020-11-27,2020 W48,41.942464,-0.920806,-2.198364,41.135855,5.001202,1476.0,132346.0,1242.857143,49.810292,22.0,2090.0,27.142857,0.74243,True
4,Central Asia,Armenia,,44169,2020-12-04,2020 W49,35.414867,-1.407724,1.04133,32.476041,3.906459,1184.0,139692.0,1049.428571,39.956224,23.0,2277.0,26.714286,0.776177,True


In [8]:
# Load the raw input table and drop the two alternative date-format columns.
input_df = pd.read_excel(cleanedFolder + "all_raw_input.xlsx")
input_df = input_df.drop(columns=["MM-DD-YYYY", "DD-MM-YYYY"])

# Casting to str turns missing cells into the text "nan"; map that back to an
# empty string. "State/Province" is a merge key against pgmm, where blanks are
# stored as "".
for text_col in ["State/Province", "Census Region", "Status", "Data Quality"]:
    as_text = input_df[text_col].astype(str)
    input_df[text_col] = as_text.where(as_text != "nan", "")

input_df.head()

Unnamed: 0,Level,Region,Country,Census Region,State/Province,Abbreviation,FIPS,Time,Date,Week,Status,Data Quality,Cases Daily,Cases Daily 7D Rolling,Total Cases,Cases Daily Rate,Cases Daily Rate 7D Rolling,Total Cases Rate,Tests Daily,Tests Daily 7D Rolling,Total Tests,Tests Daily Rate,Tests Daily Rate 7D Rolling,Total Tests Rate,Deaths Daily,Deaths Daily 7D Rolling,Total Deaths,Deaths Daily Rate,Deaths Daily Rate 7D Rolling,Total Deaths Rate,Positivity 7D Rolling,Speed Daily,Acceleration Daily,Jerk Daily,Negative Daily,Total Negative,Hospitalized Daily,Total Hospitalized,Currently Hospitalized,ICU Daily,Total ICU,Currently In ICU,Total In ICU,Currently On Ventilator,Total On Ventilator,Recovered Daily,Total Recovered,Active Daily,Total Active,Population,Population 100K,Country Population,Country Population 100K,Country Share,Region Population,Region Population 100K,Region Share,World Population,World Population 100K,World Share,World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,< 1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85+,1-4,5-14,15-24,25-34,35-44,45-54,55-64,65-74,75-84,Pct < 1,Pct 1-4,Pct 5-14,Pct 15-24,Pct 25-34,Pct 35-44,Pct 45-54,Pct 55-64,Pct 65-74,Pct 75-84,Pct 85+,Accessed
0,Country,Central Asia,Armenia,,,AM,,2020-03-01,03/01/2020,2020 W09,,,1,,1,0.033747,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020
1,Country,Central Asia,Armenia,,,AM,,2020-03-02,03/02/2020,2020 W10,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020
2,Country,Central Asia,Armenia,,,AM,,2020-03-03,03/03/2020,2020 W10,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020
3,Country,Central Asia,Armenia,,,AM,,2020-03-04,03/04/2020,2020 W10,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020
4,Country,Central Asia,Armenia,,,AM,,2020-03-05,03/05/2020,2020 W10,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020


In [10]:
# Attach the R results to the raw input rows. This is a left join, so every
# input row is kept and the pgmm columns are filled only where Region, Country,
# State/Province, Time and Week all match a pgmm row.
all_df = input_df.merge(pgmm,how="left",on=['Region', 'Country', 'State/Province', 'Time', 'Week'])

# Print the merged column names one per line (with a trailing comma) as an aid
# for maintaining the column list below.
for col in all_df.columns:
    print(str(col) + ",")

# Intended column order for the combined table.
# NOTE(review): this list is defined but not applied to all_df in this cell, so
# the frame keeps the merge's column order - confirm whether
# `all_df = all_df[all_order]` is wanted before the export.
# Fixed: the entry 'Total Case' did not match the frame's 'Total Cases' column
# and would raise KeyError as soon as the list is used for selection.
all_order = [
    'Level', 'Region', 'Country', 'State/Province', 'Census Region',
    'Time','Date','Week','Last Day of Week Excel Date', 'Output',
    'Speed', 'Acceleration', 'Jerk', '7-Day Persistence', '1-Day Persistence',
    'Cases Daily', 'Total Cases', 'Cases Daily Last Day of Week', 'Total Cases Last Day of Week',
    'Cases 7D Moving Average', 'Cases Last Day of Week Rate 100K',
    'Deaths Daily', 'Total Deaths', 'Deaths Daily Last Day of Week', 'Total Deaths Last Day of Week',
    'Deaths 7D Moving Average', 'Deaths Last Day of Week Rate 100K',
    'Negative Daily', 'Total Negative',
    'Hospitalized Daily', 'Total Hospitalized', 'Currently Hospitalized',
    'ICU Daily', 'Total ICU',
    'Currently In ICU', 'Total In ICU',
    'Currently On Ventilator',  'Total On Ventilator',
    'Recovered Daily', 'Total Recovered',
    'Active Daily', 'Total Active',
    "Population","Population 100K",
    "Country Population","Country Population 100K", "Country Share",
    "Region Population","Region Population 100K", "Region Share",
    "World Population", "World Population 100K", "World Share",
    'World Share (%)', 'Urban Population (%)', 'Annual Change (%)', 'Net Change', 'Migrants (net)', 'Density (P/Km²)',
    'Land Area (Km²)', 'Fertility Rate', 'Median Age',
    '< 1', 'Pct < 1',
    1, 2, 3, 4, '1-4', 'Pct 1-4',
    5, 6, 7, 8, 9, 10, 11, 12, 13, 14, '5-14', 'Pct 5-14',
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, '15-24', 'Pct 15-24',
    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, '25-34', 'Pct 25-34',
    35, 36, 37, 38, 39, 40, 41, 42, 43, 44, '35-44', 'Pct 35-44',
    45, 46, 47, 48, 49, 50, 51, 52, 53, 54, '45-54', 'Pct 45-54',
    55, 56, 57, 58, 59, 60, 61, 62, 63, 64, '55-64', 'Pct 55-64',
    65, 66, 67, 68, 69, 70, 71, 72, 73, 74, '65-74', 'Pct 65-74',
    75, 76, 77, 78, 79, 80, 81, 82, 83, 84, '75-84', 'Pct 75-84',
    '85+', 'Pct 85+'
]
all_df.head(14)

Level,
Region,
Country,
Census Region,
State/Province,
Abbreviation,
FIPS,
Time,
Date,
Week,
Status,
Data Quality,
Cases Daily,
Cases Daily 7D Rolling,
Total Cases,
Cases Daily Rate,
Cases Daily Rate 7D Rolling,
Total Cases Rate,
Tests Daily,
Tests Daily 7D Rolling,
Total Tests,
Tests Daily Rate,
Tests Daily Rate 7D Rolling,
Total Tests Rate,
Deaths Daily,
Deaths Daily 7D Rolling,
Total Deaths,
Deaths Daily Rate,
Deaths Daily Rate 7D Rolling,
Total Deaths Rate,
Positivity 7D Rolling,
Speed Daily,
Acceleration Daily,
Jerk Daily,
Negative Daily,
Total Negative,
Hospitalized Daily,
Total Hospitalized,
Currently Hospitalized,
ICU Daily,
Total ICU,
Currently In ICU,
Total In ICU,
Currently On Ventilator,
Total On Ventilator,
Recovered Daily,
Total Recovered,
Active Daily,
Total Active,
Population,
Population 100K,
Country Population,
Country Population 100K,
Country Share,
Region Population,
Region Population 100K,
Region Share,
World Population,
World Population 100K,
World Share,
World Sh

Unnamed: 0,Level,Region,Country,Census Region,State/Province,Abbreviation,FIPS,Time,Date,Week,Status,Data Quality,Cases Daily,Cases Daily 7D Rolling,Total Cases,Cases Daily Rate,Cases Daily Rate 7D Rolling,Total Cases Rate,Tests Daily,Tests Daily 7D Rolling,Total Tests,Tests Daily Rate,Tests Daily Rate 7D Rolling,Total Tests Rate,Deaths Daily,Deaths Daily 7D Rolling,Total Deaths,Deaths Daily Rate,Deaths Daily Rate 7D Rolling,Total Deaths Rate,Positivity 7D Rolling,Speed Daily,Acceleration Daily,Jerk Daily,Negative Daily,Total Negative,Hospitalized Daily,Total Hospitalized,Currently Hospitalized,ICU Daily,Total ICU,Currently In ICU,Total In ICU,Currently On Ventilator,Total On Ventilator,Recovered Daily,Total Recovered,Active Daily,Total Active,Population,Population 100K,Country Population,Country Population 100K,Country Share,Region Population,Region Population 100K,Region Share,World Population,World Population 100K,World Share,World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,< 1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85+,1-4,5-14,15-24,25-34,35-44,45-54,55-64,65-74,75-84,Pct < 1,Pct 1-4,Pct 5-14,Pct 15-24,Pct 25-34,Pct 35-44,Pct 45-54,Pct 55-64,Pct 65-74,Pct 75-84,Pct 85+,Accessed,Last Day of Week Excel Date,Speed,Acceleration,Jerk,7-Day Persistence,1-Day Persistence,Cases Daily Last Day of Week,Total Cases Last Day of Week,Cases 7D Moving Average,Cases Last Day of Week Rate 100K,Deaths Daily Last Day of Week,Total Deaths Last Day of Week,Deaths 7D Moving Average,Deaths Last Day of Week Rate 100K,Output
0,Country,Central Asia,Armenia,,,AM,,2020-03-01,03/01/2020,2020 W09,,,1,,1,0.033747,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,
1,Country,Central Asia,Armenia,,,AM,,2020-03-02,03/02/2020,2020 W10,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,
2,Country,Central Asia,Armenia,,,AM,,2020-03-03,03/03/2020,2020 W10,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,
3,Country,Central Asia,Armenia,,,AM,,2020-03-04,03/04/2020,2020 W10,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,
4,Country,Central Asia,Armenia,,,AM,,2020-03-05,03/05/2020,2020 W10,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,
5,Country,Central Asia,Armenia,,,AM,,2020-03-06,03/06/2020,2020 W10,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,
6,Country,Central Asia,Armenia,,,AM,,2020-03-07,03/07/2020,2020 W10,,,0,0.142857,1,0.0,0.004821,0.033747,0,0.0,0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,
7,Country,Central Asia,Armenia,,,AM,,2020-03-08,03/08/2020,2020 W10,,,0,0.0,1,0.0,0.0,0.033747,0,0.0,0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,,-0.142857,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,
8,Country,Central Asia,Armenia,,,AM,,2020-03-09,03/09/2020,2020 W11,,,0,0.0,1,0.0,0.0,0.033747,0,0.0,0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,,0.0,0.142857,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,
9,Country,Central Asia,Armenia,,,AM,,2020-03-10,03/10/2020,2020 W11,,,0,0.0,1,0.0,0.0,0.033747,0,0.0,0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,,0.0,0.0,-0.142857,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12/21/2020,,,,,,,,,,,,,,,


In [None]:
# Export the merged frame to an Excel workbook in the cleaned-data folder.
# The target path is printed first; the row index is not written to the file.
all_out_file = cleanedFolder + "all_out.xlsx"
print(all_out_file)
all_df.to_excel(all_out_file, index=False)

In [5]:
# Output cleaned dataset
# NOTE(review): this cell looks stale relative to the cells above - confirm before relying on it:
#   * `populations` is not defined in any cell visible in this notebook;
#   * the merge keys 'Statistics Region' / 'Statistics Country' / 'Statistics State/Province'
#     and the ordered columns 'Statistics Level', 'Last Day of Week' and 'ISO Week' are not
#     columns of `pgmm` as built above (it has 'Region', 'Country', 'State/Province',
#     'Time' and 'Week' instead), so selecting them would raise KeyError;
#   * its execution count is lower than that of the earlier cells.
# Left-join the population table onto the weekly statistics, keeping every pgmm row.
pgmm_out = pgmm.merge(populations,how="left",left_on=['Statistics Region', 'Statistics Country', 'Statistics State/Province'],right_on=['Region', 'Country', 'State/Province'])
# Relabel the population columns that came from the join as "Level ..." columns.
pgmm_out.rename(columns={
    "Population":"Level Population",
    "Population 100K":"Level Population 100K"
},inplace=True)
# Multiply each statistic by the row's "Region Share".
# NOTE(review): the resulting "Region Weighted ..." columns are not listed in
# pgmm_out_order, so the column selection below drops them again - confirm
# whether they are meant to be exported.
stats = ['7-Day Persistence','Speed', 'Acceleration', 'Jerk']
for stat in stats:
    pgmm_out["Region Weighted " + stat] = pgmm_out[stat]*pgmm_out["Region Share"]
# Columns to keep, in export order. The integer entries are column labels, not positions.
pgmm_out_order = [
    'Statistics Level', 'Statistics Region', 'Statistics Country', 'Statistics State/Province', 'Census Region',
    'Last Day of Week Excel Date', 'Last Day of Week', 'ISO Week',
    'Speed', 'Acceleration', 'Jerk', '7-Day Persistence', '1-Day Persistence',
    'Cases Daily Last Day of Week', 'Total Cases Last Day of Week',
    'Cases 7D Moving Average', 'Cases Last Day of Week Rate 100K',
    'Deaths Daily Last Day of Week', 'Total Deaths Last Day of Week',
    'Deaths 7D Moving Average', 'Deaths Last Day of Week Rate 100K',
    "Level Population","Level Population 100K",
    "Country Population","Country Population 100K", "Country Share",
    "Region Population","Region Population 100K", "Region Share",
    "World Population", "World Population 100K", "World Share",
    'World Share (%)', 'Urban Population (%)', 'Annual Change (%)', 'Net Change', 'Migrants (net)', 'Density (P/Km²)',
    'Land Area (Km²)', 'Fertility Rate', 'Median Age',
    '< 1', 1, 2, 3, 4, 5, 6, 7, 8, 9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
    50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
    70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, '85+',
    '1-4','5-14','15-24','25-34','35-44','45-54','55-64','65-74','75-84',
    'Pct < 1','Pct 1-4','Pct 5-14','Pct 15-24','Pct 25-34','Pct 35-44','Pct 45-54','Pct 55-64','Pct 65-74','Pct 75-84','Pct 85+'
]
pgmm_out = pgmm_out[pgmm_out_order]
pgmm_out.head()

Unnamed: 0,Statistics Level,Statistics Region,Statistics Country,Statistics State/Province,Census Region,Last Day of Week Excel Date,Last Day of Week,ISO Week,Speed,Acceleration,Jerk,7-Day Persistence,1-Day Persistence,Cases Daily Last Day of Week,Total Cases Last Day of Week,Cases 7D Moving Average,Cases Last Day of Week Rate 100K,Deaths Daily Last Day of Week,Total Deaths Last Day of Week,Deaths 7D Moving Average,Deaths Last Day of Week Rate 100K,Level Population,Level Population 100K,Country Population,Country Population 100K,Country Share,Region Population,Region Population 100K,Region Share,World Population,World Population 100K,World Share,World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,< 1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85+,1-4,5-14,15-24,25-34,35-44,45-54,55-64,65-74,75-84,Pct < 1,Pct 1-4,Pct 5-14,Pct 15-24,Pct 25-34,Pct 35-44,Pct 45-54,Pct 55-64,Pct 65-74,Pct 75-84,Pct 85+
0,Country,Central Asia,Armenia,,,44136,2020-11-01,2020 W44,69.503967,0.612264,-0.19766,55.167961,13.862791,2441.0,92254.0,2059.571429,82.375964,22.0,1363.0,26.142857,0.74243,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Country,Central Asia,Armenia,,,44143,2020-11-08,2020 W45,68.313186,-1.282379,-1.740371,60.515597,14.004426,2175.0,106424.0,2024.285714,73.399313,26.0,1559.0,28.0,0.877417,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Country,Central Asia,Armenia,,,44150,2020-11-15,2020 W46,52.611278,-3.340934,1.499323,59.47881,11.259031,1482.0,117337.0,1559.0,50.012773,25.0,1763.0,29.142857,0.84367,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Country,Central Asia,Armenia,,,44157,2020-11-22,2020 W47,42.843986,-0.467634,0.877417,45.807499,8.715417,1385.0,126224.0,1269.571429,46.739333,21.0,1952.0,27.0,0.708683,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Country,Central Asia,Armenia,,,44164,2020-11-29,2020 W48,41.190393,-1.017225,-1.282379,38.338748,7.231267,1174.0,134768.0,1220.571429,39.618756,21.0,2142.0,27.142857,0.708683,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [6]:
# Export the weekly statistics frame to an Excel workbook in the cleaned-data
# folder. The target path is printed first; the row index is not written.
pgmm_out_file = cleanedFolder + "pgmm.xlsx"
print(pgmm_out_file)
pgmm_out.to_excel(pgmm_out_file, index=False)

D:/Repositories/Global-COVID-Surveillance/data/cleaned/pgmm.xlsx
