In [297]:
# Dependencies
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import io
import time
from datetime import datetime, timedelta
import json
import pprint
pp = pprint.PrettyPrinter(indent=4)
pd.set_option('display.max_columns', None)

In [298]:
repositoryFolder = "D:/Repositories/Global-COVID-Surveillance/data/"
local_download = "C:/Users/janin/Downloads/"
demographicsFolder = repositoryFolder + "raw/demographics/"
configuredFolder = repositoryFolder + "configured/"
cleanedFolder = repositoryFolder + "cleaned/"
regionsFolder = repositoryFolder + "raw/regions/"
locationsFolder = repositoryFolder + "raw/locations/"

population_source_url ="https://www.worldometers.info/world-population/population-by-country/"
populationsFile = demographicsFolder + "Country Populations 2020.xlsx"
print(populationsFile)
# Statistics Canada Quarterly Population
canada_population_source = "https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710000901"
canada_population = demographicsFolder + "Canada Population.xlsx"
us_population_source = "https://www2.census.gov/programs-surveys/popest/tables/2010-2019/state/asrh/sc-est2019-agesex-civ.csv"
us_population = demographicsFolder + "US Population.xlsx"
populations_file = cleanedFolder + "Populations_cleaned.xlsx"

github_url="https://github.com/dsbbfinddx/FINDCov19TrackerData/blob/master/processed/data_all.csv?raw=true"
country_codes_coordinates = "https://raw.githubusercontent.com/dsbbfinddx/FINDCov19TrackerData/master/raw/countries_codes_and_coordinates.csv"
countries_geo = "https://raw.githubusercontent.com/dsbbfinddx/FINDCov19TrackerData/master/raw/countries.geo.json"
us_states_geo = "https://raw.githubusercontent.com/dsbbfinddx/FINDCov19TrackerData/master/raw/us-states.geo.json"

kaggle_locations = "https://www.kaggle.com/paultimothymooney/latitude-and-longitude-for-every-country-and-state"
locationsFile = demographicsFolder + "Country Geo.xlsx"

canada_source_csv = "https://opendata.arcgis.com/datasets/3afa9ce11b8842cb889714611e6f3076_0.csv"
us_source_csv = "https://covidtracking.com/data/download/all-states-history.csv"

south_africa_r = regionsFolder + "SSA-Temp.xlsx"
south_asia_r = regionsFolder + "SouthAsia excel updated 20201008.xlsx"
latin_america_r = regionsFolder + "LatinAmerica.xlsx"
central_asia_r = regionsFolder + "CentralAsia-Results.xlsx"
europe_r = regionsFolder + "Europe-Results-Updated.xlsx"
middle_east_r = regionsFolder + "Middle East Output.xlsx"
east_asia_pacific_r = regionsFolder + "East Asia and Pacific output.xlsx"
canada_r = regionsFolder + "_Canada Ouput.xlsx"
 
us_r = regionsFolder + "USState-Results.xlsx"
us_codes = demographicsFolder + "US State Codes.xlsx"

D:/Repositories/Global-COVID-Surveillance/data/raw/demographics/Country Populations 2020.xlsx


In [460]:
def titleCase(words):
    if len(words) > 3:
        titlecased = ""
        wordsArray = words.lower().split(" ")
        for word in wordsArray:
            if len(titlecased) > 0 :
                titlecased = titlecased + " "
            if word == "and":
                titlecased = titlecased + "and"
            else:
                titlecased = titlecased + word.capitalize()
        return titlecased
    else:
        return words.upper()

def fixRegion(code):
    region_name = ""
    for region in census_regions:
        if region["number"] == code:
            region_name = region["name"]
            break
    if region_name == "":
        region_name = "Other"
        print(str(code) + " not found")
    return region_name

# CDC Standard age ranges 0-17, 18-29, 30-49, and 50-64
# CDC COVID Reporting Age Ranges https://www.cdc.gov/nchs/nvss/vsrr/covid_weekly/index.htm
def getAgeRange(age):
    age_range = ""
    if age == 0:
        age_range = "< 1"
    elif age == 999:
        age_range = "Total"
    elif age < 5:
        age_range = "1-4"
    elif age < 15:
        age_range = "5-14"
    elif age < 25:
        age_range = "15-24"
    elif age < 35:
        age_range = "25-34"
    elif age < 45:
        age_range = "35-44"
    elif age < 55:
        age_range = "45-54"
    elif age < 65:
        age_range = "55-64"
    elif age < 75:
        age_range = "65-74"
    elif age < 85:
        age_range = "75-84"
    elif age == 85:
        age_range = "85+"
    return age_range

def fixSex(code):
    sex = ""
    if code == 0:
        sex = "Population 2019"
    elif code == 1:
        sex = "Male"
    elif code == 2:
        sex = "Female"
    else:
        print(str(code) + " is not a sex")
    return sex

def us_date(x):
    month = x[5:7]
    day = x[8:11]
    year = x[0:4]
    conversion = month + "/" + day +"/"+ year
    return conversion

def removeDecimal(data):
    strData = str(data)
    decimalLocation = strData.find(".")
    if decimalLocation > -1:
        return strData[0:decimalLocation]
    else:
        return strData

def emptyNan(value):
    if (value == "nan"):
        return ""
    else:
        return value

def printColumns(df, label):
    print(label)
    print(df.columns)

def print_column_unique(column):
    print("Column Values:")
    values = column.sort_values(ascending = True).unique()
    print(values)
    return values

def print_column_missing(column, comparison):
    values = print_column_unique(column)
    print("Comparison:")
    print(comparison)
    missing_values = []
    for value in values:
        if not value in comparison:
            missing_values.append(value)
    if len(missing_values) > 0:
        print("Column values not in comparison:")
        print(missing_values)
    else:
        print("No missing values")
    missing_values = []
    for value in comparison:
        if not value in values:
            missing_values.append(value)
    if len(missing_values) > 0:
        print("Comparison values not in column:")
        print(missing_values)
    else:
        print("No missing values")
    return values

def division(a,b):
    if not (b == 0):
        return a/b
    else:
        return np.nan

In [300]:
# Countries and Regions

european_countries = [
    'Albania','Andorra','Austria','Belarus','Belgium','Bosnia & Herzegovina','Bulgaria',
    'Croatia','Czech Republic','Denmark','Estonia','Finland','France',
    'Germany','Greece','Greenland','Hungary','Iceland','Ireland','Isle of Man','Italy',
    'Latvia','Liechtenstein','Lithuania','Luxembourg','Malta','Moldova','Monaco','Montenegro',
    'Netherlands','Norway','Poland','Portugal','Romania',
    'San Marino','Serbia','Slovakia','Slovenia','Spain','Sweden','Switzerland',
    'Ukraine','United Kingdom','Vatican City'
]
carribean_countries = [
    "Antigua & Barbuda","Aruba","Bahamas","Barbados","Bermuda","British Virgin Islands",
    "Cayman Islands","Cuba","Curacao","Dominica","Dominican Republic","Grenada",
    "Haiti","Jamaica","Puerto Rico","St. Kitts & Nevis","St. Lucia","St. Vincent & Grenadines",
    "Sint Maarten","Trinidad & Tobago","Turks and Caicos Islands","United States Virgin Islands"
]
central_south_america_countries = [
    'Argentina','Belize','Bolivia','Brazil','Chile','Colombia','Costa Rica',
    'Ecuador','El Salvador','Guatemala','Guyana','Honduras',
    'Mexico','Nicaragua','Panama','Paraguay','Peru','Suriname','Uruguay','Venezuela'
]
latin_american_countries = carribean_countries + central_south_america_countries
sub_saharan_african_countries = [
    "Angola","Benin","Botswana","Burkina Faso","Burundi",
    "Cabo Verde","Cameroon","Central African Republic","Chad","Comoros","Côte d’Ivoire",
    "Democratic Republic of Congo","Equatorial Guinea","Eritrea","Ethiopia",
    "Gabon","Gambia","Ghana","Guinea","Guinea-Bissau","Kenya","Lesotho","Liberia",
    "Madagascar","Malawi","Mali","Mauritania","Mauritius","Mozambique",
    "Namibia","Niger","Nigeria","Republic of the Congo","Rwanda",
    "São Tomé and Príncipe","Senegal","Seychelles","Sierra Leone",
    "Somalia","South Africa","South Sudan","Sudan","Swaziland",
    "Tanzania","Togo","Uganda","Zambia","Zimbabwe"
]
south_asia_countries = [
    "Afghanistan","Bangladesh","Bhutan","India","Maldives","Nepal","Pakistan","Sri Lanka"
]
central_asian_countries = [
    'Armenia','Azerbaijan','Cyprus','Faeroe Islands','Georgia','Gibraltar','Kazakhstan','Kosovo','Kyrgyzstan',
    'North Macedonia','Russia','Tajikistan','Turkey','Turkmenistan','Uzbekistan'
]
east_asian_countries = [
    "Brunei","Cambodia","China","Indonesia","Japan","Laos","Malaysia","Mongolia","Myanmar","Niue","North Korea","Philippines",
    "Singapore","South Korea","Taiwan","Thailand","Timor","Vietnam"
]
pacific_countries = [
    "Australia","Cook Islands","Fiji","French Polynesia","Guam","Kiribati",
    "Marshall Islands","Micronesia","Nauru","New Caledonia","New Zealand",
    "Northern Mariana Islands","Palau","Papua New Guinea","Samoa","Solomon Islands","Tonga","Tuvalu","Vanuatu"
]
east_asia_and_pacific_countries = east_asian_countries + pacific_countries
middle_eastern_countries = [
    "Bahrain","Iran","Iraq","Israel","Jordan","Kuwait","Lebanon","Oman","Qatar","Saudi Arabia","Syria",
    "United Arab Emirates","Yemen"
]
north_african_countries = [
    "Algeria","Djibouti","Egypt","Libya","Morocco","Tunisia"
]
middle_east_and_north_africa_countries = middle_eastern_countries + north_african_countries 
north_american_countries = ["Canada","United States"]
configured_country_lists = [
    european_countries,
    latin_american_countries,
    sub_saharan_african_countries,
    south_asia_countries,
    central_asian_countries,
    middle_east_and_north_africa_countries,
    east_asia_and_pacific_countries
]
configured_countries = []
for country_list in configured_country_lists:
    for country in country_list:
        configured_countries.append(country)
print("Configured Countries")
configured_countries.sort()
print(configured_countries)
#print(configured_countries)
all_countries = configured_countries +  north_american_countries
all_countries.sort()
print("All Countries")
print(all_countries)
print("Configured Regions")
configured_regions = [
    'Central Asia',
    'East Asia and Pacific',
    'Europe',
    'Latin America',
    'Middle East and North Africa',
    'North America',
    'South Asia',
    'Sub-Saharan Africa'
]
print(configured_regions)
print("Country Conversions")
country_conversions = {
    "Antigua & Barbuda": ["Antigua and Barbuda"],
    "Bahamas": ["Bahamas, The"],
    "Bosnia & Herzegovina": ["Bosnia and Herzegovina"],
    "Brunei": ["Brunei Darussalam"],
    "Cabo Verde": ["Cape Verde"],
    "Côte d’Ivoire": ["Cote d'Ivoire","Cote dIvoire"],
    "Czech Republic": ["Czechia","Czech Republic (Czechia)"],
    "Democratic Republic of Congo": ["Congo - Kinshasa"],
    "Egypt": ["Egypt, Arab Rep."],
    "Faeroe Islands": ["Faroe Islands"],
    "Gambia": ["Gambia, The"],
    "Iran": ["Iran, Islamic Rep."],
    "Kyrgyzstan": ["Kyrgyz Republic"],
    "Laos": ["Lao PDR"],
    "Micronesia": ["Micronesia, Fed. Sts."],
    "Myanmar": ["Myanmar (Burma)","Burma"],
    "North Macedonia": ["Macedonia"],
    "Republic of the Congo": ["Congo - Brazzaville"],
    "Russia": ["Russian Federation"],
    "São Tomé and Príncipe": ["Sao Tome and Principe","Sao Tome & Príncipe","São Tomé & Príncipe"],
    "Sint Maarten": ["Sint Maarten (Dutch part)"],
    "Slovakia": ["Slovak Republic"],
    "St. Kitts & Nevis": ["Saint Kitts and Nevis"],
    "St. Lucia": ["Saint Lucia"],
    "St. Vincent & Grenadines": ["Saint Vincent and the Grenadines"],
    "Swaziland": ["Eswatini"],
    "Syria": ["Syrian Arab Republic"],
    "Timor": ["Timor-Leste"],
    "Trinidad & Tobago": ["Trinidad and Tobago"],
    "Vatican City": ["Holy See"],
    "Yemen": ["Yemen, Rep."],
    "" : ["nan"]
}
unincorporated_disputed_territories = [
    "American Samoa", "Anguilla","Caribbean Netherlands","Channel Islands","Curaçao",
    "Falkland Islands","French Guiana","Guadeloupe","Hong Kong"
]
print(country_conversions)
print("Countries by Region")
countries_by_region = {
    'Central Asia': central_asian_countries,
    'Europe': european_countries,
    'Latin America': latin_american_countries,
    'South Asia': south_asia_countries,
    'Sub-Saharan Africa': sub_saharan_african_countries,
    'Middle East and North Africa': middle_east_and_north_africa_countries,
    'East Asia and Pacific': east_asia_and_pacific_countries,
    'North America': north_american_countries
}
print(countries_by_region)

def key_from_value(value, dictionary, default):
    return_value = default
    for key, values in dictionary.items():
        if value.strip() in values:
            return_value = key
            break
    return return_value.strip()

def region_from_country(country):
    return key_from_value(country, countries_by_region, "")

def fixCountry(value):
    return key_from_value(value, country_conversions, value)

def fixCountries(countries_column, configuredCountries):
    countries_conversion = countries_column.astype(str)
    countries_conversion = countries_conversion.apply(lambda x: fixCountry(x))
    print(conversions)
    countries = print_column_missing(countries_conversion,configuredCountries)
    return countries_conversion

def testConversion(title, test_array, conversion):
    print(title)
    no_conversions = []
    for value in test_array:
        return_value = ""
        if conversion == "country":
            return_value = fixCountry(value)
        elif conversion == "region":
            return_value = region_from_country(fixCountry(value))
        if return_value != value.strip():
            print(value.strip() + "," + return_value)
        if return_value == "":
            no_conversions.append(value)
    if len(no_conversions) > 0:
        print("Missing Conversions")
        print(no_conversions)
    print("")

Configured Countries
['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua & Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bosnia & Herzegovina', 'Botswana', 'Brazil', 'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Cayman Islands', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Cook Islands', 'Costa Rica', 'Croatia', 'Cuba', 'Curacao', 'Cyprus', 'Czech Republic', 'Côte d’Ivoire', 'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 'Faeroe Islands', 'Fiji', 'Finland', 'France', 'French Polynesia', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Gibraltar', 'Greece', 'Greenland', 'Grenada', 'Guam

In [301]:
input_countries = ['Afghanistan','Albania','Algeria','Andorra','Angola','Antigua & Barbuda',
 'Argentina','Armenia','Australia','Austria','Azerbaijan','Bahamas',
 'Bahrain','Bangladesh','Barbados','Belarus','Belgium','Belize','Benin',
 'Bermuda','Bhutan','Bolivia','Bosnia & Herzegovina','Botswana','Brazil',
 'Brunei','Bulgaria','Burkina Faso','Burundi','Cambodia','Cameroon',
 'Canada','Cape Verde','Central African Republic','Chad','Chile','China',
 'Colombia','Comoros','Congo - Brazzaville','Congo - Kinshasa',
 'Costa Rica','Croatia','Cuba','Cyprus','Czechia','Côte d’Ivoire',
 'Denmark','Djibouti','Dominica','Dominican Republic','Ecuador','Egypt',
 'El Salvador','Equatorial Guinea','Eritrea','Estonia','Eswatini',
 'Ethiopia','Faroe Islands','Fiji','Finland','France','French Polynesia',
 'Gabon','Gambia','Georgia','Germany','Ghana','Greece','Greenland',
 'Grenada','Guatemala','Guinea','Guinea-Bissau','Guyana','Haiti',
 'Honduras','Hong Kong SAR China','Hungary','Iceland','India','Indonesia',
 'Iran','Iraq','Ireland','Israel','Italy','Jamaica','Japan','Jordan',
 'Kazakhstan','Kenya','Kosovo','Kuwait','Kyrgyzstan','Laos','Latvia',
 'Lebanon','Lesotho','Liberia','Libya','Liechtenstein','Lithuania',
 'Luxembourg','Madagascar','Malawi','Malaysia','Maldives','Mali','Malta',
 'Mauritania','Mauritius','Mexico','Moldova','Monaco','Mongolia',
 'Montenegro','Morocco','Mozambique','Myanmar (Burma)','Namibia','Nepal',
 'Netherlands','New Caledonia','New Zealand','Nicaragua','Niger','Nigeria',
 'North Macedonia','Norway','Oman','Pakistan','Palestinian Territories',
 'Panama','Papua New Guinea','Paraguay','Peru','Philippines','Poland',
 'Portugal','Qatar','Romania','Russia','Rwanda','San Marino',
 'Saudi Arabia','Senegal','Serbia','Seychelles','Sierra Leone','Singapore',
 'Slovakia','Slovenia','Somalia','South Africa','South Korea',
 'South Sudan','Spain','Sri Lanka','St. Kitts & Nevis','St. Lucia',
 'St. Vincent & Grenadines','Sudan','Suriname','Sweden','Switzerland',
 'Syria','São Tomé & Príncipe','Taiwan','Tajikistan','Tanzania','Thailand',
 'Timor-Leste','Togo','Trinidad & Tobago','Tunisia','Turkey','Uganda',
 'Ukraine','United Arab Emirates','United Kingdom','United States',
 'Uruguay','Uzbekistan','Vatican City','Venezuela','Vietnam',
 'Western Sahara','Yemen','Zambia','Zimbabwe','nan']
 
#testConversion("Country Fixes",input_countries,"country")
#testConversion("Region Assignments",input_countries,"region")



In [302]:
print(populationsFile)
country_populations = pd.read_excel(populationsFile)
country_populations.rename(
    columns = {
        'Data Source':'Population Data Source'
    }, 
    inplace = True)
country_populations["Population (100K)"] = country_populations["Population"]/100000
country_populations["Population Source"] = population_source_url
conversions = {}
country_populations["Country"] = country_populations["Country"].astype(str)
country_populations["Country"] = country_populations["Country"].apply(lambda x: fixCountry(x))
print(conversions)
#country_populations["Migrants (net)"] = country_populations["Migrants (net)"].apply(lambda x: pd.to_numeric(x, errors='coerce', downcast='integer'))
#print_column_missing(country_populations["Country"],all_countries)
print(country_populations.columns)
country_populations_order = ['Country', 
                             'Population', 'Population (100K)', 
                             'World Share (%)', 'Urban Population (%)', 
                             'Annual Change (%)', 'Net Change', 'Migrants (net)', 
                             'Density (P/Km²)', 'Land Area (Km²)', 
                             'Fertility Rate', 'Median Age', 
                             'Population Source']
#print_column_missing(country_populations.columns, country_populations_order)
#print(country_populations_order)
country_populations = country_populations[country_populations_order]
country_populations.head()

D:/Repositories/Global-COVID-Surveillance/data/raw/demographics/Country Populations 2020.xlsx
{}
Index(['Rank', 'Country', 'Country Link', 'Year', 'Population',
       'Annual Change (%)', 'Net Change', 'Density (P/Km²)', 'Land Area (Km²)',
       'Migrants (net)', 'Fertility Rate', 'Median Age',
       'Urban Population (%)', 'World Share (%)', 'Population Data Source',
       'Population (100K)', 'Population Source'],
      dtype='object')


Unnamed: 0,Country,Population,Population (100K),World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,Population Source
0,Afghanistan,38928346,389.28346,0.5,25.0,2.33,886592,-62920.0,60,652860,4.6,18.0,https://www.worldometers.info/world-population...
1,Albania,2877797,28.77797,0.04,63.0,-0.11,-3120,-14000.0,105,27400,1.6,36.0,https://www.worldometers.info/world-population...
2,Algeria,43851044,438.51044,0.56,73.0,1.85,797990,-10000.0,18,2381740,3.1,29.0,https://www.worldometers.info/world-population...
3,American Samoa,55191,0.55191,0.0,88.0,-0.22,-121,,276,200,,,https://www.worldometers.info/world-population...
4,Andorra,77265,0.77265,0.0,88.0,0.16,123,,164,470,,,https://www.worldometers.info/world-population...


In [427]:
# Download input data
github_request=requests.get(github_url).content
c=pd.read_csv(io.StringIO(github_request.decode('utf-8')))
currentTime = datetime.now()

print("Columns")
print(c.columns)
print("Sets")
c["set"] = c["set"].astype(str)
sets = print_column_unique(c["set"])
print("Names")
c["name"] = c["name"].astype(str)
names = print_column_unique(c["name"])
print("Units")
c["unit"] = c["unit"].astype(str)
c["unit"] = c["unit"].apply(lambda x: "" if x=="unit" else x)
units = print_column_unique(c["unit"])
#print("Times")
c["time"] = c["time"].astype(str)
#times = print_column_unique(c["time"])

c["name"] = c["name"].astype(str)
c["name"] = c["name"].apply(lambda x: "" if x=="nan" else x)
conversions = {}
c["name"] = c["name"].astype(str)
c["name"] = c["name"].apply(lambda x: fixCountry(x))
print(conversions)
#print("Configured Countries")
#print_column_missing(c["name"],configured_countries)
#print("All Countries")
#print_column_missing(c["name"],all_countries)

c["region"] = c["name"].apply(lambda x: region_from_country(x))
#print("Regions")
#c_regions = print_column_missing(c["region"],configured_regions)

# Format text date and add datetime for date
c["time"] = c["time"].apply(lambda x: us_date(x))
c["date"] = pd.to_datetime(c["time"], format="%m/%d/%Y")
minmax_dates = c.groupby(["name"]).agg({"date": [np.min,np.max]})
#print(minmax_dates)
min_date = c["date"].min()
#print(min_date)
c.sort_values(by=['set','name','date'], inplace=True)

# Calculate changing cases
c["new_cases"] = c["all_cum_cases"].diff()
c["new_cases"] = np.where(c["new_cases"].notna(),c["new_cases"],c["all_cum_cases"])
#print(c[["time","date","all_cum_cases","new_cases"]])
c["new_deaths"] = c["all_cum_deaths"].diff()
c["new_deaths"] = np.where(c["new_deaths"].notna(),c["new_deaths"],c["all_cum_deaths"])
#print(c[["time","date","all_cum_deaths","new_deaths"]])
c["new_tests"] = c["all_cum_tests"].diff()
c["new_tests"] = np.where(c["new_tests"].notna(),c["new_tests"],c["all_cum_tests"])
#print(c[["time","date","all_cum_tests","new_tests"]])
c["new_negatives"] = ""
#print(c[["time","date","new_tests","new_cases","new_negatives"]])

# Format numeric columns
numeric_columns = ['pop_100k',
                   'new_cases_orig','new_deaths_orig','new_tests_orig',
                   'cap_cum_cases','cap_new_cases',
                   'cap_cum_deaths','cap_new_deaths',
                   'cap_cum_tests','cap_new_tests',
                   'all_cum_cases','all_new_cases','all_cum_deaths','all_new_deaths',
                   'all_cum_tests','all_new_tests',
                   'pos']
float_columns = ['pop_100k',
                 'cap_cum_cases','cap_new_cases','cap_cum_deaths',
                 'cap_new_deaths','cap_cum_tests','cap_new_tests']
integer_columns = ['new_cases_orig','new_deaths_orig','new_tests_orig',
                   'all_cum_cases','new_cases', 
                   'all_cum_deaths','new_deaths',
                   'all_cum_tests','all_new_tests','new_tests',
                   'pos']

c[float_columns] = c[float_columns].apply(pd.to_numeric)
c[integer_columns] = c[integer_columns].apply(lambda x: pd.to_numeric(x, errors='coerce', downcast='integer'))
has_data = c.all_cum_cases > 0
c = c[has_data]
c = c.where(c.notnull(), None)

# Add missing columns to match Google sheet
c["state"] = ""
c["county"] = ""
c["all_cum_neg"] = "" #c["all_cum_tests"] - c["all_cum_cases"]
c["all_new_neg"] = "" #c["all_new_tests"] - c["all_new_cases"]
c["hospitalized_currently"] = ""
c["hospitalized_cum"] = ""
c["SARS-CoV-2 Source"] = github_url
c["Level"] = "Country"
c["accessed"] = str(currentTime.month) + '/' + str(currentTime.day) + '/' + str(currentTime.year)

c = c.rename({'cum_tests_orig': "Total Tests",
              'new_tests_orig': "Tests Daily",
              'pop_100k': "Population (100K)",
              'new_cases_orig': "Cases Daily",
              'new_deaths_orig': "Deaths Daily",
              'cap_cum_cases': "Total Cases Per Capita",
              'cap_new_cases': "Cases Daily per Capita (7 day rolling average)",
              'cap_cum_deaths': "Total Deaths Per Capita",
              'cap_new_deaths': "Death Daily Per capita (7 day rolling average)",
              'cap_cum_tests': "Total Tests Per Capita (7 day rolling average)",
              'cap_new_tests': "Tests Daily Per Capita (7 day rolling average)",
              'all_cum_cases': "Total Cases",
              'all_new_cases': "Cases Daily (7 day rolling average)",
              'all_cum_deaths': "Total Deaths",
              'all_new_deaths': "Death Daily (7 day rolling average)",
              'all_cum_tests': "Total Tests (7 day rolling average)",
              'all_new_tests': "Tests Daily (7 day rolling average)",
              'pos': "Positivity Rate (7 day rolling average)"
             },axis=1)
print("Renamed Columns")
print(c.columns)

c.head()

Columns
Index(['set', 'name', 'unit', 'time', 'cum_tests_orig', 'new_tests_orig',
       'pop_100k', 'new_cases_orig', 'new_deaths_orig', 'cap_cum_cases',
       'cap_new_cases', 'cap_cum_deaths', 'cap_new_deaths', 'cap_cum_tests',
       'cap_new_tests', 'all_cum_cases', 'all_new_cases', 'all_cum_deaths',
       'all_new_deaths', 'all_cum_tests', 'all_new_tests', 'pos'],
      dtype='object')
Sets
Column Values:
['country' 'income' 'region']
Names
Column Values:
['Afghanistan' 'Albania' 'Algeria' 'Andorra' 'Angola' 'Antigua & Barbuda'
 'Argentina' 'Armenia' 'Australia' 'Austria' 'Azerbaijan' 'Bahamas'
 'Bahrain' 'Bangladesh' 'Barbados' 'Belarus' 'Belgium' 'Belize' 'Benin'
 'Bermuda' 'Bhutan' 'Bolivia' 'Bosnia & Herzegovina' 'Botswana' 'Brazil'
 'Brunei' 'Bulgaria' 'Burkina Faso' 'Burundi' 'Cambodia' 'Cameroon'
 'Canada' 'Cape Verde' 'Central African Republic' 'Chad' 'Chile' 'China'
 'Colombia' 'Comoros' 'Congo - Brazzaville' 'Congo - Kinshasa'
 'Costa Rica' 'Croatia' 'Cuba' 'Cyprus' '

Unnamed: 0,set,name,unit,time,Total Tests,Tests Daily,Population (100K),Cases Daily,Deaths Daily,Total Cases Per Capita,Cases Daily per Capita (7 day rolling average),Total Deaths Per Capita,Death Daily Per capita (7 day rolling average),Total Tests Per Capita (7 day rolling average),Tests Daily Per Capita (7 day rolling average),Total Cases,Cases Daily (7 day rolling average),Total Deaths,Death Daily (7 day rolling average),Total Tests (7 day rolling average),Tests Daily (7 day rolling average),Positivity Rate (7 day rolling average),region,date,new_cases,new_deaths,new_tests,new_negatives,state,county,all_cum_neg,all_new_neg,hospitalized_currently,hospitalized_cum,SARS-CoV-2 Source,Level,accessed
6517,country,Afghanistan,AF,02/24/2020,,,389.28,1,0,0.00256885,0,0,0,0,,1,0,0,0,0,,,South Asia,2020-02-24,1,0,0,,,,,,,,https://github.com/dsbbfinddx/FINDCov19Tracker...,Country,11/15/2020
6714,country,Afghanistan,AF,02/25/2020,,,389.28,0,0,0.00256885,0,0,0,0,,1,0,0,0,0,,,South Asia,2020-02-25,0,0,0,,,,,,,,https://github.com/dsbbfinddx/FINDCov19Tracker...,Country,11/15/2020
6911,country,Afghanistan,AF,02/26/2020,,,389.28,0,0,0.00256885,0,0,0,0,,1,0,0,0,0,,,South Asia,2020-02-26,0,0,0,,,,,,,,https://github.com/dsbbfinddx/FINDCov19Tracker...,Country,11/15/2020
7108,country,Afghanistan,AF,02/27/2020,,,389.28,0,0,0.00256885,0,0,0,0,,1,0,0,0,0,,,South Asia,2020-02-27,0,0,0,,,,,,,,https://github.com/dsbbfinddx/FINDCov19Tracker...,Country,11/15/2020
7305,country,Afghanistan,AF,02/28/2020,,,389.28,0,0,0.00256885,0,0,0,0,,1,0,0,0,0,,,South Asia,2020-02-28,0,0,0,,,,,,,,https://github.com/dsbbfinddx/FINDCov19Tracker...,Country,11/15/2020


In [459]:
concise_cols = ["set","name","time","date","Population (100K)","Cases Daily","Deaths Daily","Tests Daily"]
c_concise = c.loc[c["set"]=="country"].copy()
c_concise = c_concise[concise_cols].copy()
c_concise = c_concise.sort_values(by=["name","time"])
c_concise = c_concise.reset_index()
c_concise.drop(columns=["index","set"],inplace=True)


c_concise["Tests Daily"] = c_concise["Tests Daily"].apply(lambda x: np.nan if x==None else x)
data_cols = ["Cases","Tests","Deaths"]
for col in data_cols:
    original_col = col + " Daily"
    new_rolling_col = original_col + " 7D Rolling"
    new_rolling_rate_col = new_rolling_col + " Rate"
    total_col = "Total " + col
    total_rate_col = total_col + " Rate"
    c_concise[new_rolling_col] = c_concise.groupby("name").rolling(7)[original_col].mean().reset_index(drop=True)
    c_concise[new_rolling_rate_col] = c_concise[new_rolling_col]/c_concise["Population (100K)"]
    c_concise[total_col] = c_concise.groupby(["name"])[original_col].transform(pd.Series.cumsum)
    c_concise[total_rate_col] = c_concise[total_col]/c_concise["Population (100K)"]
c_concise["7D Positivity"] = c_concise["Cases Daily 7D Rolling"]/c_concise["Tests Daily 7D Rolling"]
c_concise["7D Positivity"] = c_concise["7D Positivity"].apply(lambda x: np.nan if x==np.inf else x)

c_concise = c_concise.rename({'name': "Country",
                              'time': "Time",
                              'date': "Date"
                             },axis=1)


c_concise.head(21)

Unnamed: 0,Country,Time,Date,Population (100K),Cases Daily,Deaths Daily,Tests Daily,Cases Daily 7D Rolling,Cases Daily 7D Rolling Rate,Total Cases,Total Cases Rate,Tests Daily 7D Rolling,Tests Daily 7D Rolling Rate,Total Tests,Total Tests Rate,Deaths Daily 7D Rolling,Deaths Daily 7D Rolling Rate,Total Deaths,Total Deaths Rate,7D Positivity
0,Afghanistan,02/24/2020,2020-02-24,389.28,1,0,,,,1,0.00256885,,,,,,,0,0,
1,Afghanistan,02/25/2020,2020-02-25,389.28,0,0,,,,1,0.00256885,,,,,,,0,0,
2,Afghanistan,02/26/2020,2020-02-26,389.28,0,0,,,,1,0.00256885,,,,,,,0,0,
3,Afghanistan,02/27/2020,2020-02-27,389.28,0,0,,,,1,0.00256885,,,,,,,0,0,
4,Afghanistan,02/28/2020,2020-02-28,389.28,0,0,,,,1,0.00256885,,,,,,,0,0,
5,Afghanistan,02/29/2020,2020-02-29,389.28,0,0,,,,1,0.00256885,,,,,,,0,0,
6,Afghanistan,03/01/2020,2020-03-01,389.28,0,0,,0.142857,0.000366978,1,0.00256885,,,,,0.0,0.0,0,0,
7,Afghanistan,03/02/2020,2020-03-02,389.28,0,0,,0.0,0.0,1,0.00256885,,,,,0.0,0.0,0,0,
8,Afghanistan,03/03/2020,2020-03-03,389.28,0,0,59.0,0.0,0.0,1,0.00256885,,,59.0,0.151562,0.0,0.0,0,0,
9,Afghanistan,03/04/2020,2020-03-04,389.28,0,0,0.0,0.0,0.0,1,0.00256885,,,59.0,0.151562,0.0,0.0,0,0,


In [304]:
# Get all countries
countries_df = c.loc[c["set"]=="country"]
countries_df["name"].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua & Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia & Herzegovina', 'Botswana', 'Brazil', 'Brunei',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia',
       'Cameroon', 'Canada', 'Central African Republic', 'Chad', 'Chile',
       'China', 'Colombia', 'Comoros', 'Costa Rica', 'Croatia', 'Cuba',
       'Cyprus', 'Czech Republic', 'Côte d’Ivoire',
       'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 'Fiji',
       'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany',
       'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guinea',
       'Guinea-Bissau', 'Guyana', 'Haiti', 'Hondura

Build Demographics

In [305]:
country_locations = pd.read_excel(locationsFile)
print(country_locations.columns)
country_locations.drop(columns=["Population","Alternate"], inplace = True)
country_locations.rename(columns = {'Region': 'Population Region'}, inplace = True)
conversions = {}
country_locations["Country"] = country_locations["Country"].astype(str)
country_locations["Country"] = country_locations["Country"].apply(lambda x: fixCountry(x))
print(conversions)
countries = print_column_missing(country_locations["Country"],all_countries)
country_locations.head()

Index(['Region', 'Country', 'Population', 'Latitude', 'Longitude',
       'Alternate'],
      dtype='object')
{}
Column Values:
['Albania' 'Algeria' 'Andorra' 'Angola' 'Antigua & Barbuda' 'Argentina'
 'Armenia' 'Aruba' 'Australia' 'Austria' 'Azerbaijan' 'Bahamas' 'Bahrain'
 'Barbados' 'Belarus' 'Belgium' 'Belize' 'Benin' 'Bermuda' 'Bolivia'
 'Bosnia & Herzegovina' 'Botswana' 'Brazil' 'British Virgin Islands'
 'Brunei' 'Bulgaria' 'Burkina Faso' 'Burundi' 'Cabo Verde' 'Cambodia'
 'Cameroon' 'Canada' 'Cayman Islands' 'Central African Republic' 'Chad'
 'Chile' 'China' 'Colombia' 'Comoros' 'Costa Rica' 'Croatia' 'Cuba'
 'Curacao' 'Cyprus' 'Czech Republic' 'Côte d’Ivoire'
 'Democratic Republic of Congo' 'Denmark' 'Djibouti' 'Dominica'
 'Dominican Republic' 'Ecuador' 'Egypt' 'El Salvador' 'Equatorial Guinea'
 'Eritrea' 'Estonia' 'Ethiopia' 'Faeroe Islands' 'Fiji' 'Finland' 'France'
 'French Polynesia' 'Gabon' 'Gambia' 'Georgia' 'Germany' 'Ghana'
 'Gibraltar' 'Greece' 'Greenland' 'Grenada' 'Gu

Unnamed: 0,Population Region,Country,Latitude,Longitude
0,Carribean,Antigua & Barbuda,17.060816,-61.796428
1,Carribean,Aruba,12.52111,-69.968338
2,Carribean,Bahamas,25.03428,-77.39628
3,Carribean,Barbados,13.193887,-59.543198
4,Carribean,Bermuda,32.321384,-64.75737


In [306]:
country_demographics_all = pd.merge(country_populations, country_locations, how="left", on="Country")
print(country_demographics_all.columns)
print("All Countries")
demographics_countries = country_demographics_all["Country"].sort_values(ascending = True).unique()
print(demographics_countries)
country_demographics_all["Country"] = country_demographics_all["Country"].astype(str)
print("Configured Countries")
print(configured_countries)
is_configured_demographics = country_demographics_all.Country.isin(configured_countries)
country_demographics = country_demographics_all[is_configured_demographics].copy()
print(country_demographics["Country"])
country_demographics.head()

Index(['Country', 'Population', 'Population (100K)', 'World Share (%)',
       'Urban Population (%)', 'Annual Change (%)', 'Net Change',
       'Migrants (net)', 'Density (P/Km²)', 'Land Area (Km²)',
       'Fertility Rate', 'Median Age', 'Population Source',
       'Population Region', 'Latitude', 'Longitude'],
      dtype='object')
All Countries
['Afghanistan' 'Albania' 'Algeria' 'American Samoa' 'Andorra' 'Angola'
 'Anguilla' 'Antigua & Barbuda' 'Argentina' 'Armenia' 'Aruba' 'Australia'
 'Austria' 'Azerbaijan' 'Bahamas' 'Bahrain' 'Bangladesh' 'Barbados'
 'Belarus' 'Belgium' 'Belize' 'Benin' 'Bermuda' 'Bhutan' 'Bolivia'
 'Bosnia & Herzegovina' 'Botswana' 'Brazil' 'British Virgin Islands'
 'Brunei' 'Bulgaria' 'Burkina Faso' 'Burundi' 'Cabo Verde' 'Cambodia'
 'Cameroon' 'Canada' 'Caribbean Netherlands' 'Cayman Islands'
 'Central African Republic' 'Chad' 'Channel Islands' 'Chile' 'China'
 'Colombia' 'Comoros' 'Cook Islands' 'Costa Rica' 'Croatia' 'Cuba'
 'Curaçao' 'Cyprus' 'Czech Repub

Unnamed: 0,Country,Population,Population (100K),World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,Population Source,Population Region,Latitude,Longitude
0,Afghanistan,38928346,389.28346,0.5,25.0,2.33,886592,-62920.0,60,652860,4.6,18.0,https://www.worldometers.info/world-population...,,,
1,Albania,2877797,28.77797,0.04,63.0,-0.11,-3120,-14000.0,105,27400,1.6,36.0,https://www.worldometers.info/world-population...,Europe,41.153332,20.168331
2,Algeria,43851044,438.51044,0.56,73.0,1.85,797990,-10000.0,18,2381740,3.1,29.0,https://www.worldometers.info/world-population...,Middle East and North Africa,28.033886,1.659626
4,Andorra,77265,0.77265,0.0,88.0,0.16,123,,164,470,,,https://www.worldometers.info/world-population...,Europe,42.546245,1.601554
5,Angola,32866272,328.66272,0.42,67.0,3.27,1040977,6413.0,26,1246700,5.6,17.0,https://www.worldometers.info/world-population...,Sub-Saharan Africa,-11.202692,17.873887


In [394]:
is_configured = c.name.isin(configured_countries)
configured = c[is_configured].copy()
configured.head(-10)

script_order = ["time","region","name","state","county","pop_100k",
                "all_cum_cases","all_cum_neg",
        

        "hospitalized_currently","hospitalized_cum",
                "all_cum_deaths","new_deaths",
                "all_new_neg","new_cases","new_tests","pos",
                "SARS-CoV-2 Source","accessed"]
column_names = ["Date","Region","Country","State","County","Population 100k",
                "Positive Total", "Negative Total",
                "Hospitalized Currently", "Hospitalized Cumulative",
                "Deaths Total","Death Daily",
                "Negative Daily","Positive Daily","Tests Daily","Pos",
                "SARS-CoV-2 Source","SARS-CoV-2 Accessed"]
integer_output = ["Positive Total", "Negative Total",
                "Hospitalized Currently", "Hospitalized Cumulative",
                "Deaths Total","Death Daily",
                "Negative Daily","Positive Daily","Tests Daily","Pos"]

configured_output = configured[script_order].copy()
configured_output.columns = column_names
configured_output[integer_output] = configured_output[integer_output].apply(lambda x: pd.to_numeric(x, 
                                                                                                    errors='coerce', 
                                                                                                    downcast='integer'))
configured_output = configured_output[configured_output.columns].astype(str)
for i in configured_output.columns:
    configured_output[i] = configured_output[i].apply(lambda x: emptyNan(x))
configured_output.head(-10)

Unnamed: 0,Date,Region,Country,State,County,Population 100k,Positive Total,Negative Total,Hospitalized Currently,Hospitalized Cumulative,Deaths Total,Death Daily,Negative Daily,Positive Daily,Tests Daily,Pos,SARS-CoV-2 Source,SARS-CoV-2 Accessed
6517,02/24/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,1,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020
6714,02/25/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,0,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020
6911,02/26/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,0,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020
7108,02/27/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,0,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020
7305,02/28/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,0,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56793,10/29/2020,Sub-Saharan Africa,Zimbabwe,,,148.63,8349,,,,242,0,,29,3999,0.009640102827763496,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020
56996,10/30/2020,Sub-Saharan Africa,Zimbabwe,,,148.63,8362,,,,242,0,,13,3998,0.007334963325183373,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020
57199,10/31/2020,Sub-Saharan Africa,Zimbabwe,,,148.63,8367,,,,243,1,,5,3999,0.0055270430319778905,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020
57402,11/01/2020,Sub-Saharan Africa,Zimbabwe,,,148.63,8374,,,,243,0,,7,3999,0.004632693580410324,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020


In [395]:
# Merge input data and demographics
merged_input = pd.merge(configured_output, country_demographics, how="left", on="Country")
print_column_unique(merged_input["Region"])
print(merged_input.dtypes)
merged_input.head()

Column Values:
['Central Asia' 'East Asia and Pacific' 'Europe' 'Latin America'
 'Middle East and North Africa' 'South Asia' 'Sub-Saharan Africa']
Date                        object
Region                      object
Country                     object
State                       object
County                      object
Population 100k             object
Positive Total              object
Negative Total              object
Hospitalized Currently      object
Hospitalized Cumulative     object
Deaths Total                object
Death Daily                 object
Negative Daily              object
Positive Daily              object
Tests Daily                 object
Pos                         object
SARS-CoV-2 Source           object
SARS-CoV-2 Accessed         object
Population                 float64
Population (100K)          float64
World Share (%)            float64
Urban Population (%)       float64
Annual Change (%)          float64
Net Change                 float64
Migrants (net

Unnamed: 0,Date,Region,Country,State,County,Population 100k,Positive Total,Negative Total,Hospitalized Currently,Hospitalized Cumulative,Deaths Total,Death Daily,Negative Daily,Positive Daily,Tests Daily,Pos,SARS-CoV-2 Source,SARS-CoV-2 Accessed,Population,Population (100K),World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,Population Source,Population Region,Latitude,Longitude
0,02/24/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,1,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020,38928346.0,389.28346,0.5,25.0,2.33,886592.0,-62920.0,60.0,652860.0,4.6,18.0,https://www.worldometers.info/world-population...,,,
1,02/25/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,0,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020,38928346.0,389.28346,0.5,25.0,2.33,886592.0,-62920.0,60.0,652860.0,4.6,18.0,https://www.worldometers.info/world-population...,,,
2,02/26/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,0,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020,38928346.0,389.28346,0.5,25.0,2.33,886592.0,-62920.0,60.0,652860.0,4.6,18.0,https://www.worldometers.info/world-population...,,,
3,02/27/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,0,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020,38928346.0,389.28346,0.5,25.0,2.33,886592.0,-62920.0,60.0,652860.0,4.6,18.0,https://www.worldometers.info/world-population...,,,
4,02/28/2020,South Asia,Afghanistan,,,389.28,1,,,,0,0,,0,0,,https://github.com/dsbbfinddx/FINDCov19Tracker...,11/14/2020,38928346.0,389.28346,0.5,25.0,2.33,886592.0,-62920.0,60.0,652860.0,4.6,18.0,https://www.worldometers.info/world-population...,,,


In [396]:
# Write input data
configuredDemographics = configuredFolder + "raw_demographics.xlsx"
merged_input.to_excel(configuredDemographics, index = False)

Output New Measures

In [397]:
pgmm_ssa = pd.read_excel(south_africa_r)
pgmm_ssa.rename(columns = {'County': 'Country'}, inplace = True)
print(pgmm_ssa.columns)
fixCountries(pgmm_ssa["Country"],sub_saharan_african_countries)
conversions = {}
pgmm_ssa["Region"] = "Sub-Saharan Africa"
pgmm_ssa["Country"] = pgmm_ssa["Country"].astype(str)
pgmm_ssa["Country"] = pgmm_ssa["Country"].apply(lambda x: fixCountry(x))
pgmm_ssa["Level"] = "Country"
pgmm_ssa.rename(columns = {'Rate of Deaths': 'Death Rate'}, inplace = True)
print(conversions)
print_column_missing(pgmm_ssa["Country"],sub_saharan_african_countries)
print(pgmm_ssa.dtypes)
pgmm_ssa.to_excel(cleanedFolder + "ss_africa.xlsx", index = False)
pgmm_ssa[pgmm_ssa["Country"].str.contains("Congo")].head()

Index(['Date', 'Country', 'New COVID Cases', 'Cumulative COVID Cases',
       '7 Day Moving Average New Cases', 'Rate of Infection', 'New Deaths',
       'Cumulative Deaths', '7 Day Moving Average of Death Rate',
       'Rate of Deaths', 'Speed', 'Acceleration', 'Jerk', '1 Day Persistence',
       '7 Day Persistence'],
      dtype='object')
{}
Column Values:
['Angola' 'Benin' 'Botswana' 'Burkina Faso' 'Burundi' 'Cabo Verde'
 'Cameroon' 'Central African Republic' 'Chad' 'Comoros' 'Côte d’Ivoire'
 'Democratic Republic of Congo' 'Equatorial Guinea' 'Eritrea' 'Ethiopia'
 'Gabon' 'Gambia' 'Ghana' 'Guinea' 'Guinea-Bissau' 'Kenya' 'Lesotho'
 'Liberia' 'Madagascar' 'Malawi' 'Mali' 'Mauritania' 'Mauritius'
 'Mozambique' 'Namibia' 'Niger' 'Nigeria' 'Republic of the Congo' 'Rwanda'
 'Senegal' 'Seychelles' 'Sierra Leone' 'Somalia' 'South Africa'
 'South Sudan' 'Sudan' 'Swaziland' 'São Tomé and Príncipe' 'Togo' 'Uganda'
 'Zambia' 'Zimbabwe']
Comparison:
['Angola', 'Benin', 'Botswana', 'Burkina Faso

Unnamed: 0,Date,Country,New COVID Cases,Cumulative COVID Cases,7 Day Moving Average New Cases,Rate of Infection,New Deaths,Cumulative Deaths,7 Day Moving Average of Death Rate,Death Rate,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,Region,Level
22,2020-09-08,Republic of the Congo,0,4891,130.285714,0.0,12,102,5.142857,0.223027,2.421439,0.0,-0.698288,12.186655,0.0,Sub-Saharan Africa,Country
23,2020-09-15,Republic of the Congo,0,4934,6.142857,0.0,0,88,,0.0,0.114169,0.0,0.682357,0.57459,12.965255,Sub-Saharan Africa,Country
24,2020-09-08,Democratic Republic of Congo,59,10292,26.857143,0.06798,0,260,0.142857,0.0,0.030945,0.008559,0.013333,1.817308,3.028069,Sub-Saharan Africa,Country
25,2020-09-15,Democratic Republic of Congo,11,10401,15.571429,0.012674,3,267,1.0,0.003457,0.017941,-0.007901,-0.004115,2.097922,2.672662,Sub-Saharan Africa,Country


In [398]:
pgmm_sa = pd.read_excel(south_asia_r)
print(pgmm_sa.columns)
conversions = {}
pgmm_sa["Region"] = "South Asia"
pgmm_sa["Country"] = pgmm_sa["Country"].astype(str)
pgmm_sa["Country"] = pgmm_sa["Country"].apply(lambda x: fixCountry(x))
pgmm_sa.rename(columns = {'7 Day Moving Average of Death Rate': '7 Day Moving Average Deaths',
                          'Rate of Deaths': 'Death Rate'}, 
               inplace = True)
pgmm_sa["Level"] = "Country"
print(conversions)
print_column_missing(pgmm_sa["Country"],south_asia_countries)
print(pgmm_sa.dtypes)
pgmm_sa.to_excel(cleanedFolder + "South_Asia_Cleaned.xlsx", index = False)
pgmm_sa.head()

Index(['Country', 'Date', 'New COVID Cases', 'Cumulative COVID Cases',
       '7 Day Moving Average New Cases', 'Rate of Infection', 'New Deaths',
       'Cumulative Deaths', '7 Day Moving Average of Death Rate',
       'Rate of Deaths', 'Speed', 'Acceleration', 'Jerk', '7 Day Persistence'],
      dtype='object')
{}
Column Values:
['Afghanistan' 'Bangladesh' 'Bhutan' 'India' 'Maldives' 'Nepal' 'Pakistan'
 'Sri Lanka']
Comparison:
['Afghanistan', 'Bangladesh', 'Bhutan', 'India', 'Maldives', 'Nepal', 'Pakistan', 'Sri Lanka']
No missing values
No missing values
Country                                   object
Date                              datetime64[ns]
New COVID Cases                            int64
Cumulative COVID Cases                     int64
7 Day Moving Average New Cases           float64
Rate of Infection                        float64
New Deaths                                 int64
Cumulative Deaths                          int64
7 Day Moving Average Deaths              fl

Unnamed: 0,Country,Date,New COVID Cases,Cumulative COVID Cases,7 Day Moving Average New Cases,Rate of Infection,New Deaths,Cumulative Deaths,7 Day Moving Average Deaths,Death Rate,Speed,Acceleration,Jerk,7 Day Persistence,Region,Level
0,Afghanistan,2020-09-22,22,39096,40.142857,0.057831,1,1445,2.714286,0.002629,0.105523,-0.007886,0.001878,1.09,South Asia,Country
1,Bangladesh,2020-09-22,1557,352178,1588.857143,0.954944,28,5007,29.285714,0.017173,0.974483,-0.014632,-0.005082,1.17,South Asia,Country
2,Bhutan,2020-09-22,0,261,2.142857,0.0,0,0,0.0,0.0,0.280812,-0.018721,-0.018721,1.1,South Asia,Country
3,India,2020-09-22,0,5562663,77472.0,0.0,0,88935,981.285714,0.0,5.669716,-0.942224,-0.850995,1.67,South Asia,Country
4,Maldives,2020-09-22,48,9818,70.0,9.040348,0,34,0.142857,0.0,13.183841,-0.995515,-0.349775,2.46,South Asia,Country


In [399]:
pgmm_la = pd.read_excel(latin_america_r)
print(pgmm_la.columns)
conversions = {}
pgmm_la["Region"] = "Latin America"
pgmm_la["Country"] = pgmm_la["Country"].astype(str)
pgmm_la["Country"] = pgmm_la["Country"].apply(lambda x: fixCountry(x))
pgmm_la.rename(columns = {'7 Day Moving Average': '7 Day Moving Average New Cases',
                          '7 Day Moving Average.1': '7 Day Moving Average Deaths',
                          'Deaths': 'New Deaths'}, 
               inplace = True)
pgmm_la["Level"] = "Country"
print(conversions)
print_column_missing(pgmm_la["Country"],latin_american_countries)
print(pgmm_la.dtypes)
pgmm_la.to_excel(cleanedFolder + "Latin_America_cleaned.xlsx", index = False)
pgmm_la.head()

Index(['Date', 'Country', 'New Cases', 'Cumulative Cases',
       '7 Day Moving Average', 'Infection Rate', 'Deaths', 'Cumulative Deaths',
       '7 Day Moving Average.1', 'Death Rate', 'Speed', 'Acceleration', 'Jerk',
       '1 Day Persistence', '7 Day Persistence'],
      dtype='object')
{}
Column Values:
['Antigua & Barbuda' 'Argentina' 'Bahamas' 'Barbados' 'Belize' 'Bolivia'
 'Brazil' 'Chile' 'Colombia' 'Costa Rica' 'Cuba' 'Dominica'
 'Dominican Republic' 'Ecuador' 'El Salvador' 'Grenada' 'Guatemala'
 'Guyana' 'Haiti' 'Honduras' 'Jamaica' 'Mexico' 'Panama' 'Paraguay' 'Peru'
 'St. Kitts & Nevis' 'St. Lucia' 'St. Vincent & Grenadines' 'Suriname'
 'Trinidad & Tobago' 'Uruguay' 'Venezuela']
Comparison:
['Antigua & Barbuda', 'Aruba', 'Bahamas', 'Barbados', 'Bermuda', 'British Virgin Islands', 'Cayman Islands', 'Cuba', 'Curacao', 'Dominica', 'Dominican Republic', 'Grenada', 'Haiti', 'Jamaica', 'Puerto Rico', 'St. Kitts & Nevis', 'St. Lucia', 'St. Vincent & Grenadines', 'Sint Maarten', 'T

Unnamed: 0,Date,Country,New Cases,Cumulative Cases,7 Day Moving Average New Cases,Infection Rate,New Deaths,Cumulative Deaths,7 Day Moving Average Deaths,Death Rate,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,Region,Level
0,2020-10-06,Antigua & Barbuda,0,107,0.857143,0.0,0,3,0.0,0.0,0.882579,-1.268826e-16,1.268826e-16,-0.430702,0.065819,Latin America,Country
1,2020-10-13,Antigua & Barbuda,0,111,0.571429,0.0,0,3,0.0,0.0,0.588386,0.0,-6.344132000000001e-17,-0.436498,0.197754,Latin America,Country
2,2020-10-06,Argentina,14740,824468,12551.285714,32.800228,359,21827,758.285714,0.798866,27.929785,0.4014992,0.5811089,0.094288,23.480881,Latin America,Country
3,2020-10-13,Argentina,13305,917035,13223.857143,29.60699,386,24572,392.142857,0.858948,29.426427,-0.4561768,0.0899638,0.140674,24.457158,Latin America,Country
4,2020-10-06,Bahamas,107,4559,93.714286,27.472386,4,100,1.285714,1.027005,24.061262,1.540508,-0.03667875,-0.004373,13.749764,Latin America,Country


In [400]:
pgmm_ca = pd.read_excel(central_asia_r)
pgmm_ca["Region"] = "Central Asia"
pgmm_ca["Country"] = pgmm_ca["Country"].astype(str)
pgmm_ca["Country"] = pgmm_ca["Country"].apply(lambda x: "Central Asia" if x == "Region" else x)
pgmm_ca.rename(columns = {'7 Day Moving Average': '7 Day Moving Average New Cases',
                          '7 Day Moving Average.1': '7 Day Moving Average Deaths',
                          'Deaths': 'New Deaths'}, 
               inplace = True)
pgmm_ca["Level"] = pgmm_ca["Country"].apply(lambda x: "Region" if x == "Central Asia" else "Country")
print_column_missing(pgmm_ca["Country"],central_asian_countries)
print(pgmm_ca.dtypes)
pgmm_ca.to_excel(cleanedFolder + "Central_Asia_cleaned.xlsx", index = False)
pgmm_ca.head()

Column Values:
['Armenia' 'Azerbaijan' 'Central Asia' 'Cyprus' 'Georgia' 'Kazakhstan'
 'Kosovo' 'Kyrgyzstan' 'North Macedonia' 'Russia' 'Tajikistan' 'Turkey'
 'Uzbekistan']
Comparison:
['Armenia', 'Azerbaijan', 'Cyprus', 'Faeroe Islands', 'Georgia', 'Gibraltar', 'Kazakhstan', 'Kosovo', 'Kyrgyzstan', 'North Macedonia', 'Russia', 'Tajikistan', 'Turkey', 'Turkmenistan', 'Uzbekistan']
Column values not in comparison:
['Central Asia']
Comparison values not in column:
['Faeroe Islands', 'Gibraltar', 'Turkmenistan']
Date                              datetime64[ns]
Country                                   object
New Cases                                  int64
Cumulative Cases                           int64
7 Day Moving Average New Cases           float64
Infection Rate                           float64
New Deaths                                 int64
Cumulative Deaths                          int64
7 Day Moving Average Deaths              float64
Death Rate                               flo

Unnamed: 0,Date,Country,New Cases,Cumulative Cases,7 Day Moving Average New Cases,Infection Rate,New Deaths,Cumulative Deaths,7 Day Moving Average Deaths,Death Rate,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,Region,Level
0,2020-10-06,Armenia,406,53083,454.571429,13.726739,6,990,4.571429,0.202858,15.368924,0.381567,0.347757,16.115213,-0.554776,Central Asia,Country
1,2020-10-13,Armenia,745,57566,640.428571,25.188227,6,1032,6.0,0.202858,21.652698,1.637356,0.724494,21.521573,-0.790196,Central Asia,Country
2,2020-10-06,Azerbaijan,143,40931,116.0,1.426673,2,600,1.428571,0.019953,1.157301,0.121146,0.037056,1.11413,-0.0543,Central Asia,Country
3,2020-10-13,Azerbaijan,277,42381,207.142857,2.763556,3,612,1.714286,0.02993,2.06661,0.190983,0.155352,2.016774,-0.059503,Central Asia,Country
4,2020-10-06,Cyprus,29,1876,19.0,2.41954,1,23,0.142857,0.083432,1.585216,-0.011919,-0.083432,1.717325,-0.076602,Central Asia,Country


In [401]:
pgmm_eu = pd.read_excel(europe_r)
pgmm_eu["Region"] = "Europe"
pgmm_eu["Country"] = pgmm_eu["Country"].astype(str)
pgmm_eu["Country"] = pgmm_eu["Country"].apply(lambda x: "Europe" if x == "Region" else x)
pgmm_eu.rename(columns = {'7 Day Moving Average': '7 Day Moving Average New Cases',
                          '7 Day Moving Average.1': '7 Day Moving Average Deaths',
                          'Deaths': 'New Deaths'}, 
               inplace = True)
pgmm_eu["Level"] = pgmm_eu["Country"].apply(lambda x: "Region" if x == "Europe" else "Country")
print_column_missing(pgmm_eu["Country"],european_countries)
print(pgmm_eu.dtypes)
pgmm_eu.to_excel(cleanedFolder + "Europe_cleaned.xlsx", index = False)
pgmm_eu.head()

Column Values:
['Austria' 'Belarus' 'Belgium' 'Bulgaria' 'Croatia' 'Czech Republic'
 'Denmark' 'Estonia' 'Europe' 'Finland' 'France' 'Germany' 'Greece'
 'Hungary' 'Iceland' 'Ireland' 'Italy' 'Latvia' 'Lithuania' 'Luxembourg'
 'Malta' 'Netherlands' 'Norway' 'Poland' 'Portugal' 'Romania' 'Serbia'
 'Slovakia' 'Slovenia' 'Spain' 'Sweden' 'Switzerland' 'Ukraine'
 'United Kingdom']
Comparison:
['Albania', 'Andorra', 'Austria', 'Belarus', 'Belgium', 'Bosnia & Herzegovina', 'Bulgaria', 'Croatia', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Greenland', 'Hungary', 'Iceland', 'Ireland', 'Isle of Man', 'Italy', 'Latvia', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malta', 'Moldova', 'Monaco', 'Montenegro', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'San Marino', 'Serbia', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'Ukraine', 'United Kingdom', 'Vatican City']
Column values not in comparison:
['Europe']
Comparison values not in co

Unnamed: 0,Date,Country,New Cases,Cumulative Cases,7 Day Moving Average New Cases,Infection Rate,New Deaths,Cumulative Deaths,7 Day Moving Average Deaths,Death Rate,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,Region,Level
0,2020-10-07,Austria,549,50435,832.571429,6.184475,22,840,6.285714,0.24783,9.378902,-0.278406,-1.644687,3.368949,7.970965,Europe,Country
1,2020-10-14,Austria,1171,57762,1046.714286,13.191294,10,879,5.571429,0.11265,11.791218,1.000974,2.063101,3.764173,9.863012,Europe,Country
2,2020-10-07,Belarus,394,81090,404.285714,4.161889,6,868,5.714286,0.063379,4.270538,0.120722,0.024144,1.447662,3.421389,Europe,Country
3,2020-10-14,Belarus,526,84524,490.571429,5.556227,5,906,5.428571,0.052816,5.181989,0.199191,0.104123,1.738248,4.49097,Europe,Country
4,2020-10-07,Belgium,5686,146382,3434.285714,49.512128,18,10108,13.285714,0.156739,29.904818,3.368646,0.299795,9.257136,16.67917,Europe,Country


In [402]:
pgmm_me = pd.read_excel(middle_east_r)
pgmm_me["Region"] = "Middle East and North Africa"
pgmm_me["Country"] = pgmm_me["Country"].astype(str)
pgmm_me["Country"] = pgmm_me["Country"].apply(lambda x: "Middle East and North Africa" if x == "Region" else x)
pgmm_me.rename(columns = {'7 Day Moving Average': '7 Day Moving Average New Cases',
                          '7 Day Moving Average.1': '7 Day Moving Average Deaths',
                          'Deaths': 'New Deaths'}, inplace = True)
pgmm_me["Level"] = pgmm_me["Country"].apply(lambda x: "Region" if x == "Middle East and North Africa" else "Country")
print_column_missing(pgmm_me["Country"],middle_east_and_north_africa_countries)
print_column_missing(pgmm_me["Country"],demographics_countries)
print(pgmm_me.dtypes)
pgmm_me.to_excel(cleanedFolder + "Middle_East_North_Africa_cleaned.xlsx", index = False)
pgmm_me.head()

Column Values:
['Algeria' 'Bahrain' 'Djibouti' 'Egypt' 'Iran' 'Iraq' 'Israel' 'Jordan'
 'Lebanon' 'Libya' 'Middle East and North Africa' 'Morocco' 'Oman' 'Qatar'
 'Saudi Arabia' 'Tunisia' 'United Arab Emirates']
Comparison:
['Bahrain', 'Iran', 'Iraq', 'Israel', 'Jordan', 'Kuwait', 'Lebanon', 'Oman', 'Qatar', 'Saudi Arabia', 'Syria', 'United Arab Emirates', 'Yemen', 'Algeria', 'Djibouti', 'Egypt', 'Libya', 'Morocco', 'Tunisia']
Column values not in comparison:
['Middle East and North Africa']
Comparison values not in column:
['Kuwait', 'Syria', 'Yemen']
Column Values:
['Algeria' 'Bahrain' 'Djibouti' 'Egypt' 'Iran' 'Iraq' 'Israel' 'Jordan'
 'Lebanon' 'Libya' 'Middle East and North Africa' 'Morocco' 'Oman' 'Qatar'
 'Saudi Arabia' 'Tunisia' 'United Arab Emirates']
Comparison:
['Afghanistan' 'Albania' 'Algeria' 'American Samoa' 'Andorra' 'Angola'
 'Anguilla' 'Antigua & Barbuda' 'Argentina' 'Armenia' 'Aruba' 'Australia'
 'Austria' 'Azerbaijan' 'Bahamas' 'Bahrain' 'Bangladesh' 'Barbados'
 'Be

Unnamed: 0,Date,Country,New Cases,Cumulative Cases,7 Day Moving Average New Cases,Infection Rate,New Deaths,Cumulative Deaths,7 Day Moving Average Deaths,Death Rate,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,Region,Level
0,2020-10-11,Algeria,132,53072,133.714286,0.306598,6,1801,5.857143,0.013936,0.31058,-0.002986,0.000995,0.055256,0.202084,Middle East and North Africa,Country
1,2020-10-18,Algeria,199,54402,190.0,0.46222,10,1856,7.857143,0.023227,0.441316,0.022232,-0.000664,0.07385,0.176942,Middle East and North Africa,Country
2,2020-10-11,Bahrain,327,75614,421.714286,19.924785,2,275,2.142857,0.121864,25.695923,-0.217615,0.478752,4.566401,16.370049,Middle East and North Africa,Country
3,2020-10-18,Bahrain,331,77902,326.857143,20.168514,7,300,3.571429,0.426524,19.916081,0.034818,1.610347,3.503413,14.639317,Middle East and North Africa,Country
4,2020-10-11,Djibouti,0,5423,0.571429,0.0,0,61,0.0,0.0,0.058695,-0.014674,0.0,0.012929,0.083598,Middle East and North Africa,Country


In [403]:
pgmm_ep = pd.read_excel(east_asia_pacific_r)
pgmm_ep["Region"] = "East Asia and Pacific"
pgmm_ep["Country"] = pgmm_ep["Country"].astype(str)
pgmm_ep["Country"] = pgmm_ep["Country"].apply(lambda x: "East Asia and Pacific" if x == "Region" else x)
pgmm_ep.rename(columns = {'7 Day Moving Average': '7 Day Moving Average New Cases',
                          '7 Day Moving Average.1': '7 Day Moving Average Deaths',
                          'Deaths': 'New Deaths'}, 
               inplace = True)
pgmm_ep["Level"] = pgmm_ep["Country"].apply(lambda x: "Region" if x == "East Asia and Pacific" else "Country")
print_column_missing(pgmm_ep["Country"],east_asia_and_pacific_countries)
print_column_missing(pgmm_ep["Country"],demographics_countries)
print(pgmm_ep.dtypes)
pgmm_ep.to_excel(cleanedFolder + "East_Asia_Pacific_cleaned.xlsx", index = False)
pgmm_ep.head()

Column Values:
['Australia' 'Brunei' 'China' 'East Asia and Pacific' 'French Polynesia'
 'Guam' 'Indonesia' 'Japan' 'Malaysia' 'Mongolia' 'Myanmar' 'New Zealand'
 'Papua New Guinea' 'Philippines' 'Singapore' 'South Korea' 'Taiwan'
 'Thailand' 'Vietnam']
Comparison:
['Brunei', 'Cambodia', 'China', 'Indonesia', 'Japan', 'Laos', 'Malaysia', 'Mongolia', 'Myanmar', 'Niue', 'North Korea', 'Philippines', 'Singapore', 'South Korea', 'Taiwan', 'Thailand', 'Timor', 'Vietnam', 'Australia', 'Cook Islands', 'Fiji', 'French Polynesia', 'Guam', 'Kiribati', 'Marshall Islands', 'Micronesia', 'Nauru', 'New Caledonia', 'New Zealand', 'Northern Mariana Islands', 'Palau', 'Papua New Guinea', 'Samoa', 'Solomon Islands', 'Tonga', 'Tuvalu', 'Vanuatu']
Column values not in comparison:
['East Asia and Pacific']
Comparison values not in column:
['Cambodia', 'Laos', 'Niue', 'North Korea', 'Timor', 'Cook Islands', 'Fiji', 'Kiribati', 'Marshall Islands', 'Micronesia', 'Nauru', 'New Caledonia', 'Northern Mariana Isl

Unnamed: 0,Date,Country,New Cases,Cumulative Cases,7 Day Moving Average New Cases,Infection Rate,New Deaths,Cumulative Deaths,7 Day Moving Average Deaths,Death Rate,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,Region,Level
0,2020-10-11,Australia,15,27244,17.571429,0.059138,0,897,0.571429,0.0,0.069276,0.003943,0.000563,-0.000467,0.05248,East Asia and Pacific,Country
1,2020-10-11,Brunei,0,146,0.0,0.0,0,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,East Asia and Pacific,Country
2,2020-10-11,China,21,90778,24.857143,0.001502,0,4739,0.0,0.0,0.001778,1e-05,-0.000112,-1.3e-05,0.001342,East Asia and Pacific,Country
3,2020-10-11,French Polynesia,0,2692,104.0,0.0,0,10,0.285714,0.0,37.23768,0.0,-11.66235,-0.265958,28.687413,East Asia and Pacific,Country
4,2020-10-11,Guam,89,3078,54.142857,53.199756,2,60,1.285714,1.1955,32.363897,-5.123572,-9.82018,-0.267742,26.143501,East Asia and Pacific,Country


In [404]:
def fixProvince(value):
    province_map = {
        'BC': 'British Columbia',
        'NL': 'Newfoundland and Labrador',
        'NWT': 'Northwest Territories',
        'PEI': 'Prince Edward Island',
        'Repatriated': 'Repatriated Canada',
        'Repatriated Cdn': 'Repatriated Canada'
    }
    value = titleCase(value)
    if value in province_map.keys():
        new_province = province_map[value]
        return new_province
    else:
        return value

canada_source_request = requests.get(canada_source_csv).content
canada_df = pd.read_csv(io.StringIO(canada_source_request.decode('utf-8')))
currentTime = datetime.now()

print("Original Canada Columns")
print(canada_df.columns)
canada_df.rename(columns = {
    'Province': 'State/Province',
    'SummaryDate': 'SARS-CoV-2 Source Date',
    'TotalCases': 'Positive Total','DailyTotals': 'Positive Daily',
    'TotalRecovered' : 'Recovered Total','DailyRecovered': 'Recovered Daily',
    'TotalDeaths': 'Deaths Total','DailyDeaths': 'Deaths Daily',
    'TotalTested': 'Tests Total','DailyTested': 'Tests Daily',
    'TotalActive': 'Active Total','DailyActive': 'Active Daily',
    'TotalHospitalized': 'Hospitalized Total','DailyHospitalized': 'Hospitalized Daily',
    'TotalICU': 'ICU Total', 'DailyICU': 'ICU Daily'
}, inplace = True)
print("Renamed Canada Columns")
print(canada_df.columns)

canada_df.drop(columns=["OBJECTID"], inplace = True)
canada_df["Downloaded"] = currentTime
canada_df["Country"] = "Canada"
canada_df["Region"] = "North America"
canada_df["State/Province"] = canada_df["State/Province"].apply(lambda x: fixProvince(x))
canada_df["Date"] = canada_df["SARS-CoV-2 Source Date"].apply(lambda x: us_date(x))
canada_df["Level"] = canada_df["State/Province"].apply(lambda x: "Country" if x == "Canada" else "State/Province")
canada_df["SARS-CoV-2 Source"] = canada_source_csv
string_columns = ["State/Province","Abbreviation","Country","Region"]

for col in string_columns:
    canada_df[col] = canada_df[col].astype(str)
    print(canada_df[col].sort_values(ascending = True).unique())

print(canada_df.columns)
print(canada_df.dtypes)
canada_df = canada_df.sort_values(by=["Level","State/Province","Date"])

population_cn = pd.read_excel(canada_population)
population_cn.rename(columns = {"GEO": "State/Province",
                                "VALUE": "Population",
                                "REF_DATE": "Quarter"}, inplace = True)
population_cn = population_cn[["Quarter","State/Province","Population"]]
last_quarter = population_cn["Quarter"].max()
print(last_quarter)
canada_last_population = population_cn.loc[population_cn["Quarter"]==last_quarter].copy()[["State/Province","Population"]]
canada_last_population.reset_index(drop=True,inplace=True)
canada_last_population["Population Source URL"] = canada_population_source

canada_input_order = [
    'Date', 'Level', 'Region', 'Country', 'State/Province', 'Abbreviation', 'Population', 
    'Positive Daily', 'Positive Total', 
    'Deaths Daily', 'Deaths Total', 
    'Tests Daily', 'Tests Total',
    'Active Daily', 'Recovered Daily', 'Recovered Total',  
    'Hospitalized Daily', 'Hospitalized Total', 'ICU Daily', 'ICU Total', 
    'Downloaded', 'SARS-CoV-2 Source', 'SARS-CoV-2 Source Date', 'Population Source URL'
]
canada_input = pd.merge(canada_df, canada_last_population, on="State/Province")
canada_input = canada_input[canada_input_order]

canada_input["State/Province"] = canada_input.apply(lambda x: "" if (x["Level"]=="Country" or x["Level"]=="Region") else x["State/Province"], axis=1)
canada_input["Country"] = canada_input.apply(lambda x: "" if x["Level"]=="Region" else x["Country"], axis=1)

print(canada_input.columns)
print_column_unique(canada_input["State/Province"])
canada_input.to_excel(cleanedFolder + "Canada_input.xlsx", index = False)
canada_input.head()

Original Canada Columns
Index(['OBJECTID', 'Province', 'Abbreviation', 'DailyTotals', 'SummaryDate',
       'TotalCases', 'TotalRecovered', 'DailyRecovered', 'TotalDeaths',
       'DailyDeaths', 'TotalTested', 'DailyTested', 'TotalActive',
       'DailyActive', 'TotalHospitalized', 'DailyHospitalized', 'TotalICU',
       'DailyICU'],
      dtype='object')
Renamed Canada Columns
Index(['OBJECTID', 'State/Province', 'Abbreviation', 'Positive Daily',
       'SARS-CoV-2 Source Date', 'Positive Total', 'Recovered Total',
       'Recovered Daily', 'Deaths Total', 'Deaths Daily', 'Tests Total',
       'Tests Daily', 'Active Total', 'Active Daily', 'Hospitalized Total',
       'Hospitalized Daily', 'ICU Total', 'ICU Daily'],
      dtype='object')
['Alberta' 'British Columbia' 'Canada' 'Manitoba' 'New Brunswick'
 'Newfoundland and Labrador' 'Northwest Territories' 'Nova Scotia'
 'Nunavut' 'Ontario' 'Prince Edward Island' 'Quebec' 'Repatriated Canada'
 'Saskatchewan' 'Yukon']
['AB' 'BC' 'CA' 'MB

Unnamed: 0,Date,Level,Region,Country,State/Province,Abbreviation,Population,Positive Daily,Positive Total,Deaths Daily,Deaths Total,Tests Daily,Tests Total,Active Daily,Recovered Daily,Recovered Total,Hospitalized Daily,Hospitalized Total,ICU Daily,ICU Total,Downloaded,SARS-CoV-2 Source,SARS-CoV-2 Source Date,Population Source URL
0,01/25 /2020,Country,North America,Canada,,CA,38005238,1,1,0,0,0,0,1.0,0,0,,,,,2020-11-14 22:14:50.280178,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/25 12:00:00+00,https://www150.statcan.gc.ca/t1/tbl1/en/tv.act...
1,01/26 /2020,Country,North America,Canada,,CA,38005238,0,1,0,0,0,0,0.0,0,0,,,,,2020-11-14 22:14:50.280178,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/26 12:00:00+00,https://www150.statcan.gc.ca/t1/tbl1/en/tv.act...
2,01/27 /2020,Country,North America,Canada,,CA,38005238,1,2,0,0,0,0,1.0,0,0,,,,,2020-11-14 22:14:50.280178,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/27 12:00:00+00,https://www150.statcan.gc.ca/t1/tbl1/en/tv.act...
3,01/28 /2020,Country,North America,Canada,,CA,38005238,1,3,0,0,0,0,1.0,0,0,,,,,2020-11-14 22:14:50.280178,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/28 12:00:00+00,https://www150.statcan.gc.ca/t1/tbl1/en/tv.act...
4,01/29 /2020,Country,North America,Canada,,CA,38005238,0,3,0,0,0,0,0.0,0,0,,,,,2020-11-14 22:14:50.280178,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/29 12:00:00+00,https://www150.statcan.gc.ca/t1/tbl1/en/tv.act...


In [371]:
pgmm_cn = pd.read_excel(canada_r)
pgmm_cn["Region"] = "North America"
pgmm_cn["Country"] = pgmm_cn["Country"].astype(str)
pgmm_cn["State/Province"] = pgmm_cn["State/Province"].astype(str)
pgmm_cn.rename(columns = {'7 Day Moving Average': '7 Day Moving Average New Cases',
                          '7 Day Moving Average.1': '7 Day Moving Average Deaths',
                          'Deaths': 'New Deaths'}, inplace = True)
pgmm_cn["Level"] = pgmm_cn["State/Province"].apply(lambda x: "Country" if x == "Canada" else "State/Province")
print_column_unique(pgmm_cn['State/Province'])
print(pgmm_cn.dtypes)
pgmm_cn.to_excel(cleanedFolder + "Canada_cleaned.xlsx", index = False)
pgmm_cn.head(30)

Column Values:
['Alberta' 'British Columbia' 'Canada' 'Manitoba' 'New Brunswick'
 'Newfoundland and Labrador' 'Northwest Territories' 'Nova Scotia'
 'Ontario' 'Prince Edward Island' 'Quebec' 'Saskatchewan' 'Yukon']
Date                              datetime64[ns]
Country                                   object
State/Province                            object
New Cases                                  int64
Cumulative Cases                           int64
7 Day Moving Average New Cases           float64
Infection Rate                           float64
New Deaths                                 int64
Cumulative Deaths                          int64
7 Day Moving Average Deaths              float64
Death Rate                               float64
Speed                                    float64
Acceleration                             float64
Jerk                                     float64
1 Day Persistence                        float64
7 Day Persistence                        float64
R

Unnamed: 0,Date,Country,State/Province,New Cases,Cumulative Cases,7 Day Moving Average New Cases,Infection Rate,New Deaths,Cumulative Deaths,7 Day Moving Average Deaths,Death Rate,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,Region,Level
0,2020-10-13,Canada,Alberta,961,20956,249.285714,21.732857,4,286,0.714286,6.467843,5.637556,2.213023,4.080363,1.394297,2.183653,North America,State/Province
1,2020-10-13,Canada,British Columbia,549,10734,127.571429,10.664932,5,250,0.857143,4.856527,2.478216,1.240496,2.234002,0.503937,1.192876,North America,State/Province
2,2020-10-13,Canada,Manitoba,124,2779,76.142857,8.990309,1,35,1.571429,2.537587,5.520547,0.714667,0.4453724,1.956712,1.575432,North America,State/Province
3,2020-10-13,Canada,New Brunswick,6,284,11.285714,0.767778,0,2,0.0,0.255926,1.444154,0.073122,0.0,0.558215,0.04745,North America,State/Province
4,2020-10-13,Canada,Newfoundland and Labrador,0,283,0.857143,0.0,0,4,0.0,0.766132,0.164171,0.0,-1.982541e-17,0.066842,0.056818,North America,State/Province
5,2020-10-13,Canada,Northwest Territories,0,5,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,North America,State/Province
6,2020-10-13,Canada,Nova Scotia,0,1092,0.428571,0.0,0,65,0.0,6.637048,0.043761,0.0,0.0,0.017817,0.015145,North America,State/Province
7,2020-10-13,Canada,Ontario,1553,60692,761.428571,10.540237,12,3017,4.285714,20.47643,5.167828,0.974422,1.57071,1.707344,2.152768,North America,State/Province
8,2020-10-13,Canada,Prince Edward Island,2,63,0.285714,1.252937,0,0,0.0,0.0,0.178991,0.0,0.0,0.072876,0.09292,North America,State/Province
9,2020-10-13,Canada,Quebec,815,87791,968.142857,9.504849,5,5970,10.142857,69.624475,11.290861,-0.914665,-0.3348772,4.969474,6.542116,North America,State/Province


In [372]:
census_regions = {
    0: {"name" : "United States",
        "states" : ["United States"]},
    1: {"name" : "Northeast",
        "states" :["Connecticut", "Maine", "New Hampshire", "Vermont", "Massachusetts", 
                   "Rhode Island", "New Jersey", "New York", "Pennsylvania"]},
    3: {"name" : "South",
        "states" : ["Maryland", "Delaware", "West Virginia", "Virginia", "Kentucky", 
                    "Tennessee", "North Carolina", "South Carolina", "Georgia", "Florida", 
                    "Alabama", "Mississippi", "Arkansas", "Louisiana", "Oklahoma", "Texas", 
                    "District of Columbia", "Puerto Rico"]},
    2: {"name" : "Midwest",
        "states" : ["North Dakota", "South Dakota", "Nebraska", "Kansas", "Missouri", "Iowa", 
                    "Minnesota", "Wisconsin", "Illinois", "Michigan", "Indiana", "Ohio"]},
    4: {"name" : "West",
        "states" : ["Washington", "Idaho", "Montana", "Wyoming", "Oregon", "California", "Nevada", 
                    "Utah", "Colorado", "Arizona", "New Mexico", "Alaska", "Hawaii"]}
}
us_state_codes = pd.read_excel(us_codes)
def regionByState(state):
    for key in census_regions.keys():
        if (state in census_regions[key]["states"]):
            return census_regions[key]["name"]
us_state_codes["Census Region"] = us_state_codes["State Name"].apply(lambda x: regionByState(x))
us_state_codes.rename(columns = {'State Name':'State/Province'}, inplace = True)
us_state_codes.head()
us_states_request = requests.get(us_source_csv).content
states=pd.read_csv(io.StringIO(us_states_request.decode('utf-8')))
currentTime = datetime.now()
states["SARS-CoV-2 Accessed"] = currentTime
states["Country"] = "United States"
printColumns(states, "Pre Rename Columns")
states = states.drop(
    columns = [
        'deathConfirmed', 'deathProbable',
        'hospitalized',
        'negativeTestsAntibody', 'negativeTestsPeopleAntibody', 'negativeTestsViral',
        'positiveScore', 'positiveTestsAntibody', 'positiveTestsAntigen',
        'positiveTestsPeopleAntibody', 'positiveTestsPeopleAntigen',
        'positiveTestsViral', 'positiveCasesViral',
        'totalTestEncountersViral', 'totalTestEncountersViralIncrease',
        'totalTestsAntibody', 'totalTestsAntigen',
        'totalTestsPeopleAntibody', 'totalTestsPeopleAntigen',
        'totalTestsPeopleViral', 'totalTestsPeopleViralIncrease',
        'totalTestsViral', 'totalTestsViralIncrease'
    ])
states.rename(
    columns = {
        'date': 'Date', 'state' : 'State Abbreviation', 'dataQualityGrade': 'Data Quality',
        'totalTestResults' : 'Total Tests', 'totalTestResultsIncrease' : 'Tests Daily',
        'negative' : 'Total Negative', 'negativeIncrease' : 'Negative Daily',
        'positive' : 'Total Positive', 'positiveIncrease' : 'Positive Daily',
        'recovered' : 'Total Recovered',
        'death' : 'Total Deaths', 'deathIncrease' : 'Deaths Daily',
        'hospitalizedCumulative' : 'Total Hospitalized', 'hospitalizedIncrease' : 'Hospitalized Daily', 'hospitalizedCurrently' : 'Currently Hospitalized',
        'inIcuCumulative' : 'Total In ICU', 'inIcuCurrently' : 'Currently In ICU',
        'onVentilatorCumulative' : 'Total On Ventilator', 'onVentilatorCurrently' : 'Currently On Ventilator'
    }, inplace = True)
states["Date"] = states["Date"].astype(str)
states["Date"] = states["Date"].apply(lambda x: us_date(x))
states["SARS-CoV-2 Source Date"] = pd.to_datetime(states["Date"], format="%m/%d/%Y")
states["Level"] = "State/Province"
states["Region"] = "North America"
states["SARS-CoV-2 Source"] = us_source_csv
printColumns(states, "Post Rename Columns")
states_input = pd.merge(states, us_state_codes, how="left", on="State Abbreviation")
printColumns(states_input,"States Input")
merge_order = [
    'Date', 'Level', 'Region', 'Country', 'State Abbreviation', 'State/Province', 'FIPS', 'Status', 'Data Quality', 
    'Total Deaths', 'Deaths Daily', 'Total Recovered',
    'Total Tests', 'Tests Daily', 
    'Total Negative', 'Negative Daily', 'Total Positive', 'Positive Daily',    
    'Total Hospitalized', 'Currently Hospitalized', 'Hospitalized Daily',
    'Total In ICU', 'Currently In ICU',
    'Total On Ventilator', 'Currently On Ventilator', 
    'SARS-CoV-2 Accessed', 'SARS-CoV-2 Source Date', 'SARS-CoV-2 Source'
]
states_input = states_input[merge_order]
printColumns(states_input, "States Input Merge Columns")

us_summary_cols = [
    'Date', 'SARS-CoV-2 Source Date', 'Country',
    'Total Deaths', 'Deaths Daily', 'Total Recovered',
    'Total Tests', 'Tests Daily', 'Total Negative', 'Negative Daily', 'Total Positive', 'Positive Daily',
    'Total Hospitalized', 'Currently Hospitalized', 'Hospitalized Daily', 
    'Total In ICU', 'Currently In ICU', 'Total On Ventilator', 'Currently On Ventilator'
]
us_stats = states_input[us_summary_cols].groupby(['Date','SARS-CoV-2 Source Date','Country']).sum().reset_index()
us_stats["FIPS"] = 0
us_stats["State Abbreviation"] = "US"
us_stats["State/Province"] = "" 
us_stats["Status"] = 0
us_stats["SARS-CoV-2 Accessed"] = currentTime
us_stats["Data Quality"] = ""
us_stats["Level"] = "Country"
us_stats["Region"] = "North America"
us_stats["SARS-CoV-2 Source"] = us_source_csv
printColumns(us_stats, "US Stats Columns")
us_stats = us_stats[merge_order]

us_stats["State/Province"] = us_stats.apply(lambda x: "" if (x["Level"]=="Country" or x["Level"]=="Region") else x["State/Province"], axis=1)
us_stats["State Abbreviation"] = us_stats.apply(lambda x: "" if (x["Level"]=="Country" or x["Level"]=="Region") else x["State Abbreviation"], axis=1)
us_stats["Country"] = us_stats.apply(lambda x: "" if x["Level"]=="Region" else x["Country"], axis=1)


us_input_cleaned = pd.concat([states_input,us_stats], ignore_index=True)
us_input_cleaned.to_excel(cleanedFolder + "US_input_cleaned.xlsx", index = False)
us_input_cleaned.head(-10)

Pre Rename Columns
Index(['date', 'state', 'dataQualityGrade', 'death', 'deathConfirmed',
       'deathIncrease', 'deathProbable', 'hospitalized',
       'hospitalizedCumulative', 'hospitalizedCurrently',
       'hospitalizedIncrease', 'inIcuCumulative', 'inIcuCurrently', 'negative',
       'negativeIncrease', 'negativeTestsAntibody',
       'negativeTestsPeopleAntibody', 'negativeTestsViral',
       'onVentilatorCumulative', 'onVentilatorCurrently', 'positive',
       'positiveCasesViral', 'positiveIncrease', 'positiveScore',
       'positiveTestsAntibody', 'positiveTestsAntigen',
       'positiveTestsPeopleAntibody', 'positiveTestsPeopleAntigen',
       'positiveTestsViral', 'recovered', 'totalTestEncountersViral',
       'totalTestEncountersViralIncrease', 'totalTestResults',
       'totalTestResultsIncrease', 'totalTestsAntibody', 'totalTestsAntigen',
       'totalTestsPeopleAntibody', 'totalTestsPeopleAntigen',
       'totalTestsPeopleViral', 'totalTestsPeopleViralIncrease',
     

Unnamed: 0,Date,Level,Region,Country,State Abbreviation,State/Province,FIPS,Status,Data Quality,Total Deaths,Deaths Daily,Total Recovered,Total Tests,Tests Daily,Total Negative,Negative Daily,Total Positive,Positive Daily,Total Hospitalized,Currently Hospitalized,Hospitalized Daily,Total In ICU,Currently In ICU,Total On Ventilator,Currently On Ventilator,SARS-CoV-2 Accessed,SARS-CoV-2 Source Date,SARS-CoV-2 Source
0,11/14/2020,State/Province,North America,United States,AK,Alaska,2,0,A,98.0,2,7162.0,862264.0,17598,839859.0,17005,22405.0,593,,125.0,0,,,,11.0,2020-11-14 21:49:32.202067,2020-11-14,https://covidtracking.com/data/download/all-st...
1,11/14/2020,State/Province,North America,United States,AL,Alabama,1,0,A,3246.0,15,88038.0,1453155.0,12280,1271260.0,10660,215843.0,2226,22275.0,1120.0,0,2151.0,,1244.0,,2020-11-14 21:49:32.202067,2020-11-14,https://covidtracking.com/data/download/all-st...
2,11/14/2020,State/Province,North America,United States,AR,Arkansas,5,0,A+,2148.0,0,112383.0,1483864.0,0,1367077.0,0,130318.0,0,7847.0,799.0,30,,291.0,894.0,114.0,2020-11-14 21:49:32.202067,2020-11-14,https://covidtracking.com/data/download/all-st...
3,11/14/2020,State/Province,North America,United States,AS,American Samoa,60,1,D,0.0,0,,1768.0,0,1768.0,0,0.0,0,,,0,,,,,2020-11-14 21:49:32.202067,2020-11-14,https://covidtracking.com/data/download/all-st...
4,11/14/2020,State/Province,North America,United States,AZ,Arizona,4,0,A+,6300.0,43,45036.0,1961414.0,18698,1695606.0,15344,273053.0,3476,22937.0,1470.0,117,,362.0,,189.0,2020-11-14 21:49:32.202067,2020-11-14,https://covidtracking.com/data/download/all-st...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14674,10/31/2020,Country,North America,United States,,,0,0,,222319.0,963,3612517.0,146136155.0,1373818,120065801.0,1038275,9046233.0,90492,475074.0,47375.0,2074,24375.0,9499.0,2786.0,2502.0,2020-11-14 21:49:32.202067,2020-10-31,https://covidtracking.com/data/download/all-st...
14675,11/01/2020,Country,North America,United States,,,0,0,,222710.0,391,3633903.0,147275722.0,1139567,120882046.0,816245,9120284.0,74051,476228.0,47520.0,1154,24457.0,9553.0,2797.0,2553.0,2020-11-14 21:49:32.202067,2020-11-01,https://covidtracking.com/data/download/all-st...
14676,11/02/2020,Country,North America,United States,,,0,0,,223186.0,476,3674981.0,148603687.0,1327965,121954420.0,1072374,9202532.0,82248,477730.0,48470.0,1502,24560.0,9858.0,2809.0,2637.0,2020-11-14 21:49:32.202067,2020-11-02,https://covidtracking.com/data/download/all-st...
14677,11/03/2020,Country,North America,United States,,,0,0,,224715.0,1529,3705130.0,149729485.0,1125798,122794377.0,839957,9288613.0,86081,480878.0,50340.0,3148,24796.0,10406.0,2833.0,2733.0,2020-11-14 21:49:32.202067,2020-11-03,https://covidtracking.com/data/download/all-st...


In [374]:
c_order = [
    "time","Level",
    "region","name","state","county",
    "new_cases", "all_cum_cases",
    "all_new_neg", "all_cum_neg",
    "hospitalized_currently","hospitalized_cum",
    "new_deaths", "all_cum_deaths",
    "new_tests",
    "SARS-CoV-2 Source","accessed"]
c_renamed = c[c_order].copy()
print(len(c_renamed.columns))
c_renamed.rename(
    columns = {
        "time": "Date",
        "region": "Region",
        "name": "Country",
        "state": "State/Province",
        "county": "County",
        "new_cases": "Positive Daily",
        "all_cum_cases": "Positive Total",
        "all_new_neg": "Negative Daily",
        "all_cum_neg": "Negative Total",
        "hospitalized_currently": "Hospitalized Currently",
        "hospitalized_cum": "Hospitalized Cumulative",
        "new_deaths": "Deaths Daily", 
        "all_cum_deaths": "Deaths Total",
        "new_tests": "Tests Daily",
        "accessed": "SARS-CoV-2 Accessed"
    }, inplace = True)
print("Renamed Columns")
print(c_renamed.columns)
canada_input_cleaned_order = [
    'Date', 'Level', 'Region', 'Country', 'State/Province', 
    'Positive Daily', 'Positive Total', 
    'Deaths Daily', 'Deaths Total', 
    'Tests Daily', 'Tests Total',
    'Active Daily', 'Recovered Daily', 'Recovered Total',  
    'Hospitalized Daily', 'Hospitalized Total', 'ICU Daily', 'ICU Total', 
    'SARS-CoV-2 Source', 'SARS-CoV-2 Source Date'
]
canada_input_cleaned = canada_input[canada_input_cleaned_order].copy()
canada_input_cleaned = canada_input_cleaned.rename(columns = {"SARS-CoV-2 Source Date": "SARS-CoV-2 Accessed"})
canada_input_cleaned.head()

17
Renamed Columns
Index(['Date', 'Level', 'Region', 'Country', 'State/Province', 'County',
       'Positive Daily', 'Positive Total', 'Negative Daily', 'Negative Total',
       'Hospitalized Currently', 'Hospitalized Cumulative', 'Deaths Daily',
       'Deaths Total', 'Tests Daily', 'SARS-CoV-2 Source',
       'SARS-CoV-2 Accessed'],
      dtype='object')


Unnamed: 0,Date,Level,Region,Country,State/Province,Positive Daily,Positive Total,Deaths Daily,Deaths Total,Tests Daily,Tests Total,Active Daily,Recovered Daily,Recovered Total,Hospitalized Daily,Hospitalized Total,ICU Daily,ICU Total,SARS-CoV-2 Source,SARS-CoV-2 Accessed
0,01/25 /2020,Country,North America,Canada,,1,1,0,0,0,0,1.0,0,0,,,,,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/25 12:00:00+00
1,01/26 /2020,Country,North America,Canada,,0,1,0,0,0,0,0.0,0,0,,,,,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/26 12:00:00+00
2,01/27 /2020,Country,North America,Canada,,1,2,0,0,0,0,1.0,0,0,,,,,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/27 12:00:00+00
3,01/28 /2020,Country,North America,Canada,,1,3,0,0,0,0,1.0,0,0,,,,,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/28 12:00:00+00
4,01/29 /2020,Country,North America,Canada,,0,3,0,0,0,0,0.0,0,0,,,,,https://opendata.arcgis.com/datasets/3afa9ce11...,2020/01/29 12:00:00+00


In [375]:
c_renamed = pd.concat([c_renamed, canada_input_cleaned], ignore_index=True, sort=True)
c_renamed = pd.concat([c_renamed, us_input_cleaned], ignore_index=True, sort=True)
c_renamed = c_renamed[merge_order]
print(c_renamed.columns)
#c_renamed_file = cleanedFolder + "input_cleaned.xlsx"
#c_renamed.to_excel(c_renamed_file, index = False)
c_renamed.head()

Index(['Date', 'Level', 'Region', 'Country', 'State Abbreviation',
       'State/Province', 'FIPS', 'Status', 'Data Quality', 'Total Deaths',
       'Deaths Daily', 'Total Recovered', 'Total Tests', 'Tests Daily',
       'Total Negative', 'Negative Daily', 'Total Positive', 'Positive Daily',
       'Total Hospitalized', 'Currently Hospitalized', 'Hospitalized Daily',
       'Total In ICU', 'Currently In ICU', 'Total On Ventilator',
       'Currently On Ventilator', 'SARS-CoV-2 Accessed',
       'SARS-CoV-2 Source Date', 'SARS-CoV-2 Source'],
      dtype='object')


Unnamed: 0,Date,Level,Region,Country,State Abbreviation,State/Province,FIPS,Status,Data Quality,Total Deaths,Deaths Daily,Total Recovered,Total Tests,Tests Daily,Total Negative,Negative Daily,Total Positive,Positive Daily,Total Hospitalized,Currently Hospitalized,Hospitalized Daily,Total In ICU,Currently In ICU,Total On Ventilator,Currently On Ventilator,SARS-CoV-2 Accessed,SARS-CoV-2 Source Date,SARS-CoV-2 Source
0,02/24/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,1,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...
1,02/25/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,0,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...
2,02/26/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,0,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...
3,02/27/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,0,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...
4,02/28/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,0,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...


In [376]:
pgmm_us = pd.read_excel(us_r)
pgmm_us = pd.merge(pgmm_us,us_state_codes,on='State Abbreviation')
pgmm_us["Region"] = "North America"
pgmm_us["Region"] = pgmm_us["Region"].astype(str)
pgmm_us["Country"] = "United States"
pgmm_us["Country"] = pgmm_us["Country"].astype(str)
pgmm_us.rename(columns = {'7 Day Moving Average': '7 Day Moving Average New Cases',
                          '7 Day Moving Average.1': '7 Day Moving Average Deaths',
                          'Deaths': 'New Deaths',
                          'First Day of Week': 'Date'
                         }, inplace = True)
print_column_unique(pgmm_us["State/Province"])
pgmm_us["Level"] = pgmm_us["State/Province"].apply(lambda x: "Country" if x == "United States" else "State/Province")
print(pgmm_us.dtypes)
pgmm_us.to_excel(cleanedFolder + "US_cleaned.xlsx", index = False)
pgmm_us.head()

Column Values:
['Alabama' 'Alaska' 'Arizona' 'Arkansas' 'California' 'Colorado'
 'Connecticut' 'Delaware' 'District of Columbia' 'Florida' 'Georgia'
 'Hawaii' 'Idaho' 'Illinois' 'Indiana' 'Iowa' 'Kansas' 'Kentucky'
 'Louisiana' 'Maine' 'Maryland' 'Massachusetts' 'Michigan' 'Minnesota'
 'Mississippi' 'Missouri' 'Montana' 'Nebraska' 'Nevada' 'New Hampshire'
 'New Jersey' 'New Mexico' 'New York' 'North Carolina' 'North Dakota'
 'Ohio' 'Oklahoma' 'Oregon' 'Pennsylvania' 'Rhode Island' 'South Carolina'
 'South Dakota' 'Tennessee' 'Texas' 'United States' 'Utah' 'Vermont'
 'Virginia' 'Washington' 'West Virginia' 'Wisconsin' 'Wyoming']
Date                              datetime64[ns]
State Abbreviation                        object
New Cases                                  int64
Cumulative Cases                           int64
7 Day Moving Average New Cases           float64
Infection Rate                           float64
New Deaths                                 int64
Cumulative Deaths    

Unnamed: 0,Date,State Abbreviation,New Cases,Cumulative Cases,7 Day Moving Average New Cases,Infection Rate,New Deaths,Cumulative Deaths,7 Day Moving Average Deaths,Death Rate,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,State/Province,FIPS,Status,Census Region,Region,Country,Level
0,2020-09-30,AK,106,8780,119.857143,14.489881,0,56,1.571429,0.0,16.384111,0.995935,-0.507732,3.776715,9.83158,Alaska,2,0,West,North America,United States,State/Province
1,2020-10-07,AK,274,9861,154.428571,37.454975,1,59,0.428571,0.136697,21.109921,3.280728,5.721745,4.1753,14.011924,Alaska,2,0,West,North America,United States,State/Province
2,2020-09-30,AL,1147,154701,1078.285714,23.392958,23,2540,7.428571,0.469083,21.991536,1.684037,2.362896,4.579974,15.061534,Alabama,1,0,South,North America,United States,State/Province
3,2020-10-07,AL,941,161418,959.571429,19.191607,21,2601,8.714286,0.428293,19.57037,-0.600193,-1.16251,4.557614,18.576065,Alabama,1,0,South,North America,United States,State/Province
4,2020-09-30,AR,942,83697,819.142857,31.214751,19,1369,20.0,0.629597,27.143673,-0.189352,-0.610662,5.727148,22.808155,Arkansas,5,0,South,North America,United States,State/Province


In [378]:
all_configured = pd.concat([pgmm_ssa, pgmm_sa], sort=False)
all_configured = pd.concat([all_configured, pgmm_la], sort=False)
all_configured = pd.concat([all_configured, pgmm_ca], sort=False)
all_configured = pd.concat([all_configured, pgmm_eu], sort=False)
all_configured = pd.concat([all_configured, pgmm_me], sort=False)
all_configured = pd.concat([all_configured, pgmm_ep], sort=False)
all_configured = pd.concat([all_configured, pgmm_cn], sort=False)
all_configured = pd.concat([all_configured, pgmm_us], sort=False)
all_regions = [
    'Central Asia', 'East Asia and Pacific', 'Europe', 'Latin America', 
    'Middle East and North Africa', 'North America', 'South Asia', 'Sub-Saharan Africa'
]
us_states = [
    'Alabama','Alaska','Arizona','Arkansas','California','Colorado','Connecticut','Delaware','District of Columbia',
    'Florida','Georgia','Hawaii','Idaho','Illinois','Indiana','Iowa','Kansas','Kentucky','Louisiana',
    'Maine','Maryland','Massachusetts','Michigan','Minnesota','Mississippi','Missouri','Montana','Nebraska',
    'Nevada','New Hampshire','New Jersey','New Mexico','New York','North Carolina','North Dakota',
    'Ohio','Oklahoma','Oregon','Pennsylvania','Rhode Island','South Carolina','South Dakota','Tennessee','Texas',
    'Utah','Vermont','Virginia','Washington','West Virginia','Wisconsin','Wyoming'
]
canada_provinces = [
    'Alberta','British Columbia','Manitoba','New Brunswick','Newfoundland and Labrador','Northwest Territories',
    'Nova Scotia','Ontario','Prince Edward Island','Quebec','Saskatchewan','Yukon'
]
states_and_provinces = us_states + canada_provinces
all_configured["Level"] = all_configured.apply(lambda x: "Region" if x["Country"] in all_regions else "State/Province" if x["State/Province"] in states_and_provinces else "Country", axis = 1)
all_configured.reset_index(drop=True,inplace=True)
print(all_configured.columns)
print_column_missing(all_configured["Country"],all_countries)

all_configured.to_excel(local_download + "all_configured_regions.xlsx", index = False)
all_configured.head()

Index(['Date', 'Country', 'New COVID Cases', 'Cumulative COVID Cases',
       '7 Day Moving Average New Cases', 'Rate of Infection', 'New Deaths',
       'Cumulative Deaths', '7 Day Moving Average of Death Rate', 'Death Rate',
       'Speed', 'Acceleration', 'Jerk', '1 Day Persistence',
       '7 Day Persistence', 'Region', 'Level', '7 Day Moving Average Deaths',
       'New Cases', 'Cumulative Cases', 'Infection Rate', 'State/Province',
       'State Abbreviation', 'FIPS', 'Status', 'Census Region'],
      dtype='object')
Column Values:
['Afghanistan' 'Algeria' 'Angola' 'Antigua & Barbuda' 'Argentina'
 'Armenia' 'Australia' 'Austria' 'Azerbaijan' 'Bahamas' 'Bahrain'
 'Bangladesh' 'Barbados' 'Belarus' 'Belgium' 'Belize' 'Benin' 'Bhutan'
 'Bolivia' 'Botswana' 'Brazil' 'Brunei' 'Bulgaria' 'Burkina Faso'
 'Burundi' 'Cabo Verde' 'Cameroon' 'Canada' 'Central African Republic'
 'Central Asia' 'Chad' 'Chile' 'China' 'Colombia' 'Comoros' 'Costa Rica'
 'Croatia' 'Cuba' 'Cyprus' 'Czech Republic'

Unnamed: 0,Date,Country,New COVID Cases,Cumulative COVID Cases,7 Day Moving Average New Cases,Rate of Infection,New Deaths,Cumulative Deaths,7 Day Moving Average of Death Rate,Death Rate,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,Region,Level,7 Day Moving Average Deaths,New Cases,Cumulative Cases,Infection Rate,State/Province,State Abbreviation,FIPS,Status,Census Region
0,2020-09-08,Angola,52.0,3033.0,43.428571,0.163392,4,124,2.142857,0.012569,0.136459,-0.010324,-0.00404,4.369557,6.340465,Sub-Saharan Africa,Country,,,,,,,,,
1,2020-09-15,Angola,130.0,3569.0,76.571429,0.40848,3,139,2.142857,0.009426,0.240599,0.035013,0.019302,6.120053,4.321752,Sub-Saharan Africa,Country,,,,,,,,,
2,2020-09-08,Benin,0.0,2213.0,9.714286,0.0,0,40,0.0,0.0,0.082316,0.0,0.0,0.908654,0.426489,Sub-Saharan Africa,Country,,,,,,,,,
3,2020-09-15,Benin,0.0,2267.0,7.714286,0.0,0,40,0.0,0.0,0.065369,0.0,0.0,0.721578,0.966708,Sub-Saharan Africa,Country,,,,,,,,,
4,2020-09-08,Botswana,0.0,2126.0,57.428571,0.0,0,9,0.428571,0.0,2.492887,-0.56431,-1.333261,6.587742,2.303039,Sub-Saharan Africa,Country,,,,,,,,,


In [379]:
pgmm_columns = [
    'Date', 'Level','Region', 'Country', 'Census Region', 'State/Province',
    'New COVID Cases', 'Cumulative COVID Cases', 'New Cases', 'Cumulative Cases', 'Infection Rate', 
    '7 Day Moving Average New Cases', 'Infection Rate',
    'New Deaths', 'Death Rate', 'Cumulative Deaths', '7 Day Moving Average Deaths',  
    'Speed', 'Acceleration', 'Jerk', '1 Day Persistence', '7 Day Persistence'
]
pgmm_configured = all_configured[pgmm_columns].copy()
#pgmm_configured.to_excel(cleanedFolder + "pgmm.xlsx", index = False)
pgmm_configured.head()

Unnamed: 0,Date,Level,Region,Country,Census Region,State/Province,New COVID Cases,Cumulative COVID Cases,New Cases,Cumulative Cases,Infection Rate,7 Day Moving Average New Cases,Infection Rate.1,New Deaths,Death Rate,Cumulative Deaths,7 Day Moving Average Deaths,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence
0,2020-09-08,Country,Sub-Saharan Africa,Angola,,,52.0,3033.0,,,,43.428571,,4,0.012569,124,,0.136459,-0.010324,-0.00404,4.369557,6.340465
1,2020-09-15,Country,Sub-Saharan Africa,Angola,,,130.0,3569.0,,,,76.571429,,3,0.009426,139,,0.240599,0.035013,0.019302,6.120053,4.321752
2,2020-09-08,Country,Sub-Saharan Africa,Benin,,,0.0,2213.0,,,,9.714286,,0,0.0,40,,0.082316,0.0,0.0,0.908654,0.426489
3,2020-09-15,Country,Sub-Saharan Africa,Benin,,,0.0,2267.0,,,,7.714286,,0,0.0,40,,0.065369,0.0,0.0,0.721578,0.966708
4,2020-09-08,Country,Sub-Saharan Africa,Botswana,,,0.0,2126.0,,,,57.428571,,0,0.0,9,,2.492887,-0.56431,-1.333261,6.587742,2.303039


In [380]:
def fixUSRegion(code):
    region = census_regions[code]
    region_name = region["name"]
    return region_name

us_states_census_demographics_request = requests.get(us_population_source).content
us_demographics = pd.read_csv(io.StringIO(us_states_census_demographics_request.decode('utf-8')))
currentTime = datetime.now()
us_demographics["Downloaded"] = currentTime
us_demographics["Country"] = "United States"
us_demographics["REGION"] = us_demographics["REGION"].apply(lambda x: fixUSRegion(x))
us_demographics["SEX"] = us_demographics["SEX"].apply(lambda x: fixSex(x))
us_demographics["Age Range"] = us_demographics["AGE"].apply(lambda x: getAgeRange(x))
keep_columns = ["REGION","STATE","NAME","SEX","AGE","POPEST2019_CIV","Downloaded","Country", "Age Range"]
us_demographics = us_demographics[keep_columns]
us_demographics.rename(columns = {'REGION': 'Census Region',
                                  'NAME' : 'State Name',
                                  'STATE' : 'FIPS',
                                  'POPEST2019_CIV' : 'Population 2019',
                                  'SEX' : 'Sex',
                                  'AGE' : 'Age'}, 
                       inplace = True)

us_sex = us_demographics.drop(columns=["Age Range"]).loc[us_demographics["Age"]==999].copy()
us_sex = us_sex.pivot_table(
    index=["Downloaded","Country","Census Region","State Name","FIPS","Sex"],
    columns='Age',
    values = 'Population 2019',
    aggfunc='first'
).reset_index().rename_axis(None, axis=1)
us_sex["Total Population"] = us_sex[999]
us_sex = us_sex.sort_values(["FIPS", "Sex"])
us_sex = us_sex.drop(columns=[999])
us_sex = us_sex.pivot_table(
    index=["Downloaded","Country","Census Region","State Name","FIPS"],
    columns='Sex',
    values = 'Total Population',
    aggfunc='first'
).reset_index().rename_axis(None, axis=1)
print(us_sex.columns)
us_sex["Pct Male"] = us_sex["Male"]/us_sex["Population 2019"]
us_sex["Pct Female"] = us_sex["Female"]/us_sex["Population 2019"]
us_sex = us_sex.sort_values(["FIPS"])

us_age = us_demographics[["Census Region","FIPS","State Name","Age", "Age Range", "Population 2019"]].copy()
us_age = us_age.pivot_table(index=["Census Region","FIPS","State Name"], 
                      columns='Age', 
                      values='Population 2019', 
                      aggfunc='first').reset_index().rename_axis(None, axis=1)
us_age["Total Population"] = us_age[999]
us_age["< 1"] = us_age[0]
us_age["1-4"] = us_age[1]+us_age[2]+us_age[3]+us_age[4]
us_age["5-14"] = us_age[5]+us_age[6]+us_age[7]+us_age[8]+us_age[9]+us_age[10]+us_age[11]+us_age[12]+us_age[13]+us_age[14]
us_age["15-24"] = us_age[15]+us_age[16]+us_age[17]+us_age[18]+us_age[19]+us_age[20]+us_age[21]+us_age[22]+us_age[23]+us_age[24]
us_age["25-34"] = us_age[25]+us_age[26]+us_age[27]+us_age[28]+us_age[29]+us_age[30]+us_age[31]+us_age[32]+us_age[33]+us_age[34]
us_age["35-44"] = us_age[35]+us_age[36]+us_age[37]+us_age[38]+us_age[39]+us_age[40]+us_age[41]+us_age[42]+us_age[43]+us_age[44]
us_age["45-54"] = us_age[45]+us_age[46]+us_age[47]+us_age[48]+us_age[49]+us_age[50]+us_age[51]+us_age[52]+us_age[53]+us_age[54]
us_age["55-64"] = us_age[55]+us_age[56]+us_age[57]+us_age[58]+us_age[59]+us_age[60]+us_age[61]+us_age[62]+us_age[63]+us_age[64]
us_age["65-74"] = us_age[65]+us_age[66]+us_age[67]+us_age[68]+us_age[69]+us_age[70]+us_age[71]+us_age[72]+us_age[73]+us_age[74]
us_age["75-84"] = us_age[75]+us_age[76]+us_age[77]+us_age[78]+us_age[79]+us_age[80]+us_age[81]+us_age[82]+us_age[83]+us_age[84]
us_age["85+"] = us_age[85]
us_age["Pct < 1"] = us_age["< 1"]/us_age["Total Population"]
us_age["Pct 1-4"] = us_age["1-4"]/us_age["Total Population"]
us_age["Pct 5-14"] = us_age["5-14"]/us_age["Total Population"]
us_age["Pct 15-24"] = us_age["15-24"]/us_age["Total Population"]
us_age["Pct 25-34"] = us_age["25-34"]/us_age["Total Population"]
us_age["Pct 35-44"] = us_age["35-44"]/us_age["Total Population"]
us_age["Pct 45-54"] = us_age["45-54"]/us_age["Total Population"]
us_age["Pct 55-64"] = us_age["55-64"]/us_age["Total Population"]
us_age["Pct 65-74"] = us_age["65-74"]/us_age["Total Population"]
us_age["Pct 75-84"] = us_age["75-84"]/us_age["Total Population"]
us_age["Pct 85+"] = us_age["85+"]/us_age["Total Population"]
us_age = us_age.drop(columns=["Census Region","State Name",0,999])
age_order = [
    'FIPS', 'Total Population',
    '< 1', 1, 2, 3, 4, 5, 6, 7, 8, 9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
    50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
    70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, '85+',
    '1-4', '5-14', '15-24', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84',
    'Pct < 1', 'Pct 1-4', 'Pct 5-14', 'Pct 15-24', 'Pct 25-34', 'Pct 35-44', 'Pct 45-54', 'Pct 55-64', 'Pct 65-74', 'Pct 75-84', 'Pct 85+'
]
us_age = us_age[age_order]
us_age = us_age.sort_values(["FIPS"])

us_state_demographics = pd.merge(us_sex, us_age, how="left", on="FIPS")
us_state_demographics.drop('Census Region', axis=1, inplace=True)
us_state_demographics = pd.merge(us_state_demographics, us_state_codes, how="left", on="FIPS")
us_state_demographics = us_state_demographics.drop(columns=["FIPS", "Status"])
us_state_demographics["Country"] = "United States"
us_state_demographics.rename(
    columns = {"Population 2019": "Population"},
    inplace = True)
us_state_demographics["Population (100K)"] = us_state_demographics["Population"]/100000
us_state_demographics["Population Source"] = us_population_source
demographics_order = [
    'Country', 'State/Province', 'Census Region',
    'Population', 'Population (100K)', 
    'Female', 'Male', 'Pct Male', 'Pct Female',
    '< 1', 1, 2, 3, 4, 5, 6, 7, 8, 9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
    50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
    70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, '85+',
    '1-4', '5-14', '15-24', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84',
    'Pct < 1', 'Pct 1-4', 'Pct 5-14', 'Pct 15-24', 'Pct 25-34', 'Pct 35-44', 'Pct 45-54', 'Pct 55-64', 'Pct 65-74', 'Pct 75-84', 'Pct 85+',
    'Population Source'
]
printColumns(us_state_demographics,"US State Demographics Columns")
us_state_demographics = us_state_demographics[demographics_order]
us_state_demographics = us_state_demographics.sort_values(["State/Province"])
us_state_demographics.to_excel(us_population, index = False)
us_state_demographics.head()

Index(['Downloaded', 'Country', 'Census Region', 'State Name', 'FIPS',
       'Female', 'Male', 'Population 2019'],
      dtype='object')
US State Demographics Columns
Index([        'Downloaded',            'Country',         'State Name',
                   'Female',               'Male',         'Population',
                 'Pct Male',         'Pct Female',   'Total Population',
                      '< 1',
       ...
                'Pct 45-54',          'Pct 55-64',          'Pct 65-74',
                'Pct 75-84',            'Pct 85+',     'State/Province',
       'State Abbreviation',      'Census Region',  'Population (100K)',
        'Population Source'],
      dtype='object', length=120)


Unnamed: 0,Country,State/Province,Census Region,Population,Population (100K),Female,Male,Pct Male,Pct Female,< 1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85+,1-4,5-14,15-24,25-34,35-44,45-54,55-64,65-74,75-84,Pct < 1,Pct 1-4,Pct 5-14,Pct 15-24,Pct 25-34,Pct 35-44,Pct 45-54,Pct 55-64,Pct 65-74,Pct 75-84,Pct 85+,Population Source
1,United States,Alabama,South,4889347,48.89347,2531653,2357694,0.48221,0.51779,56901,58290,59073,59799,60294,59568,58599,59537,60023,60241,60897,63083,62906,61883,61729,61740,61799,61924,62938,64125,63587,64201,63943,63719,63922,65079,65208,67027,69478,68758,64852,61469,59980,59615,60721,58941,59921,60346,60696,62200,58159,57993,57852,55498,58174,57008,58838,61959,65460,64750,60738,59494,59786,61321,65925,66906,66695,67073,67308,68221,65605,65211,65365,63117,62042,59584,56766,54694,52697,51707,50567,49884,51612,37091,36845,35441,36173,30575,27572,26053,23977,22580,19594,18222,16660,91543,237456,608466,631898,642187,589780,615279,657543,501447,256847,0.011638,0.048566,0.124447,0.12924,0.131344,0.120626,0.125841,0.134485,0.102559,0.052532,0.018723,https://www2.census.gov/programs-surveys/popes...
2,United States,Alaska,West,712114,7.12114,347065,365049,0.512627,0.487373,9978,10012,10186,10509,10395,10414,10303,10286,10436,10157,9976,10016,9887,9509,9678,9488,9410,9343,8518,7525,8088,8617,9132,9252,9900,10318,10693,11456,11576,11552,10946,10809,10460,10822,10799,10303,10452,9962,9667,9685,8865,8589,8533,7954,8295,7827,7962,8230,8696,9086,8428,8197,8330,8595,9128,9426,9493,9636,9731,9894,9373,9168,9161,8614,8800,8210,7734,7220,6655,6442,5978,5621,5482,4013,3945,3665,3459,2950,2677,2327,1971,1784,1586,1411,1277,7181,41102,100662,89273,109431,92305,84479,93296,61300,23107,0.014012,0.057718,0.141357,0.125363,0.153671,0.129621,0.118631,0.131013,0.086082,0.032448,0.010084,https://www2.census.gov/programs-surveys/popes...
3,United States,Arizona,West,7259090,72.5909,3658425,3600665,0.496022,0.503978,81929,83065,85726,88192,90876,90858,90405,90319,91313,90797,92572,96605,96951,95849,95076,94742,91832,93119,97724,99334,95779,96720,97447,99492,100066,101835,102053,105016,106612,105885,99473,96012,93463,94419,94495,91292,93193,93126,92593,94337,87480,85369,85480,83038,86427,83705,82925,85342,90167,90646,85428,82772,81434,81904,87011,88220,88617,88864,90069,90909,88041,87973,87664,85106,85274,83374,81125,78896,77728,77376,76474,76787,81299,59741,58899,57521,58174,50088,45100,41784,38116,35128,31252,28331,25703,145737,347859,930745,966255,999263,892335,851334,880737,751699,411197,0.011286,0.04792,0.128218,0.13311,0.137657,0.122927,0.117278,0.121329,0.103553,0.056646,0.020076,https://www2.census.gov/programs-surveys/popes...
4,United States,Arkansas,South,3012542,30.12542,1535409,1477133,0.490328,0.509672,36355,37006,37572,38610,38921,38404,37924,38827,38633,38959,38941,40404,41015,40146,39960,39598,39485,39395,38933,39714,40206,40211,40323,39367,38992,39539,39518,40912,42271,41927,39361,38289,37446,37354,37897,37550,38010,38198,38328,39332,36427,36037,35410,34319,35486,34449,34938,36485,37966,38136,35481,34716,34766,35572,38680,39707,39698,39360,39525,39697,38668,38092,37865,36917,36430,35478,34176,32682,31736,30888,30427,30017,31554,22864,23007,22169,22217,19362,17669,16670,14936,13764,12330,11253,10771,59912,152109,393213,396224,394514,369097,361189,385959,302829,161141,0.012068,0.050492,0.130525,0.131525,0.130957,0.12252,0.119895,0.128117,0.100523,0.05349,0.019888,https://www2.census.gov/programs-surveys/popes...
5,United States,California,West,39356141,393.56141,19843586,19512555,0.495794,0.504206,462589,462713,477322,485894,495198,493458,494221,493396,504330,493445,492283,511109,512662,507455,505628,503712,501846,497188,515261,501692,493088,497749,512251,533604,557011,576604,588951,613288,640318,640758,611094,595453,577581,574306,575253,556953,559625,555569,542576,549416,510476,499294,496070,486794,500853,483161,483781,493411,516222,535475,504901,490446,479620,481731,506585,509027,503106,495480,495887,505031,475913,467624,457973,440326,435989,411959,392969,371355,358803,354786,334996,325934,330822,255411,249635,233568,229072,200181,182011,172849,158877,148337,135255,124947,116502,749846,1921127,5007987,5113402,5993606,5257626,4975333,4786356,3386670,1701599,0.011754,0.048814,0.127248,0.129926,0.152292,0.133591,0.126418,0.121616,0.086052,0.043236,0.019053,https://www2.census.gov/programs-surveys/popes...


In [381]:
territories = {
    'American Samoa': {"Country":"United States", "Region":"North America", "Level":"Territory"},
    'Anguilla': {"Country":"United Kingdom", "Region":"Europe", "Level":"Territory"},
    'Caribbean Netherlands': {"Country":"Netherlands", "Region":"Europe", "Level":"Territory"},
    'Channel Islands': {"Country":"United Kingdom", "Region":"Europe", "Level":"Territory"},
    'Curaçao': {"Country":"Netherlands", "Region":"Europe", "Level":"Territory"},
    'Falkland Islands': {"Country":"United Kingdom", "Region":"Europe", "Level":"Territory"},
    'French Guiana': {"Country":"France", "Region":"Europe", "Level":"Territory"},
    'Guadeloupe': {"Country":"France", "Region":"Europe", "Level":"Territory"},
    'Hong Kong': {"Country":"China", "Region":"East Asia and Pacific", "Level":"Territory"},
    'Macao': {"Country":"China", "Region":"East Asia and Pacific", "Level":"Territory"},
    'Martinique': {"Country":"France", "Region":"Europe", "Level":"Territory"},
    'Mayotte': {"Country":"France", "Region":"Europe", "Level":"Territory"},
    'Montserrat': {"Country":"United Kingdom", "Region":"Europe", "Level":"Territory"},
    'Réunion': {"Country":"France", "Region":"Europe", "Level":"Territory"},
    'St. Barthelemy': {"Country":"France", "Region":"Europe", "Level":"Territory"},
    'St. Helena': {"Country":"United Kingdom", "Region":"Europe", "Level":"Territory"},
    'St. Martin': {"Country":"Netherlands", "Region":"Europe", "Level":"Territory"},
    'St. Pierre & Miquelon': {"Country":"France", "Region":"Europe", "Level":"Territory"},
    'State of Palestine': {"Country":"State of Palestine", "Region":"Middle East and North Africa", "Level":"Contested"},
    'Tokelau': {"Country":"Netherlands", "Region":"Europe", "Level":"Territory"},
    'Turks and Caicos': {"Country":"United Kingdom", "Region":"Europe", "Level":"Territory"},
    'U.S. Virgin Islands': {"Country":"United States", "Region":"North America", "Level":"Territory"},
    'Wallis & Futuna': {"Country":"France", "Region":"Europe", "Level":"Territory"},
    'Western Sahara': {"Country":"Western Sahara", "Region":"Middle East and North Africa", "Level":"Contested"}
}

print(territories.keys())

def fixTerritories(df):
    df["Level"] = df.apply(lambda x: territories[x]["Level"] if x in territories.keys() else "State/Province" if len(x["State/Province"])>0 else "Country")
    df["Region"] = df.apply(lambda x: territories[x]["Region"] if x in territories.keys() else x["Region"])
    df["Country"] = df.apply(lambda x: x if x in territories.keys() else x["Country"])

dict_keys(['American Samoa', 'Anguilla', 'Caribbean Netherlands', 'Channel Islands', 'Curaçao', 'Falkland Islands', 'French Guiana', 'Guadeloupe', 'Hong Kong', 'Macao', 'Martinique', 'Mayotte', 'Montserrat', 'Réunion', 'St. Barthelemy', 'St. Helena', 'St. Martin', 'St. Pierre & Miquelon', 'State of Palestine', 'Tokelau', 'Turks and Caicos', 'U.S. Virgin Islands', 'Wallis & Futuna', 'Western Sahara'])


In [382]:
canada_pop = canada_last_population.copy()
canada_pop.rename(columns = {'Population Source URL' : 'Population Source'}, inplace = True)
canada_pop["Country"] = "Canada"
canada_pop["Population (100K)"] = canada_pop["Population"]/100000
populations = pd.concat([country_populations, canada_pop], ignore_index=True, sort=True)
populations = pd.concat([populations, us_state_demographics], ignore_index=True, sort=True)
populations_column_order = [
    'Country', 'Census Region', 'State/Province',
    'Population', 'Population (100K)', 
    'World Share (%)', 'Urban Population (%)', 'Annual Change (%)', 'Net Change', 'Migrants (net)', 
    'Density (P/Km²)', 'Land Area (Km²)',
    'Fertility Rate', 'Median Age',  
    'Female', 'Male', 'Pct Male', 'Pct Female',
    '< 1', 1, 2, 3, 4, 5, 6, 7, 8, 9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
    50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
    70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, '85+',
    '1-4', '5-14', '15-24', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84',
    'Pct < 1', 'Pct 1-4', 'Pct 5-14', 'Pct 15-24', 'Pct 25-34', 'Pct 35-44', 'Pct 45-54', 'Pct 55-64', 'Pct 65-74', 'Pct 75-84', 'Pct 85+',
    'Population Source'
]
populations = populations[populations_column_order]
populations["State/Province"] = populations.apply(lambda x: "" if x["State/Province"]==x["Country"] else x["State/Province"], axis=1)
populations["Region"] = populations["Country"].apply(lambda x: region_from_country(x))

def territoryLevel(country, state):
    if country in territories.keys():
        return territories[country]["Level"]
    elif (state in ["",None,np.nan]):
        return "Country"
    else:
        return "State/Province"

def territoryValue(country, levelValue, level):
    if country in territories.keys():
        return territories[country][level]
    else:
        return levelValue
    
def territoryState(country,state):
    if country in territories.keys():
        return country
    else:
        return state

populations["State/Province"].str.replace('\.','')
populations["Census Region"].str.replace('\.','')
populations["Level"] = populations.apply(lambda x: territoryLevel(x["Country"],x["State/Province"]), axis=1)
populations["Region"] = populations.apply(lambda x: territoryValue(x["Country"],x["Region"],"Region"), axis=1)
populations["State/Province"] = populations.apply(lambda x: territoryState(x["Country"],x["State/Province"]), axis=1)
populations["Country"] = populations.apply(lambda x: territoryValue(x["Country"],x["Country"],"Country"), axis=1)
first_column = populations.pop("Region")
populations.insert(0, "Region", first_column)
first_column = populations.pop("Level")
populations.insert(0, "Level", first_column)
populations.head()

Unnamed: 0,Level,Region,Country,Census Region,State/Province,Population,Population (100K),World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,Female,Male,Pct Male,Pct Female,< 1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85+,1-4,5-14,15-24,25-34,35-44,45-54,55-64,65-74,75-84,Pct < 1,Pct 1-4,Pct 5-14,Pct 15-24,Pct 25-34,Pct 35-44,Pct 45-54,Pct 55-64,Pct 65-74,Pct 75-84,Pct 85+,Population Source
0,Country,South Asia,Afghanistan,,,38928346,389.28346,0.5,25.0,2.33,886592.0,-62920.0,60.0,652860.0,4.6,18.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
1,Country,Europe,Albania,,,2877797,28.77797,0.04,63.0,-0.11,-3120.0,-14000.0,105.0,27400.0,1.6,36.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
2,Country,Middle East and North Africa,Algeria,,,43851044,438.51044,0.56,73.0,1.85,797990.0,-10000.0,18.0,2381740.0,3.1,29.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
3,Territory,North America,United States,,American Samoa,55191,0.55191,0.0,88.0,-0.22,-121.0,,276.0,200.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
4,Country,Europe,Andorra,,,77265,0.77265,0.0,88.0,0.16,123.0,,164.0,470.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...


In [383]:
region_populations = pd.pivot_table(populations,index=["Region"],values=["Population","Land Area (Km²)","Net Change","Migrants (net)"],aggfunc=np.sum)
region_populations["Density (P/Km²)"] = region_populations["Population"]/region_populations["Land Area (Km²)"]
region_populations["Level"] = "Region"
region_populations["Population (100K)"] = region_populations["Population"]/100000
region_populations = region_populations.reset_index()
first_column = region_populations.pop("Level")
region_populations.insert(0, "Level", first_column)
region_populations

Unnamed: 0,Level,Region,Land Area (Km²),Migrants (net),Net Change,Population,Density (P/Km²),Population (100K)
0,Region,Central Asia,21289774.0,400717.0,2196755.0,325077353,15.269178,3250.77353
1,Region,East Asia and Pacific,24387266.0,-411453.0,12636630.0,2389319485,97.974061,23893.19485
2,Region,Europe,6244604.0,1177050.0,413943.0,602160122,96.428872,6021.60122
3,Region,Latin America,20040046.0,-521358.0,5831297.0,652545062,32.562054,6525.45062
4,Region,Middle East and North Africa,11501690.0,-234182.0,7813543.0,464362948,40.373454,4643.62948
5,Region,North America,18241480.0,1196387.0,2268567.0,1099020101,60.248406,10990.20101
6,Region,South Asia,4771577.0,-1243073.0,21079188.0,1856376663,389.048875,18563.76663
7,Region,Sub-Saharan Africa,23617646.0,-362825.0,29090716.0,1136052685,48.101859,11360.52685


In [384]:
all_populations_column_order = [
    'Level','Region','Country', 'Census Region', 'State/Province',
    'Population', 'Population (100K)', 
    'World Share (%)', 'Urban Population (%)', 'Annual Change (%)', 'Net Change', 'Migrants (net)', 
    'Density (P/Km²)', 'Land Area (Km²)',
    'Fertility Rate', 'Median Age',  
    'Female', 'Male', 'Pct Male', 'Pct Female',
    '< 1', 1, 2, 3, 4, 5, 6, 7, 8, 9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
    50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
    70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83, 84, '85+',
    '1-4', '5-14', '15-24', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84',
    'Pct < 1', 'Pct 1-4', 'Pct 5-14', 'Pct 15-24', 'Pct 25-34', 'Pct 35-44', 'Pct 45-54', 'Pct 55-64', 'Pct 65-74', 'Pct 75-84', 'Pct 85+',
    'Population Source'
]

all_populations = pd.concat([region_populations,populations],sort=True)
all_populations = all_populations[all_populations_column_order]
all_populations["Country"].str.replace('\.','')
all_populations["Census Region"].str.replace('\.','')
all_populations["State/Province"].str.replace('\.','')
all_populations = all_populations.sort_values(['Level','Region','Country', 'Census Region', 'State/Province'])
all_populations.to_excel(populations_file, index = False)
all_populations.head()

Unnamed: 0,Level,Region,Country,Census Region,State/Province,Population,Population (100K),World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,Female,Male,Pct Male,Pct Female,< 1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85+,1-4,5-14,15-24,25-34,35-44,45-54,55-64,65-74,75-84,Pct < 1,Pct 1-4,Pct 5-14,Pct 15-24,Pct 25-34,Pct 35-44,Pct 45-54,Pct 55-64,Pct 65-74,Pct 75-84,Pct 85+,Population Source
199,Contested,Middle East and North Africa,State of Palestine,,State of Palestine,5101414,51.01414,0.07,80.0,2.41,119994.0,-10563.0,847.0,6020.0,3.7,21.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
231,Contested,Middle East and North Africa,Western Sahara,,Western Sahara,597339,5.97339,0.01,87.0,2.55,14876.0,5582.0,2.0,266000.0,2.4,28.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
9,Country,Central Asia,Armenia,,,2963243,29.63243,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
13,Country,Central Asia,Azerbaijan,,,10139177,101.39177,0.13,56.0,0.91,91459.0,1200.0,123.0,82658.0,2.1,32.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
52,Country,Central Asia,Cyprus,,,1207359,12.07359,0.02,67.0,0.73,8784.0,5000.0,131.0,9240.0,1.3,37.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...


In [385]:
c_renamed["Country"] = c_renamed["Country"].apply(lambda x: "" if x in ["",None,np.nan] else x)
c_renamed["Country"] = c_renamed.apply(lambda x: "State of Palestine" if x["Country"]=="Palestinian Territories" else x["Country"], axis=1)
c_renamed['Country'].replace('', np.nan, inplace=True)
c_renamed.dropna(subset=['Country'], inplace=True)
c_renamed.head(-20)

Unnamed: 0,Date,Level,Region,Country,State Abbreviation,State/Province,FIPS,Status,Data Quality,Total Deaths,Deaths Daily,Total Recovered,Total Tests,Tests Daily,Total Negative,Negative Daily,Total Positive,Positive Daily,Total Hospitalized,Currently Hospitalized,Hospitalized Daily,Total In ICU,Currently In ICU,Total On Ventilator,Currently On Ventilator,SARS-CoV-2 Accessed,SARS-CoV-2 Source Date,SARS-CoV-2 Source
0,02/24/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,1,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...
1,02/25/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,0,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...
2,02/26/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,0,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...
3,02/27/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,0,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...
4,02/28/2020,Country,South Asia,Afghanistan,,,,,,,0,,,0,,,,0,,,,,,,,11/14/2020,NaT,https://github.com/dsbbfinddx/FINDCov19Tracker...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68795,10/21/2020,Country,North America,United States,,,0.0,0.0,,213702.0,1024,3323354.0,133598160.0,939535,110628789.0,690695,8254316.0,60657,441206.0,40271.0,2239.0,22855.0,8182.0,2622.0,2090.0,2020-11-14 21:49:32.202067,2020-10-21,https://covidtracking.com/data/download/all-st...
68796,10/22/2020,Country,North America,United States,,,0.0,0.0,,214845.0,1143,3353056.0,134816718.0,1218558,111541701.0,912912,8327203.0,72887,443777.0,41010.0,2571.0,23018.0,8086.0,2641.0,2147.0,2020-11-14 21:49:32.202067,2020-10-22,https://covidtracking.com/data/download/all-st...
68797,10/23/2020,Country,North America,United States,,,0.0,0.0,,215762.0,917,3375480.0,136195292.0,1378574,112584449.0,1042748,8409465.0,82262,458782.0,41482.0,15005.0,23221.0,8237.0,2679.0,2180.0,2020-11-14 21:49:32.202067,2020-10-23,https://covidtracking.com/data/download/all-st...
68798,10/24/2020,Country,North America,United States,,,0.0,0.0,,216652.0,890,3406701.0,137516626.0,1321334,113545013.0,960564,8492210.0,82745,460538.0,41905.0,1756.0,23356.0,8561.0,2691.0,2230.0,2020-11-14 21:49:32.202067,2020-10-24,https://covidtracking.com/data/download/all-st...


In [386]:
pgmm_state_input_order =["Level","Region","Country","State/Province","Date","Positive Daily","Total Positive","Deaths Daily","Total Deaths","Tests Daily","Total Tests"]
pgmm_states = c_renamed[pgmm_state_input_order].loc[c_renamed["Level"]=="State/Province"].copy()
pgmm_states.head()

pgmm_country_input_order =["Level","Region","Country","Date","Positive Daily","Total Positive","Deaths Daily","Total Deaths","Tests Daily","Total Tests"]
pgmm_countries = c_renamed[pgmm_country_input_order].loc[c_renamed["Level"]=="Country"].copy()
pgmm_countries["Region"] = pgmm_countries.apply(lambda x: territoryValue(x["Country"],x["Region"],"Region"), axis=1)
pgmm_countries["Country"] = pgmm_countries.apply(lambda x: territoryValue(x["Country"],x["Country"],"Country"), axis=1)
#pgmm_countries.head(-100)
pgmm_countries.loc[pgmm_countries["Region"]=="Sub-Saharan Africa"].head(-100)

pgmm_countries_zeroed = pgmm_countries.copy()
pgmm_countries_zeroed = pgmm_countries_zeroed.fillna(0)
pgmm_regions = pd.pivot_table(pgmm_countries_zeroed,index=["Region","Date"],values=["Positive Daily","Total Positive","Deaths Daily","Total Deaths","Tests Daily","Total Tests"],aggfunc=np.sum)
pgmm_regions["Level"] = "Region"
pgmm_regions = pgmm_regions.reset_index()
pgmm_regions.head(-100)

pgmm_order = ["Level","Region","Country","State/Province","Date","Week","Positive Daily","Total Positive","Deaths Daily","Total Deaths","Tests Daily","Total Tests"]
pgmm_input = pd.concat([pgmm_states,pgmm_countries,pgmm_regions],sort=False)
pgmm_input["Date"] = pgmm_input["Date"].astype(str)
pgmm_input["Date"] = pgmm_input["Date"].apply(lambda x: x.replace(" ",""))
pgmm_input["Week"] = pgmm_input["Date"].apply(lambda x: x[-4:]+datetime.strptime(x, '%m/%d/%Y').strftime('%V'))
pgmm_input = pgmm_input[pgmm_order]
pgmm_input = pgmm_input.sort_values(["Level","Region","Country","State/Province","Date"])
pgmm_input.head(-100)

pgmm_input["State/Province"] = pgmm_input.apply(lambda x: np.nan if not (x["Level"]=="State/Province") else x["State/Province"],axis=1)
pgmm_input["Country"] = pgmm_input.apply(lambda x: np.nan if (x["Level"]=="Region") else x["Country"],axis=1)

pgmm_input.head()

Unnamed: 0,Level,Region,Country,State/Province,Date,Week,Positive Daily,Total Positive,Deaths Daily,Total Deaths,Tests Daily,Total Tests
1768,Country,Central Asia,Armenia,,03/01/2020,202009,1,,0,,0,
1769,Country,Central Asia,Armenia,,03/02/2020,202010,0,,0,,0,
1770,Country,Central Asia,Armenia,,03/03/2020,202010,0,,0,,0,
1771,Country,Central Asia,Armenia,,03/04/2020,202010,0,,0,,0,
1772,Country,Central Asia,Armenia,,03/05/2020,202010,0,,0,,0,


In [387]:
pgmm_population = all_populations[['Level','Region','Country', 'State/Province','Census Region', 
    'Population', 'Population (100K)']].copy()
pgmm_population["State/Province"] = pgmm_population.apply(lambda x: np.nan if ((x["State/Province"]=="State of Palestine") or (x["State/Province"]=="Western Sahara")) else x["State/Province"],axis=1)
pgmm_population["Level"] = pgmm_population.apply(lambda x: "Country" if ((x["Country"]=="State of Palestine") or (x["Country"]=="Western Sahara")) else x["Level"],axis=1)
pgmm_population = pgmm_population.fillna(np.nan)
pgmm_population.head()

Unnamed: 0,Level,Region,Country,State/Province,Census Region,Population,Population (100K)
199,Country,Middle East and North Africa,State of Palestine,,,5101414,51.01414
231,Country,Middle East and North Africa,Western Sahara,,,597339,5.97339
9,Country,Central Asia,Armenia,,,2963243,29.63243
13,Country,Central Asia,Azerbaijan,,,10139177,101.39177
52,Country,Central Asia,Cyprus,,,1207359,12.07359


In [388]:
pgmm_demographics_order = [
    'Level','Region','Country', 'Census Region', 'State/Province','Date','Week',
    'Population', 'Population (100K)',
    "Positive Daily","Total Positive","Deaths Daily","Total Deaths","Tests Daily","Total Tests"
]
pgmm_demographics = pgmm_input.merge(pgmm_population,how='left',on=['Level','Region','Country','State/Province'])
pgmm_demographics = pgmm_demographics[pgmm_demographics_order]
pgmm_demographics.to_excel(cleanedFolder + "pgmm_input.xlsx", index = False)

characteristics_cols = ['Level','Region','Country', 'Census Region', 'State/Province']
nonAsciiValues = []
for col in characteristics_cols:
    print(col+" unique values:")
    pgmm_demographics[col] = pgmm_demographics[col].fillna("")
    col_values = pgmm_demographics[col].sort_values(ascending = True).unique()
    print(col_values)
    for value in col_values:
        if not value.isascii():
            nonAsciiValues.append(value)
print(nonAsciiValues)
pgmm_demographics['Country'] = pgmm_demographics['Country'].apply(
    lambda x: "Cote d'Ivoire" if x=="Côte d’Ivoire" else "Sao Tome and Principe" if x=="São Tomé and Príncipe" else x)

pgmm_demographics.head(100)

Level unique values:
['Country' 'Region' 'State/Province']
Region unique values:
['Central Asia' 'East Asia and Pacific' 'Europe' 'Latin America'
 'Middle East and North Africa' 'North America' 'South Asia'
 'Sub-Saharan Africa']
Country unique values:
['' 'Afghanistan' 'Albania' 'Algeria' 'Andorra' 'Angola'
 'Antigua & Barbuda' 'Argentina' 'Armenia' 'Australia' 'Austria'
 'Azerbaijan' 'Bahamas' 'Bahrain' 'Bangladesh' 'Barbados' 'Belarus'
 'Belgium' 'Belize' 'Benin' 'Bhutan' 'Bolivia' 'Bosnia & Herzegovina'
 'Botswana' 'Brazil' 'Brunei' 'Bulgaria' 'Burkina Faso' 'Burundi'
 'Cabo Verde' 'Cambodia' 'Cameroon' 'Canada' 'Central African Republic'
 'Chad' 'Chile' 'China' 'Colombia' 'Comoros' 'Costa Rica' 'Croatia' 'Cuba'
 'Cyprus' 'Czech Republic' 'Côte d’Ivoire' 'Democratic Republic of Congo'
 'Denmark' 'Djibouti' 'Dominica' 'Dominican Republic' 'Ecuador' 'Egypt'
 'El Salvador' 'Equatorial Guinea' 'Eritrea' 'Estonia' 'Ethiopia' 'Fiji'
 'Finland' 'France' 'Gabon' 'Gambia' 'Georgia' 'Germany

Unnamed: 0,Level,Region,Country,Census Region,State/Province,Date,Week,Population,Population (100K),Positive Daily,Total Positive,Deaths Daily,Total Deaths,Tests Daily,Total Tests
0,Country,Central Asia,Armenia,,,03/01/2020,202009,2963243.0,29.63243,1,,0,,0,
1,Country,Central Asia,Armenia,,,03/02/2020,202010,2963243.0,29.63243,0,,0,,0,
2,Country,Central Asia,Armenia,,,03/03/2020,202010,2963243.0,29.63243,0,,0,,0,
3,Country,Central Asia,Armenia,,,03/04/2020,202010,2963243.0,29.63243,0,,0,,0,
4,Country,Central Asia,Armenia,,,03/05/2020,202010,2963243.0,29.63243,0,,0,,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Country,Central Asia,Armenia,,,06/04/2020,202023,2963243.0,29.63243,697,,6,,1809,
96,Country,Central Asia,Armenia,,,06/05/2020,202023,2963243.0,29.63243,596,,7,,1702,
97,Country,Central Asia,Armenia,,,06/06/2020,202023,2963243.0,29.63243,547,,7,,1513,
98,Country,Central Asia,Armenia,,,06/07/2020,202023,2963243.0,29.63243,766,,10,,2497,


In [389]:
pgmm_daily = pgmm_demographics.copy();
pgmm_daily = pgmm_daily.drop(["Total Positive","Total Deaths","Total Tests"],1)
calc_cols = ["Positive","Deaths","Tests"]
for col in calc_cols:
    daily_col = col + " Daily"
    daily_rate_col = daily_col + " Rate"
    total_col = "Total " + col
    total_rate_col = total_col + " Rate"
    pgmm_daily[daily_col].fillna(0);
    pgmm_daily[total_col] = pgmm_daily.groupby(characteristics_cols)[daily_col].transform(pd.Series.cumsum)
    pgmm_daily[daily_rate_col] = pgmm_daily[daily_col]/pgmm_daily["Population (100K)"]
    pgmm_daily[total_rate_col] = pgmm_daily[total_col]/pgmm_daily["Population (100K)"]
pgmm_daily.to_excel(cleanedFolder + "pgmm_daily.xlsx", index = False)
pgmm_daily.head()

Unnamed: 0,Level,Region,Country,Census Region,State/Province,Date,Week,Population,Population (100K),Positive Daily,Deaths Daily,Tests Daily,Total Positive,Positive Daily Rate,Total Positive Rate,Total Deaths,Deaths Daily Rate,Total Deaths Rate,Total Tests,Tests Daily Rate,Total Tests Rate
0,Country,Central Asia,Armenia,,,03/01/2020,202009,2963243.0,29.63243,1,0,0,1,0.0337468,0.0337468,0,0,0,0,0.0,0.0
1,Country,Central Asia,Armenia,,,03/02/2020,202010,2963243.0,29.63243,0,0,0,1,0.0,0.0337468,0,0,0,0,0.0,0.0
2,Country,Central Asia,Armenia,,,03/03/2020,202010,2963243.0,29.63243,0,0,0,1,0.0,0.0337468,0,0,0,0,0.0,0.0
3,Country,Central Asia,Armenia,,,03/04/2020,202010,2963243.0,29.63243,0,0,0,1,0.0,0.0337468,0,0,0,0,0.0,0.0
4,Country,Central Asia,Armenia,,,03/05/2020,202010,2963243.0,29.63243,0,0,0,1,0.0,0.0337468,0,0,0,0,0.0,0.0


In [390]:
characteristics_and_population_cols = ['Level','Region','Country', 'Census Region', 'State/Province', "Week","Population","Population (100K)"]
pgmm_weekly = pgmm_daily.groupby(characteristics_and_population_cols).agg(
    positive_weekly = pd.NamedAgg(column="Positive Daily",aggfunc=np.sum),
    deaths_weekly = pd.NamedAgg(column="Deaths Daily",aggfunc=np.sum),
    tests_weekly = pd.NamedAgg(column="Tests Daily",aggfunc=np.sum),
)
pgmm_weekly.reset_index(inplace=True)
pgmm_weekly.rename(columns = {"positive_weekly": "Positive Weekly"},inplace = True)
pgmm_weekly.rename(columns = {"deaths_weekly": "Deaths Weekly"},inplace = True)
pgmm_weekly.rename(columns = {"tests_weekly": "Tests Weekly"},inplace = True)
for col in calc_cols:
    weekly_col = col + " Weekly"
    weekly_rate_col = weekly_col + " Rate"
    total_weekly_col = "Total " + col + " Weekly"
    total_weekly_rate_col = total_weekly_col + " Rate"
    pgmm_weekly[weekly_col].fillna(0);
    pgmm_weekly[total_weekly_col] = pgmm_weekly.groupby(characteristics_cols)[weekly_col].transform(pd.Series.cumsum)
    pgmm_weekly[weekly_rate_col] = pgmm_weekly[weekly_col]/pgmm_weekly["Population (100K)"]
    pgmm_weekly[total_weekly_rate_col] = pgmm_weekly[total_weekly_col]/pgmm_weekly["Population (100K)"]
pgmm_weekly.to_excel(cleanedFolder + "pgmm_weekly.xlsx", index = False)
pgmm_weekly.head()

Unnamed: 0,Level,Region,Country,Census Region,State/Province,Week,Population,Population (100K),Positive Weekly,Deaths Weekly,Tests Weekly,Total Positive Weekly,Positive Weekly Rate,Total Positive Weekly Rate,Total Deaths Weekly,Deaths Weekly Rate,Total Deaths Weekly Rate,Total Tests Weekly,Tests Weekly Rate,Total Tests Weekly Rate
0,Country,Central Asia,Armenia,,,202009,2963243.0,29.63243,1.0,0.0,0,1.0,0.033747,0.033747,0.0,0.0,0.0,0,0.0,0.0
1,Country,Central Asia,Armenia,,,202010,2963243.0,29.63243,0.0,0.0,0,1.0,0.0,0.033747,0.0,0.0,0.0,0,0.0,0.0
2,Country,Central Asia,Armenia,,,202011,2963243.0,29.63243,25.0,0.0,577,26.0,0.84367,0.877417,0.0,0.0,0.0,577,19.47191,19.47191
3,Country,Central Asia,Armenia,,,202012,2963243.0,29.63243,168.0,0.0,674,194.0,5.669464,6.546881,0.0,0.0,0.0,1251,22.74535,42.21726
4,Country,Central Asia,Armenia,,,202013,2963243.0,29.63243,230.0,3.0,1227,424.0,7.761766,14.308648,3.0,0.10124,0.10124,2478,41.407336,83.624596


In [236]:
statistics_populations = populations.drop('Census Region', axis=1).copy()
for i, val in enumerate(statistics_populations.columns.values):
    col_name = str(statistics_populations.columns.values[i])
    if ((col_name != "Country") and (col_name != "State/Province")):
        if "Population" in col_name:
            statistics_populations.columns.values[i] = "Statistics " + col_name
        else:
            statistics_populations.columns.values[i] = "Population " + col_name

statistics_and_population = pgmm_configured.merge(statistics_populations,how='outer',on=["Country","State/Province"])
statistics_and_population_file = cleanedFolder + "pgmm.xlsx"
statistics_and_population.to_excel(statistics_and_population_file, index = False)
statistics_and_population.head()

Unnamed: 0,Date,Level,Region,Country,Census Region,State/Province,New COVID Cases,Cumulative COVID Cases,New Cases,Cumulative Cases,Infection Rate,7 Day Moving Average New Cases,Infection Rate.1,New Deaths,Death Rate,Cumulative Deaths,7 Day Moving Average Deaths,Speed,Acceleration,Jerk,1 Day Persistence,7 Day Persistence,Population Level,Population Region,Statistics Population,Statistics Population (100K),Population World Share (%),Statistics Urban Population (%),Population Annual Change (%),Population Net Change,Population Migrants (net),Population Density (P/Km²),Population Land Area (Km²),Population Fertility Rate,Population Median Age,Population Female,Population Male,Population Pct Male,Population Pct Female,Population < 1,Population 1,Population 2,Population 3,Population 4,Population 5,Population 6,Population 7,Population 8,Population 9,Population 10,Population 11,Population 12,Population 13,Population 14,Population 15,Population 16,Population 17,Population 18,Population 19,Population 20,Population 21,Population 22,Population 23,Population 24,Population 25,Population 26,Population 27,Population 28,Population 29,Population 30,Population 31,Population 32,Population 33,Population 34,Population 35,Population 36,Population 37,Population 38,Population 39,Population 40,Population 41,Population 42,Population 43,Population 44,Population 45,Population 46,Population 47,Population 48,Population 49,Population 50,Population 51,Population 52,Population 53,Population 54,Population 55,Population 56,Population 57,Population 58,Population 59,Population 60,Population 61,Population 62,Population 63,Population 64,Population 65,Population 66,Population 67,Population 68,Population 69,Population 70,Population 71,Population 72,Population 73,Population 74,Population 75,Population 76,Population 77,Population 78,Population 79,Population 80,Population 81,Population 82,Population 83,Population 84,Population 85+,Population 1-4,Population 5-14,Population 15-24,Population 25-34,Population 35-44,Population 45-54,Population 55-64,Population 65-74,Population 75-84,Population Pct < 1,Population Pct 1-4,Population Pct 5-14,Population Pct 15-24,Population Pct 25-34,Population Pct 35-44,Population Pct 45-54,Population Pct 55-64,Population Pct 65-74,Population Pct 75-84,Population Pct 85+,Statistics Population Source
0,2020-09-08,Country,Sub-Saharan Africa,Angola,,,52.0,3033.0,,,,43.428571,,4.0,0.012569,124.0,,0.136459,-0.010324,-0.00404,4.369557,6.340465,Country,Sub-Saharan Africa,32866272.0,328.66272,0.42,67.0,3.27,1040977.0,6413.0,26.0,1246700.0,5.6,17.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
1,2020-09-15,Country,Sub-Saharan Africa,Angola,,,130.0,3569.0,,,,76.571429,,3.0,0.009426,139.0,,0.240599,0.035013,0.019302,6.120053,4.321752,Country,Sub-Saharan Africa,32866272.0,328.66272,0.42,67.0,3.27,1040977.0,6413.0,26.0,1246700.0,5.6,17.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
2,2020-09-08,Country,Sub-Saharan Africa,Benin,,,0.0,2213.0,,,,9.714286,,0.0,0.0,40.0,,0.082316,0.0,0.0,0.908654,0.426489,Country,Sub-Saharan Africa,12123200.0,121.232,0.16,48.0,2.73,322049.0,-2000.0,108.0,112760.0,4.9,19.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
3,2020-09-15,Country,Sub-Saharan Africa,Benin,,,0.0,2267.0,,,,7.714286,,0.0,0.0,40.0,,0.065369,0.0,0.0,0.721578,0.966708,Country,Sub-Saharan Africa,12123200.0,121.232,0.16,48.0,2.73,322049.0,-2000.0,108.0,112760.0,4.9,19.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...
4,2020-09-08,Country,Sub-Saharan Africa,Botswana,,,0.0,2126.0,,,,57.428571,,0.0,0.0,9.0,,2.492887,-0.56431,-1.333261,6.587742,2.303039,Country,Sub-Saharan Africa,2351627.0,23.51627,0.03,73.0,2.08,47930.0,3000.0,4.0,566730.0,2.9,24.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://www.worldometers.info/world-population...


In [296]:
pgmm_country_populations = country_demographics.copy()
pgmm_country_populations = pgmm_country_populations.drop(
    ["Population","Population (100K)",'Population Source','Population Region', 'Latitude', 'Longitude'],
    axis=1
)
pgmm_daily_populations = pgmm_daily.merge(pgmm_country_populations,how="left",on=["Country"])
pgmm_daily_populations['Country'] = pgmm_daily_populations['Country'].apply(
    lambda x: "Côte d’Ivoire" if x=="Cote d'Ivoire" else "São Tomé and Príncipe" if x=="Sao Tome and Principe" else x)
pgmm_all_populations = all_populations.copy()
pgmm_all_populations = pgmm_all_populations.drop(
    ["Level","Region","Census Region","Population","Population (100K)","Population Source",
     'World Share (%)','Urban Population (%)', 'Annual Change (%)', 'Net Change',
     'Migrants (net)', 'Density (P/Km²)', 'Land Area (Km²)','Fertility Rate', 'Median Age'],
    axis=1
)
pgmm_daily_populations = pgmm_daily_populations.merge(pgmm_all_populations,how="left",on=["Country","State/Province"])
pgmm_daily_populations.to_excel(cleanedFolder + "input_cleaned.xlsx", index = False)
pgmm_daily_populations.head()

Index(['Level', 'Region', 'Country', 'Census Region', 'State/Province', 'Date',
       'Week', 'Population', 'Population (100K)', 'Positive Daily',
       'Deaths Daily', 'Tests Daily', 'Total Positive', 'Positive Daily Rate',
       'Total Positive Rate', 'Total Deaths', 'Deaths Daily Rate',
       'Total Deaths Rate', 'Total Tests', 'Tests Daily Rate',
       'Total Tests Rate', 'World Share (%)', 'Urban Population (%)',
       'Annual Change (%)', 'Net Change', 'Migrants (net)', 'Density (P/Km²)',
       'Land Area (Km²)', 'Fertility Rate', 'Median Age'],
      dtype='object')


Unnamed: 0,Level,Region,Country,Census Region,State/Province,Date,Week,Population,Population (100K),Positive Daily,Deaths Daily,Tests Daily,Total Positive,Positive Daily Rate,Total Positive Rate,Total Deaths,Deaths Daily Rate,Total Deaths Rate,Total Tests,Tests Daily Rate,Total Tests Rate,World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,Female,Male,Pct Male,Pct Female,< 1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85+,1-4,5-14,15-24,25-34,35-44,45-54,55-64,65-74,75-84,Pct < 1,Pct 1-4,Pct 5-14,Pct 15-24,Pct 25-34,Pct 35-44,Pct 45-54,Pct 55-64,Pct 65-74,Pct 75-84,Pct 85+
0,Country,Central Asia,Armenia,,,03/01/2020,202009,2963243.0,29.63243,1.0,0.0,0,1.0,0.033747,0.033747,0.0,0.0,0.0,0,0.0,0.0,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Country,Central Asia,Armenia,,,03/02/2020,202010,2963243.0,29.63243,0.0,0.0,0,1.0,0.0,0.033747,0.0,0.0,0.0,0,0.0,0.0,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Country,Central Asia,Armenia,,,03/03/2020,202010,2963243.0,29.63243,0.0,0.0,0,1.0,0.0,0.033747,0.0,0.0,0.0,0,0.0,0.0,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Country,Central Asia,Armenia,,,03/04/2020,202010,2963243.0,29.63243,0.0,0.0,0,1.0,0.0,0.033747,0.0,0.0,0.0,0,0.0,0.0,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Country,Central Asia,Armenia,,,03/05/2020,202010,2963243.0,29.63243,0.0,0.0,0,1.0,0.0,0.033747,0.0,0.0,0.0,0,0.0,0.0,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
