In [12]:
# Dependencies
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import io
import time
from datetime import datetime, timedelta
import json
import pprint
# Display helpers: a 4-space pretty-printer and no cap on the number of columns shown.
pd.set_option('display.max_columns', None)
pp = pprint.PrettyPrinter(indent=4)

# Working folders; each value ends with "/" so file names can be appended directly.
rFolder = "C:/Users/janin/Downloads/"
dataFolder = "D:/Repositories/Global-COVID-Surveillance/data/"
cleanedFolder = "".join([dataFolder, "cleaned/"])

# current_date is today's date; file_date is the pinned date used in the
# result and statistics file-name suffixes.
now = datetime.now()
current_date = now.strftime('%Y-%m-%d')
print(current_date)
file_date = "2021-01-14"
file_end = "".join([" -Results- ", current_date, " .csv"])
output_file_end = "".join([" -Results- ", file_date, " .csv"])
statistics_file_end = "".join([" - GMM - ", file_date, " .txt"])
print(file_end)

def print_column_unique(column):
    """Print and return the distinct values of ``column`` in a stable order.

    Non-string values are listed first, then strings, each group in ascending
    order. Non-string values that cannot be compared with one another make
    ``sorted`` raise ``TypeError``.

    Parameters
    ----------
    column : iterable
        Hashable values, e.g. a pandas Series or Index.

    Returns
    -------
    list
        The de-duplicated values in sorted order.
    """
    print("Column Values:")
    # De-duplicate first, then sort: a set does not keep any ordering, so the
    # sort has to be the last step for the result to be deterministic.
    values = sorted(set(column), key=lambda v: (isinstance(v, str), v))
    print(values)
    return values

def print_column_missing(column, comparison):
    """Print how the values of ``column`` and ``comparison`` differ.

    Output, in order: the unique column values (printed by
    ``print_column_unique``), the comparison itself, the column values that
    have no string-equal counterpart in ``comparison``, and the comparison
    values that are not members of the unique column values.

    Returns the unique column values.
    """
    values = print_column_unique(column)
    print("Comparison:")
    print(comparison)

    # Column -> comparison is matched on the string form, so 1 and "1" agree.
    comparison_text = {str(c) for c in comparison}
    only_in_column = [v for v in values if str(v) not in comparison_text]
    if only_in_column:
        print("Column values not in comparison:")
        print(only_in_column)
    else:
        print("No missing values")

    # Comparison -> column uses plain membership (no string conversion).
    only_in_comparison = [c for c in comparison if c not in values]
    if only_in_comparison:
        print("Comparison values not in column:")
        print(only_in_comparison)
    else:
        print("No missing values")
    return values

2021-01-14
 -Results- 2021-01-14 .csv


# Load the population workbook and turn its grouping columns into strings,
# replacing the "nan" text that astype(str) yields for empty cells with "".
populations = pd.read_excel(cleanedFolder + "all_populations.xlsx")
population_groups = ["Level","Region","Census Region","Country","State/Province"]
for group in population_groups:
    as_text = populations[group].astype(str)
    populations[group] = as_text.where(as_text != "nan", "")
populations.head()

In [13]:
# Read starter

# The starter workbook is the frame the per-region R results get appended to.
pgmm = pd.read_excel("".join([dataFolder, "Starter.xlsx"]))
print(pgmm.columns)
pgmm.head()

# Read R results

# One entry per R result set; the entry doubles as the stem of its file name.
pgmm_files = [
    'Central Asia', 'East Asia and Pacific', 'Europe',
    'Latin America', 'Middle East and North Africa',
    'United States', 'Canada',
    'South Asia', 'Sub-Saharan Africa',
]

def readOutput(r,pgmm_read_file):
    """Load one R result file and reshape it to the notebook's column layout.

    Parameters
    ----------
    r : str
        Label of the result set. "Canada" and "United States" are handled as
        countries inside "North America" whose rows are states/provinces; any
        other label is handled as a region whose rows are countries.
    pgmm_read_file : str or file-like
        Path or buffer handed to ``pandas.read_csv``. Must contain columns
        V1..V15; V2 carries the place name, with the literal value "Region"
        marking the aggregate row of the file.

    Returns
    -------
    pandas.DataFrame
        The rows whose V1 is non-zero, with Region / Country / State/Province /
        Level filled in and V1, V3..V15 renamed to descriptive column names.
    """
    df = pd.read_csv(pgmm_read_file)
    # "Unnamed: 0" is presumably the row-name column written by the exporter;
    # files saved without it are accepted as well.
    df = df.drop(columns=["Unnamed: 0"], errors="ignore")
    # Work on an independent copy of the filtered rows so the column
    # assignments below never write into a slice of the frame that was read.
    df = df[df["V1"] != 0].copy()
    if r in ("Canada", "United States"):
        df["Region"] = "North America"
        df = df.rename(columns={"V2": "State/Province"})
        df["Country"] = r
        # The aggregate row describes the whole country, so it gets no state.
        df["Level"] = df["State/Province"].apply(lambda x: "Country" if (x == "Region") else "State/Province")
        df["State/Province"] = df["State/Province"].apply(lambda x: "" if (x == "Region") else x)
    else:
        df["Region"] = r
        df = df.rename(columns={"V2": "Country"})
        # The aggregate row describes the whole region, so it gets no country.
        df["Level"] = df["Country"].apply(lambda x: "Region" if (x == "Region") else "Country")
        df["Country"] = df["Country"].apply(lambda x: "" if (x == "Region") else x)
        df["State/Province"] = ""
    return df.rename(columns={
        "V1": 'Last Day of Week Excel Date',
        "V3": 'Cases Daily Last Day of Week',
        "V4": 'Total Cases Last Day of Week',
        "V5": 'Cases 7D Moving Average',
        "V6": 'Cases Last Day of Week Rate 100K',
        "V7": 'Deaths Daily Last Day of Week',
        "V8": 'Total Deaths Last Day of Week',
        "V9": 'Deaths 7D Moving Average',
        "V10": 'Deaths Last Day of Week Rate 100K',
        "V11": 'Speed', #Cases Last Day of Week Rate 100K 7D Moving Average
        "V12": 'Acceleration', #Average Daily Change in Speed
        "V13": 'Jerk', #Average Daily Change in Acceleration
        "V14": '1-Day Persistence',
        "V15": '7-Day Persistence'
    })
    

# Read each R result file once and append all of them to the starter frame in a
# single concat; concatenating inside the loop would re-copy the growing frame
# on every pass.
region_frames = []
for r in pgmm_files:
    file_stem = cleanedFolder + r
    pgmm_read_file = file_stem + output_file_end
    statistics_read_file = file_stem + statistics_file_end  # built but not read in this cell
    output_df = readOutput(r,pgmm_read_file)
    region_frames.append(output_df)
pgmm = pd.concat([pgmm] + region_frames, ignore_index=True, sort=False)

# Create Time Variables
pgmm["Last Day of Week Excel Date"] = pgmm["Last Day of Week Excel Date"].astype(int)
# Excel serial day numbers are offsets from 1899-12-30
# (equivalently: ordinal of 1900-01-01, plus the serial, minus 2).
pgmm["Time"] = pd.to_datetime(pgmm["Last Day of Week Excel Date"], unit="D", origin="1899-12-30")
pgmm["Date"] = pgmm["Time"].dt.strftime('%m/%d/%Y')
# ISO year and ISO week number, e.g. "2020 W53"; the week number is not zero-padded.
pgmm["Week"] = pgmm["Time"].apply(lambda t: "{} W{}".format(*t.isocalendar()[:2]))
pgmm["Output"] = True
pgmm_order = [
    'Region', 'Country', 'State/Province',
    'Last Day of Week Excel Date', 'Date', 'Week',
    'Speed', 'Acceleration', 'Jerk', '7-Day Persistence', '1-Day Persistence',
    'Cases Daily Last Day of Week', 'Total Cases Last Day of Week',
    'Cases 7D Moving Average', 'Cases Last Day of Week Rate 100K',
    'Deaths Daily Last Day of Week', 'Total Deaths Last Day of Week',
    'Deaths 7D Moving Average', 'Deaths Last Day of Week Rate 100K', 'Output'
]
# Keep only the listed columns, in this order (this drops "Time" and "Level").
pgmm = pgmm[pgmm_order]
pgmm.head(8)

Index(['Level', 'Region', 'Country', 'State/Province', 'Excel Date'], dtype='object')


Unnamed: 0,Region,Country,State/Province,Last Day of Week Excel Date,Date,Week,Speed,Acceleration,Jerk,7-Day Persistence,1-Day Persistence,Cases Daily Last Day of Week,Total Cases Last Day of Week,Cases 7D Moving Average,Cases Last Day of Week Rate 100K,Deaths Daily Last Day of Week,Total Deaths Last Day of Week,Deaths 7D Moving Average,Deaths Last Day of Week Rate 100K,Output
0,Central Asia,Armenia,,44164,11/29/2020,2020 W48,41.190393,-1.017225,-1.282379,3.726627,6.203225,1174.0,134768.0,1220.571429,39.618756,21.0,2142.0,27.142857,0.708683,True
1,Central Asia,Armenia,,44171,12/06/2020,2020 W49,34.561555,-0.944911,-1.036509,3.582795,5.218361,978.0,141937.0,1024.142857,33.004381,17.0,2326.0,26.285714,0.573696,True
2,Central Asia,Armenia,,44178,12/13/2020,2020 W50,30.796375,0.168734,1.480039,3.00621,4.501324,1013.0,148325.0,912.571429,34.185519,24.0,2486.0,22.857143,0.809923,True
3,Central Asia,Armenia,,44185,12/20/2020,2020 W51,26.515351,-1.740371,-1.378798,2.678709,4.152724,652.0,153825.0,785.714286,22.00292,14.0,2630.0,20.571429,0.472455,True
4,Central Asia,Armenia,,44192,12/27/2020,2020 W52,19.32728,-0.805102,0.805102,3.396556,4.242193,485.0,157834.0,572.714286,16.367203,16.0,2768.0,19.714286,0.539949,True
5,Central Asia,Armenia,,44199,01/03/2021,2020 W53,10.572394,-1.234169,1.301663,8.384602,7.098557,229.0,160027.0,313.285714,7.72802,14.0,2850.0,11.714286,0.472455,True
6,Central Asia,Armenia,,44206,01/10/2021,2021 W1,10.143327,0.520665,-1.017225,4.586538,5.785513,337.0,162131.0,300.571429,11.372675,7.0,2929.0,11.285714,0.236228,True
7,Central Asia,Azerbaijan,,44164,11/29/2020,2020 W48,35.366353,1.372329,-0.708708,2.133164,4.996079,4170.0,118195.0,3585.857143,41.127598,38.0,1361.0,32.857143,0.374784,True


In [14]:
# Latest week label per country. The labels are not zero-padded, so a string
# max() would rank "2021 W9" above "2021 W10"; take the label from the row
# with the greatest Excel date instead.
latest_rows = pgmm.groupby("Country")["Last Day of Week Excel Date"].idxmax()
date_check = pgmm.loc[latest_rows].set_index("Country")["Week"]
date_check.head()

Country
               2021 W1
Afghanistan    2021 W1
Albania        2021 W1
Algeria        2021 W1
Angola         2021 W1
Name: Week, dtype: object

# United States rows of the model output that carry no state/province.
is_us = pgmm["Country"] == "United States"
no_state = pgmm["State/Province"] == ""
us_check = pgmm.loc[is_us & no_state]
us_check.head()

In [15]:
# Load the raw input workbook, drop the MM-DD-YYYY / DD-MM-YYYY columns and
# convert the text columns to strings with "" in place of the "nan" text.
input_df = pd.read_excel(cleanedFolder + "all_raw_input.xlsx")
input_df = input_df.drop(columns=["MM-DD-YYYY","DD-MM-YYYY"])
for text_column in ("State/Province", "Census Region", "Status", "Data Quality"):
    input_df[text_column] = (
        input_df[text_column]
        .astype(str)
        .apply(lambda x: "" if x == "nan" else x)
    )
input_df.head()

Unnamed: 0,Level,Region,Country,Census Region,State/Province,Abbreviation,FIPS,Time,Date,Week,First Day of Week,Last Day of Week,Week Date Range,Status,Data Quality,Cases Daily,Cases Daily 7D Rolling,Total Cases,Cases Daily Rate,Cases Daily Rate 7D Rolling,Total Cases Rate,Tests Daily,Tests Daily 7D Rolling,Total Tests,Tests Daily Rate,Tests Daily Rate 7D Rolling,Total Tests Rate,Deaths Daily,Deaths Daily 7D Rolling,Total Deaths,Deaths Daily Rate,Deaths Daily Rate 7D Rolling,Total Deaths Rate,Positivity 7D Rolling,Speed Daily,Acceleration Daily,Jerk Daily,Negative Daily,Total Negative,Hospitalized Daily,Total Hospitalized,Currently Hospitalized,ICU Daily,Total ICU,Currently In ICU,Total In ICU,Currently On Ventilator,Total On Ventilator,Recovered Daily,Total Recovered,Active Daily,Total Active,Population,Population 100K,Country Population,Country Population 100K,Country Share,Region Population,Region Population 100K,Region Share,World Population,World Population 100K,World Share,World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,< 1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85+,1-4,5-14,15-24,25-34,35-44,45-54,55-64,65-74,75-84,Pct < 1,Pct 1-4,Pct 5-14,Pct 15-24,Pct 25-34,Pct 35-44,Pct 45-54,Pct 55-64,Pct 65-74,Pct 75-84,Pct 85+,Accessed
0,Country,Central Asia,Armenia,,,AM,,2020-03-01,03/01/2020,2020 W9,2020-02-24,2020-03-01,2/24/20 - 3/1/20,,,1,,1,0.033747,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1/14/2021
1,Country,Central Asia,Armenia,,,AM,,2020-03-02,03/02/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1/14/2021
2,Country,Central Asia,Armenia,,,AM,,2020-03-03,03/03/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1/14/2021
3,Country,Central Asia,Armenia,,,AM,,2020-03-04,03/04/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1/14/2021
4,Country,Central Asia,Armenia,,,AM,,2020-03-05,03/05/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,,0,,1,0.0,,0.033747,0,,0,0.0,,0.0,0,,0,0.0,,0.0,,,,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1/14/2021


# United States rows of the raw input that carry no state/province.
is_us = input_df["Country"] == "United States"
no_state = input_df["State/Province"] == ""
us_check = input_df.loc[is_us & no_state]
us_check.head()

In [16]:
# Attach the model output to the input rows; the left join keeps every input row.
all_df = input_df.merge(pgmm,how="left",on=['Region', 'Country', 'State/Province', 'Date', 'Week'])
# Input rows without a matching output row have NaN in "Output"; flag them False.
all_df["Output"] = all_df["Output"].eq(True)
# United States rows without a state/province are labelled as country level.
# Assigned through a boolean mask so the whole column is handled in one step.
us_national = (all_df["Country"] == "United States") & (all_df["State/Province"] == "")
all_df.loc[us_national, "Level"] = "Country"
# Final column order of the combined workbook.
all_order = [
    'Level', 'Region', 'Country', 'Census Region', 'State/Province', 'Abbreviation', 'FIPS',
    'Time','Date','Week', 'First Day of Week', 'Last Day of Week', 'Week Date Range', 'Last Day of Week Excel Date', 'Accessed',
    'Output', 'Status', 'Data Quality',
    'Speed', 'Speed Daily', 'Acceleration', 'Acceleration Daily', 'Jerk', 'Jerk Daily', '7-Day Persistence', '1-Day Persistence',
    'Cases Daily', 'Cases Daily 7D Rolling', 'Cases 7D Moving Average', 'Cases Daily Last Day of Week',
    'Cases Daily Rate', 'Cases Daily Rate 7D Rolling', 'Cases Last Day of Week Rate 100K',
    'Total Cases', 'Total Cases Rate', 'Total Cases Last Day of Week',
    'Deaths Daily', 'Deaths Daily 7D Rolling', 'Deaths 7D Moving Average', 'Deaths Daily Last Day of Week',
    'Deaths Daily Rate', 'Deaths Daily Rate 7D Rolling', 'Deaths Last Day of Week Rate 100K',
    'Total Deaths', 'Total Deaths Rate', 'Total Deaths Last Day of Week',
    'Tests Daily', 'Tests Daily 7D Rolling',
    'Tests Daily Rate', 'Tests Daily Rate 7D Rolling',
    'Total Tests', 'Total Tests Rate',
    'Positivity 7D Rolling',
    'Active Daily', 'Total Active',
    'Negative Daily', 'Total Negative',
    'Recovered Daily', 'Total Recovered',
    'Hospitalized Daily', 'Currently Hospitalized', 'Total Hospitalized',
    'ICU Daily', 'Total ICU',
    'Currently In ICU', 'Total In ICU',
    'Currently On Ventilator', 'Total On Ventilator',
    "Population","Population 100K",
    "Country Population","Country Population 100K", "Country Share",
    "Region Population","Region Population 100K", "Region Share",
    "World Population", "World Population 100K", "World Share",
    'World Share (%)', 'Urban Population (%)', 'Annual Change (%)', 'Net Change', 'Migrants (net)', 'Density (P/Km²)',
    'Land Area (Km²)', 'Fertility Rate', 'Median Age',
    # Single-year age columns are labelled with the integers 1..84.
    '< 1', *range(1, 85), '85+',
    '1-4','5-14','15-24','25-34','35-44','45-54','55-64','65-74','75-84',
    'Pct < 1','Pct 1-4','Pct 5-14','Pct 15-24','Pct 25-34','Pct 35-44','Pct 45-54','Pct 55-64','Pct 65-74','Pct 75-84','Pct 85+'
]
# Print any mismatch between the merged columns and the requested order before
# selecting; a label missing from the frame makes the selection raise KeyError.
print_column_missing(all_df.columns,all_order)
all_df = all_df[all_order]

all_out_file = cleanedFolder + "all_combined.xlsx"
print(all_out_file)
all_df.to_excel(all_out_file, index=False)
all_df.head(14)

Column Values:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 'Net Change', '5-14', 'Total Hospitalized', 'Total Tests', 'Jerk', 'Total Cases Rate', 'Total Cases', 'Cases Daily Rate 7D Rolling', 'Country Population 100K', 'Tests Daily Rate 7D Rolling', 'Country Share', 'First Day of Week', 'Pct 45-54', 'Total Deaths', '1-4', 'Active Daily', 'Total ICU', 'Pct < 1', 'Speed Daily', 'Pct 35-44', 'Deaths 7D Moving Average', '85+', 'Acceleration Daily', 'Week Date Range', 'State/Province', '55-64', 'Annual Change (%)', 'Population', 'Country', 'Last Day of Week Excel Date', '65-74', 'Region Share', 'Total Active', 'Cases 7D Moving Average', 'Total On Ventilator', 'Total Negative', 'Migrants (net)', 'Deaths Daily Las

Unnamed: 0,Level,Region,Country,Census Region,State/Province,Abbreviation,FIPS,Time,Date,Week,First Day of Week,Last Day of Week,Week Date Range,Last Day of Week Excel Date,Accessed,Output,Status,Data Quality,Speed,Speed Daily,Acceleration,Acceleration Daily,Jerk,Jerk Daily,7-Day Persistence,1-Day Persistence,Cases Daily,Cases Daily 7D Rolling,Cases 7D Moving Average,Cases Daily Last Day of Week,Cases Daily Rate,Cases Daily Rate 7D Rolling,Cases Last Day of Week Rate 100K,Total Cases,Total Cases Rate,Total Cases Last Day of Week,Deaths Daily,Deaths Daily 7D Rolling,Deaths 7D Moving Average,Deaths Daily Last Day of Week,Deaths Daily Rate,Deaths Daily Rate 7D Rolling,Deaths Last Day of Week Rate 100K,Total Deaths,Total Deaths Rate,Total Deaths Last Day of Week,Tests Daily,Tests Daily 7D Rolling,Tests Daily Rate,Tests Daily Rate 7D Rolling,Total Tests,Total Tests Rate,Positivity 7D Rolling,Active Daily,Total Active,Negative Daily,Total Negative,Recovered Daily,Total Recovered,Hospitalized Daily,Currently Hospitalized,Total Hospitalized,ICU Daily,Total ICU,Currently In ICU,Total In ICU,Currently On Ventilator,Total On Ventilator,Population,Population 100K,Country Population,Country Population 100K,Country Share,Region Population,Region Population 100K,Region Share,World Population,World Population 100K,World Share,World Share (%),Urban Population (%),Annual Change (%),Net Change,Migrants (net),Density (P/Km²),Land Area (Km²),Fertility Rate,Median Age,< 1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85+,1-4,5-14,15-24,25-34,35-44,45-54,55-64,65-74,75-84,Pct < 1,Pct 1-4,Pct 5-14,Pct 15-24,Pct 25-34,Pct 35-44,Pct 45-54,Pct 55-64,Pct 65-74,Pct 75-84,Pct 85+
0,Country,Central Asia,Armenia,,,AM,,2020-03-01,03/01/2020,2020 W9,2020-02-24,2020-03-01,2/24/20 - 3/1/20,,1/14/2021,False,,,,,,,,,,,1,,,,0.033747,,,1,0.033747,,0,,,,0.0,,,0,0.0,,0,,0.0,,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Country,Central Asia,Armenia,,,AM,,2020-03-02,03/02/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,1/14/2021,False,,,,,,,,,,,0,,,,0.0,,,1,0.033747,,0,,,,0.0,,,0,0.0,,0,,0.0,,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Country,Central Asia,Armenia,,,AM,,2020-03-03,03/03/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,1/14/2021,False,,,,,,,,,,,0,,,,0.0,,,1,0.033747,,0,,,,0.0,,,0,0.0,,0,,0.0,,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Country,Central Asia,Armenia,,,AM,,2020-03-04,03/04/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,1/14/2021,False,,,,,,,,,,,0,,,,0.0,,,1,0.033747,,0,,,,0.0,,,0,0.0,,0,,0.0,,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Country,Central Asia,Armenia,,,AM,,2020-03-05,03/05/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,1/14/2021,False,,,,,,,,,,,0,,,,0.0,,,1,0.033747,,0,,,,0.0,,,0,0.0,,0,,0.0,,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,Country,Central Asia,Armenia,,,AM,,2020-03-06,03/06/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,1/14/2021,False,,,,,,,,,,,0,,,,0.0,,,1,0.033747,,0,,,,0.0,,,0,0.0,,0,,0.0,,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,Country,Central Asia,Armenia,,,AM,,2020-03-07,03/07/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,1/14/2021,False,,,,0.004821,,,,,,,0,0.142857,,,0.0,0.004821,,1,0.033747,,0,0.0,,,0.0,0.0,,0,0.0,,0,0.0,0.0,0.0,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,Country,Central Asia,Armenia,,,AM,,2020-03-08,03/08/2020,2020 W10,2020-03-02,2020-03-08,3/2/20 - 3/8/20,,1/14/2021,False,,,,0.0,,-0.004821,,,,,0,0.0,,,0.0,0.0,,1,0.033747,,0,0.0,,,0.0,0.0,,0,0.0,,0,0.0,0.0,0.0,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,Country,Central Asia,Armenia,,,AM,,2020-03-09,03/09/2020,2020 W11,2020-03-09,2020-03-15,3/9/20 - 3/15/20,,1/14/2021,False,,,,0.0,,0.0,,0.004821,,,0,0.0,,,0.0,0.0,,1,0.033747,,0,0.0,,,0.0,0.0,,0,0.0,,0,0.0,0.0,0.0,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,Country,Central Asia,Armenia,,,AM,,2020-03-10,03/10/2020,2020 W11,2020-03-09,2020-03-15,3/9/20 - 3/15/20,,1/14/2021,False,,,,0.0,,0.0,,0.0,,,0,0.0,,,0.0,0.0,,1,0.033747,,0,0.0,,,0.0,0.0,,0,0.0,,0,0.0,0.0,0.0,0,0.0,,,,,,,,,,,,,,,,,2963243.0,29.63243,,,,326887719.0,3268.87719,0.009065,7796609000.0,77966.09105,0.00038,0.04,63.0,0.19,5512.0,-4998.0,104.0,28470.0,1.8,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


# United States rows without a state/province that have model output attached.
is_us = all_df["Country"] == "United States"
no_state = all_df["State/Province"] == ""
has_output = all_df["Output"] == True
us_check = all_df.loc[is_us & no_state & has_output]
us_check.head()

In [17]:
# Latest date and its week label per country. Columns are selected with a list
# (tuple-style selection on a groupby is rejected by current pandas), and the
# week is taken from the row with the latest "Time" because the un-padded
# labels do not sort chronologically as strings ("2021 W9" > "2021 W10").
# NOTE(review): assumes "Time" is a datetime column with at least one
# non-missing value per country - confirm against the input workbook.
latest_rows = all_df.groupby("Country")["Time"].idxmax()
date_check = all_df.loc[latest_rows, ["Country", "Time", "Week"]].set_index("Country")
date_check.head()

Unnamed: 0_level_0,Time,Week
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,2021-01-13,2021 W2
Albania,2021-01-13,2021 W2
Algeria,2021-01-13,2021 W2
Andorra,2021-01-13,2021 W2
Angola,2021-01-13,2021 W2
