## All COVID-19 Cases by Country

In [8]:
import pandas as pd
import numpy as np
import os

#### Training Data

#### Confirmed Cases

In [10]:
# All Countries
# Read the global confirmed-cases time series from the CSSEGISandData COVID-19
# repository, discard the coordinate/province columns, and total the remaining
# date columns per country (one row per Country_Region).
confirmed_df = (
    pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
    .drop(columns=['Lat', 'Long', 'Province/State'])
    .rename(columns={'Country/Region': 'Country_Region'})
    .groupby('Country_Region')
    .sum()
)
confirmed_df.head()

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20,4/10/20
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,237,273,281,299,349,367,423,444,484,521
Albania,0,0,0,0,0,0,0,0,0,0,...,259,277,304,333,361,377,383,400,409,416
Algeria,0,0,0,0,0,0,0,0,0,0,...,847,986,1171,1251,1320,1423,1468,1572,1666,1761
Andorra,0,0,0,0,0,0,0,0,0,0,...,390,428,439,466,501,525,545,564,583,601
Angola,0,0,0,0,0,0,0,0,0,0,...,8,8,8,10,14,16,17,19,19,19


In [11]:
# Top Countries
# Rank countries by their value in the right-most date column of confirmed_df
# and keep the n largest; the cell displays the selected country names.
last_day = confirmed_df.columns[-1]
n = 8
top_confirmed_df = confirmed_df.nlargest(n=n, columns=last_day)
top_confirmed_df.index

Index(['US', 'Spain', 'Italy', 'France', 'Germany', 'China', 'United Kingdom',
       'Iran'],
      dtype='object', name='Country_Region')

#### Fatalities

In [12]:
# All Countries
# Read the global deaths time series from the CSSEGISandData COVID-19
# repository, discard the coordinate/province columns, and total the remaining
# date columns per country (one row per Country_Region).
fatalities_df = (
    pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
    .drop(columns=['Lat', 'Long', 'Province/State'])
    .rename(columns={'Country/Region': 'Country_Region'})
    .groupby('Country_Region')
    .sum()
)

In [13]:
# Top Countries
# Keep only the fatalities rows whose country also appears in top_confirmed_df
# (rows stay in fatalities_df's own order).
top_fatalities_df = fatalities_df[fatalities_df.index.isin(top_confirmed_df.index)]

#### Recovered

In [14]:
# All Countries
# Read the global recovered time series from the CSSEGISandData COVID-19
# repository, discard the coordinate/province columns, and total the remaining
# date columns per country (one row per Country_Region).
recovered_df = (
    pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
    .drop(columns=['Lat', 'Long', 'Province/State'])
    .rename(columns={'Country/Region': 'Country_Region'})
    .groupby('Country_Region')
    .sum()
)

In [15]:
# Top Countries
# Keep only the recovered rows whose country also appears in top_confirmed_df
# (rows stay in recovered_df's own order).
top_recovered_df = recovered_df[recovered_df.index.isin(top_confirmed_df.index)]

#### Infected = Confirmed - Recovered

In [16]:
# All Countries
# Element-wise difference, aligned on country (index) and date (columns);
# any label present in only one of the two frames yields NaN.
infected_df = confirmed_df.sub(recovered_df)

# Top Countries
# Restrict the infected counts to the countries listed in top_confirmed_df.
top_infected_df = infected_df[infected_df.index.isin(top_confirmed_df.index)]

## Reorganizing Data to Start at the First Day with a Confirmed Case

In [18]:
# First Day Values
def first_day(df, country):
    """Return the label of the first column where `country` has a non-zero value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by country, one column per day.
    country : label
        Index label of the row to inspect. If the label occurs more than
        once, the first matching row is used.

    Returns
    -------
    The column label of the first non-zero entry in the row, or ``None``
    when every entry in the row is zero.

    Raises
    ------
    IndexError
        If `country` is not present in ``df.index``.
    """
    # First row whose index label equals `country` (IndexError if none match).
    row = df.loc[df.index == country].values[0]
    # Positions of all non-zero entries; NaN compares unequal to 0 and so
    # counts as non-zero, matching the truthiness of a NaN scalar.
    nonzero_positions = np.flatnonzero(row != 0)
    if nonzero_positions.size == 0:
        # No recorded value yet: report "no first day" instead of indexing
        # the columns with None.
        return None
    return df.columns[nonzero_positions[0]]

# Example:
# print(first_day(top_confirmed_df, "Italy"))
# print(first_day(top_fatalities_df, "Italy"))

In [19]:
# New dataframes with all non-zero values at beginning
def first_day_df(df):
    """Re-align every row so that it starts at its first non-zero value.

    Each row of `df` is rotated left until its first non-zero entry sits in
    column 0; the leading zeros that were rotated out end up at the tail.
    Every zero in the result (including any zero between non-zero values)
    is then replaced with NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by country, one column per day.

    Returns
    -------
    pandas.DataFrame
        Same index and shape as `df`; columns are relabelled ``0 .. n-1``
        (days since the first non-zero value). A row that is entirely zero
        becomes entirely NaN.
    """
    values = df.values
    n_days = values.shape[1]

    shifted_rows = []
    for row in values:
        # Position of the first non-zero entry; an all-zero row is left
        # unrotated (it turns into all-NaN below) rather than being
        # duplicated by slicing with None.
        nonzero_positions = np.flatnonzero(row != 0)
        start = int(nonzero_positions[0]) if nonzero_positions.size else 0
        # Rotate left by `start`: same result as row[start:] + row[:start].
        shifted_rows.append(np.roll(row, -start))

    # Build the frame once instead of appending row by row
    # (DataFrame.append no longer exists in pandas 2.x).
    new_df = pd.DataFrame(shifted_rows, index=df.index, columns=range(n_days))
    new_df = new_df.replace(0, np.nan)

    return new_df

In [22]:
# Confirmed Cases Since First Day
# Shift each country's confirmed-case row so that column 0 holds its first
# non-zero count; zeros in the shifted frame are shown as NaN.
first_confirmed_df = first_day_df(confirmed_df)
first_confirmed_df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,70,71,72,73,74,75,76,77,78,79
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,,,,,,,,,,
Albania,2,10.0,12.0,23.0,33.0,38.0,42.0,51.0,55.0,59.0,...,,,,,,,,,,
Algeria,1,1.0,1.0,1.0,1.0,1.0,3.0,5.0,12.0,12.0,...,,,,,,,,,,
Andorra,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,,,,,,,,,,
Angola,1,2.0,2.0,3.0,3.0,3.0,4.0,4.0,5.0,7.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
West Bank and Gaza,4,7.0,16.0,16.0,19.0,26.0,30.0,30.0,31.0,35.0,...,,,,,,,,,,
Western Sahara,4,4.0,4.0,4.0,4.0,4.0,,,,,...,,,,,,,,,,
Yemen,1,,,,,,,,,,...,,,,,,,,,,
Zambia,2,2.0,2.0,2.0,3.0,3.0,3.0,12.0,16.0,22.0,...,,,,,,,,,,
