## All COVID-19 Cases by Country

In [None]:
import pandas as pd
import numpy as np
import os

In [105]:
# Loader tailored to the column layout of the JHU CSSE time-series CSVs
def clean_data(file):
    """Read a CSSE time-series CSV and collapse it to one row per country.

    Parameters
    ----------
    file : str, path or file-like
        Anything ``pd.read_csv`` accepts. The CSV must contain the columns
        ``Lat``, ``Long``, ``Province/State`` and ``Country/Region``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Country_Region``; the remaining columns are summed over
        all rows that share a country. A handful of country labels are
        replaced with more conventional names.
    """
    country_aliases = {
        'US': 'United States',
        'Taiwan*': 'Taiwan',
        'Korea, South': 'South Korea',
        'Congo (Brazzaville)': 'Congo',
        'Congo (Kinshasa)': 'Democratic Republic of Congo',
        'Czechia': 'Czech Republic',
        'Gambia, The': 'Gambia',
    }
    per_country = (
        pd.read_csv(file)
        # Coordinates and sub-national labels are meaningless once rows are summed
        .drop(columns=['Lat', 'Long', 'Province/State'])
        .rename(columns={'Country/Region': 'Country_Region'})
        .groupby('Country_Region')
        .sum()
    )
    return per_country.rename(index=country_aliases)

#### Confirmed Cases

In [109]:
# All countries: fetch the global confirmed-cases time series over the network
# and collapse it to one row per country via clean_data.
confirmed_df = clean_data('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
# Show only the first row as a quick sanity check of the loaded shape
confirmed_df.head(1)

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20,4/10/20
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,237,273,281,299,349,367,423,444,484,521


In [62]:
# Top countries: keep the n rows with the largest value in the last column
n = 8  # how many countries to keep
# Label of the right-most column (the latest date in the frame shown above)
last_day = confirmed_df.columns[-1]
# nlargest returns the rows sorted in descending order of that column
top_confirmed_df = confirmed_df.nlargest(n, last_day)
top_confirmed_df.index

Index(['United States', 'Spain', 'Italy', 'France', 'Germany', 'China',
       'United Kingdom', 'Iran'],
      dtype='object', name='Country_Region')

#### Fatalities

In [108]:
# All countries: fetch the global deaths time series and total it per country
fatalities_df = clean_data('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')

# Top countries: restrict to the countries selected in top_confirmed_df.
# The boolean mask keeps fatalities_df's own row order, not the ranking order.
top_fatalities_df = fatalities_df.loc[fatalities_df.index.isin(top_confirmed_df.index)]

#### Recovered

In [112]:
# All countries: fetch the global recovered time series and total it per country
recovered_df = clean_data('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

# Top countries: restrict to the countries selected in top_confirmed_df.
# The boolean mask keeps recovered_df's own row order, not the ranking order.
top_recovered_df = recovered_df.loc[recovered_df.index.isin(top_confirmed_df.index)]

#### Infected = Confirmed - Recovered

In [68]:
# All countries: element-wise difference of the two frames.
# pandas aligns on both row (country) and column (date) labels, so any label
# present in only one of the frames yields NaN in the result.
infected_df = confirmed_df-recovered_df

# Top countries: restrict to the countries selected in top_confirmed_df
# (rows stay in infected_df's own order).
top_infected_df = infected_df.loc[infected_df.index.isin(top_confirmed_df.index)]

#### Most recent

In [97]:
def recent_stat(country_name, df):
    """Return the most recent non-zero value recorded for a country.

    Parameters
    ----------
    country_name : str
        Index label to look up in ``df``.
    df : pandas.DataFrame
        One row per country, columns ordered from oldest to newest.

    Returns
    -------
    The right-most truthy (non-zero) value in the country's row, or ``None``
    when the row contains no such value or the country is not in ``df``.
    """
    matches = df.loc[df.index == country_name]
    if matches.empty:
        # The datasets do not necessarily cover the same countries; treat a
        # missing country like a country with no data instead of crashing.
        return None
    # Walk the row from newest to oldest. Plain truthiness is used (rather than
    # a NumPy-only ``.any()``) so object-dtype rows of Python numbers also work.
    newest_first = matches.values[0][::-1]
    return next((value for value in newest_first if value), None)

# Example: latest non-zero value in Afghanistan's row of the fatalities frame
recent_stat("Afghanistan", fatalities_df)

15

In [184]:
# Summary table: one row per country holding the latest non-zero value of each
# statistic. Rows are collected in a list and the DataFrame is built once;
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
countries = confirmed_df.index
summary_rows = []
for country in countries:
    summary_rows.append({'Country_Region': country,
                         'Confirmed': recent_stat(country, confirmed_df),
                         'Recovered': recent_stat(country, recovered_df),
                         'Fatalities': recent_stat(country, fatalities_df),
                         })
# Passing `columns` fixes the column order even when there are no rows
all_df = pd.DataFrame(summary_rows,
                      columns=['Country_Region', 'Confirmed', 'Recovered', 'Fatalities'])

In [185]:
# Add a 'Global' row holding the column totals.
stat_columns = ['Confirmed', 'Recovered', 'Fatalities']
# Drop a 'Global' row left over from a previous run of this cell so that
# re-running it neither duplicates the row nor double-counts the totals.
all_df = all_df.loc[all_df['Country_Region'] != 'Global']
global_row = {'Country_Region': 'Global'}
# Sum each statistic separately (missing values are skipped by Series.sum)
global_row.update({column: all_df[column].sum() for column in stat_columns})
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
all_df = pd.concat([all_df, pd.DataFrame([global_row])], ignore_index=True)
all_df.tail()

Unnamed: 0,Country_Region,Confirmed,Recovered,Fatalities
181,Western Sahara,4,,
182,Yemen,1,,
183,Zambia,40,25.0,2.0
184,Zimbabwe,13,,3.0
185,Global,1691719,376112.0,102525.0


In [120]:
# Save JSON
# all_df.to_json('all.json', orient='records')

## Realigning Data to Start at Each Country's First Confirmed Case

In [18]:
# First Day Values
def first_day(df, country):
    """Return the column label of the first non-zero value in a country's row.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per country, columns ordered from oldest to newest.
    country : str
        Index label of the row to inspect.

    Returns
    -------
    The label of the first column whose value is truthy (non-zero), or
    ``None`` when every value in the row is zero.

    Raises
    ------
    IndexError
        If ``country`` is not present in ``df.index``.
    """
    row = df.loc[df.index == country].values[0]
    # Plain truthiness (not a NumPy-only ``.any()``) so that object-dtype rows
    # of Python numbers are handled too.
    col = next((position for position, value in enumerate(row) if value), None)
    if col is None:
        # No non-zero entry: there is no "first day" to report
        return None
    return df.columns[col]

# Example:
# print(first_day(top_confirmed_df, "Italy"))
# print(first_day(top_fatalities_df, "Italy"))

In [19]:
# New dataframes with all non-zero values at beginning
def first_day_df(df):
    """Left-align every country's series on its first non-zero value.

    Each row is rotated so that its first non-zero entry lands in column 0;
    the leading zeros that were skipped end up at the tail of the row. All
    zeros in the result (the rotated padding and any zero inside the series)
    are then replaced with NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per country, columns ordered from oldest to newest.

    Returns
    -------
    pandas.DataFrame
        Same index and shape as ``df``; columns are relabelled
        ``0 .. n_columns - 1`` (position since the first non-zero value). A row
        with no non-zero value becomes entirely NaN.
    """
    n_days = len(df.columns)

    shifted_rows = []
    for values in df.to_numpy():
        row = list(values)
        # Position of the first non-zero value in this row
        start = next((i for i, value in enumerate(row) if value), None)
        if start is None:
            # All-zero row: nothing to rotate. Slicing with None would
            # duplicate the row and corrupt the frame's width.
            start = 0
        shifted_rows.append(row[start:] + row[:start])

    # Build the frame in one step; DataFrame.append was removed in pandas 2.0
    # and re-copied the whole frame on every iteration.
    new_df = pd.DataFrame(shifted_rows, index=df.index, columns=range(n_days))

    # np.nan rather than the np.NaN alias, which was removed in NumPy 2.0
    return new_df.replace(0, np.nan)

In [121]:
# Confirmed cases for the top countries, with each row shifted so that column 0
# holds that country's first non-zero count
first_confirmed_df = first_day_df(top_confirmed_df)
first_confirmed_df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,70,71,72,73,74,75,76,77,78,79
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
United States,1,1,2,2,5,5,5,5,5,7,...,213372.0,243762.0,275586.0,308853.0,337072.0,366667.0,396223.0,429052.0,461437.0,496535.0
Spain,1,1,1,1,1,1,1,1,2,2,...,,,,,,,,,,
Italy,2,2,2,2,2,2,2,3,3,3,...,147577.0,,,,,,,,,
France,2,3,3,3,4,5,5,5,6,6,...,65202.0,90848.0,93773.0,98963.0,110065.0,113959.0,118781.0,125931.0,,
Germany,1,4,4,4,5,8,10,12,12,12,...,103374.0,107663.0,113296.0,118181.0,122171.0,,,,,
China,548,643,920,1406,2075,2877,5509,6087,8141,9802,...,82361.0,82432.0,82511.0,82543.0,82602.0,82665.0,82718.0,82809.0,82883.0,82941.0
United Kingdom,2,2,2,2,2,2,2,3,3,3,...,74605.0,,,,,,,,,
Iran,2,5,18,28,43,61,95,139,245,388,...,,,,,,,,,,


In [122]:
# Same realignment applied to every country in confirmed_df
first_confirmed_all = first_day_df(confirmed_df)
first_confirmed_all

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,70,71,72,73,74,75,76,77,78,79
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,,,,,,,,,,
Albania,2,10.0,12.0,23.0,33.0,38.0,42.0,51.0,55.0,59.0,...,,,,,,,,,,
Algeria,1,1.0,1.0,1.0,1.0,1.0,3.0,5.0,12.0,12.0,...,,,,,,,,,,
Andorra,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,,,,,,,,,,
Angola,1,2.0,2.0,3.0,3.0,3.0,4.0,4.0,5.0,7.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
West Bank and Gaza,4,7.0,16.0,16.0,19.0,26.0,30.0,30.0,31.0,35.0,...,,,,,,,,,,
Western Sahara,4,4.0,4.0,4.0,4.0,4.0,,,,,...,,,,,,,,,,
Yemen,1,,,,,,,,,,...,,,,,,,,,,
Zambia,2,2.0,2.0,2.0,3.0,3.0,3.0,12.0,16.0,22.0,...,,,,,,,,,,
