In [1]:
import numpy as np
import pandas as pd

In [2]:
# Names of the JHU CSSE US time-series datasets handled by this notebook.
# Each name is also used later as the value-column name of its long table.
df_names = ['confirmed_US', 'deaths_US']

# One empty placeholder frame per dataset; the loading cell replaces them.
df_list = [pd.DataFrame() for _ in df_names]

# Dataset name -> frame, in the same order as df_names.
df_dict = {name: frame for name, frame in zip(df_names, df_list)}

In [3]:
# Common URL prefix of the JHU CSSE COVID-19 time-series CSV files; the
# dataset name (a key of df_dict) plus '.csv' is appended to form each URL.
url_part = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_'

# Number of leading descriptive (non-date) columns in each wide CSV. Every
# column after them is treated as one observation date.
# NOTE(review): deaths_US has one more descriptive column than confirmed_US,
# presumably the 'Population' column visible in the merged frame -- confirm.
num_of_dim_col_by_key = {'confirmed_US': 11, 'deaths_US': 12}

# Iterate over a snapshot of the keys because df_dict entries are replaced
# inside the loop; the placeholder frames stored there are never read.
for key in list(df_dict):
    if key not in num_of_dim_col_by_key:
        # Fail loudly instead of raising a NameError on the first iteration
        # or silently reusing the previous dataset's column count.
        raise KeyError('No descriptive-column count configured for dataset {!r}'.format(key))
    num_of_dim_col = num_of_dim_col_by_key[key]

    # No parse_dates here: column 0 is the integer UID identifier, not a
    # date. The real dates are column *headers* and are parsed after melting.
    wide = pd.read_csv(url_part + key + '.csv')

    dim_col = wide.columns[:num_of_dim_col]
    date_col = wide.columns[num_of_dim_col:]

    # Wide -> long: one row per (descriptive columns, date); the counts land
    # in a column named after the dataset.
    value = wide.melt(id_vars=dim_col, value_vars=date_col, var_name='Date', value_name=key)

    # The former column headers are strings; convert them to real timestamps.
    value['Date'] = pd.to_datetime(value['Date'])

    df_dict[key] = value
    print(value.head())
    print("----------------------------------------------------------")
    print(key + " Cases Dataframe Created\n----------------------------------------------------------")

   UID iso2 iso3  code3  FIPS Admin2            Province_State Country_Region  \
0   16   AS  ASM     16  60.0    NaN            American Samoa             US   
1  316   GU  GUM    316  66.0    NaN                      Guam             US   
2  580   MP  MNP    580  69.0    NaN  Northern Mariana Islands             US   
3  630   PR  PRI    630  72.0    NaN               Puerto Rico             US   
4  850   VI  VIR    850  78.0    NaN            Virgin Islands             US   

       Lat     Long_                  Combined_Key       Date  confirmed_US  
0 -14.2710 -170.1320            American Samoa, US 2020-01-22             0  
1  13.4443  144.7937                      Guam, US 2020-01-22             0  
2  15.0979  145.6739  Northern Mariana Islands, US 2020-01-22             0  
3  18.2208  -66.5901               Puerto Rico, US 2020-01-22             0  
4  18.3358  -64.8963            Virgin Islands, US 2020-01-22             0  
---------------------------------------------

In [4]:
# Key columns shared by both long-format frames: the descriptive columns
# common to the two datasets plus the observation date.
join_on_col = [
    'UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2',
    'Province_State', 'Country_Region', 'Lat', 'Long_',
    'Combined_Key', 'Date',
]

# Outer join keeps key combinations that appear in only one of the two
# frames; the dataset-named value columns are then given display names.
df_COVID = (
    pd.merge(df_dict['confirmed_US'], df_dict['deaths_US'], on=join_on_col, how='outer')
    .rename(columns={'confirmed_US': 'Confirmed', 'deaths_US': 'Deaths'})
)

In [5]:
# Preview the first five rows of the merged confirmed/deaths table.
df_COVID.head(n=5)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Date,Confirmed,Population,Deaths
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,"American Samoa, US",2020-01-22,0,55641,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,"Guam, US",2020-01-22,0,164229,0
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,"Northern Mariana Islands, US",2020-01-22,0,55144,0
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,"Puerto Rico, US",2020-01-22,0,2933408,0
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,"Virgin Islands, US",2020-01-22,0,107268,0


In [6]:
# Write the merged table to a CSV file in the current working directory,
# without the row index.
df_COVID.to_csv(path_or_buf='COVID-19-USA.csv', index=False)

In [7]:
# Report the most recent observation date present in the merged table.
print("Last Updated Data As On: ", df_COVID['Date'].max())

Last Updated Data As On:  2020-06-22 00:00:00
