In [1]:
import pandas as pd

# Raw CSV endpoints: the nytimes/covid-19-data `us.csv` series, plus the
# CSSEGISandData (Johns Hopkins) global time series for deaths and confirmed cases.
nyt_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv'
jh_url_death = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
jh_url_case = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

# Fetch every CSV into its own DataFrame, in the same order the URLs are listed.
nyt_df, jh_df_death, jh_df_case = (
    pd.read_csv(source_url)
    for source_url in (nyt_url, jh_url_death, jh_url_case)
)
print('Loading function')

Loading function


In [2]:
# Capitalize the NYT column names ('date' -> 'Date', 'cases' -> 'Cases',
# 'deaths' -> 'Deaths'). rename() returns a new frame that is bound back to
# nyt_df, rather than mutating the loaded frame with inplace=True.
nyt_df = nyt_df.rename(columns={'date': 'Date', 'cases': 'Cases', 'deaths': 'Deaths'})

In [3]:
# Build a long-format (Date, Deaths) table for the US from the global deaths data:
#   1. keep only the rows whose 'Country/Region' is 'US',
#   2. discard the identifier/location columns so only the per-date columns remain,
#   3. melt wide -> long, turning each remaining column header into a 'Date' value.
jh_df_us_death = (
    jh_df_death[jh_df_death['Country/Region'].eq('US')]
    .drop(columns=['Country/Region', 'Province/State', 'Lat', 'Long'])
    .melt(var_name='Date', value_name='Deaths')
)

print(jh_df_us_death)

         Date   Deaths
0     1/22/20        0
1     1/23/20        0
2     1/24/20        0
3     1/25/20        0
4     1/26/20        0
...       ...      ...
1044  12/1/22  1081147
1045  12/2/22  1081412
1046  12/3/22  1081431
1047  12/4/22  1081431
1048  12/5/22  1081638

[1049 rows x 2 columns]


In [4]:
# Build a long-format (Date, Cases) table for the US from the global confirmed-case
# data: keep the 'US' rows, discard the identifier/location columns, then melt the
# remaining per-date columns into 'Date' / 'Cases' pairs.
jh_df_us_case = (
    jh_df_case[jh_df_case['Country/Region'].eq('US')]
    .drop(columns=['Country/Region', 'Province/State', 'Lat', 'Long'])
    .melt(var_name='Date', value_name='Cases')
)


In [5]:
# Join the Johns Hopkins US deaths and cases into one frame keyed on 'Date'.
# pd.merge defaults to an inner join, so only dates present in both frames are kept.
# validate='one_to_one' makes pandas raise a MergeError if either side contains a
# repeated 'Date', instead of silently multiplying rows in the joined result.
jh_df_us = pd.merge(
    jh_df_us_death,
    jh_df_us_case,
    on=['Date'],
    validate='one_to_one',
)
print(jh_df_us)

         Date   Deaths     Cases
0     1/22/20        0         1
1     1/23/20        0         1
2     1/24/20        0         2
3     1/25/20        0         2
4     1/26/20        0         5
...       ...      ...       ...
1044  12/1/22  1081147  98924207
1045  12/2/22  1081412  98962019
1046  12/3/22  1081431  98967906
1047  12/4/22  1081431  98972375
1048  12/5/22  1081638  99023619

[1049 rows x 3 columns]


In [6]:
# Convert the 'Date' columns of both sources to pandas datetime values.
# The NYT column is left to pandas' default parsing.
nyt_df["Date"] = pd.to_datetime(nyt_df["Date"])
# The Johns Hopkins dates come from column headers written as
# month/day/two-digit-year strings (e.g. '1/22/20'). Spelling the format out keeps
# the parse from depending on pandas' format inference and makes a value that
# does not match the expected layout raise instead of being guessed at.
jh_df_us["Date"] = pd.to_datetime(jh_df_us["Date"], format="%m/%d/%y")

In [7]:
# Show the column dtypes of the NYT frame and of the two pre-merge Johns Hopkins
# frames, one report after another. Only nyt_df and the merged jh_df_us had their
# 'Date' column converted, so the two pre-merge frames still report 'Date' as object.
print(
    nyt_df.dtypes,
    jh_df_us_death.dtypes,
    jh_df_us_case.dtypes,
    sep='\n',
)

Date      datetime64[ns]
Cases              int64
Deaths             int64
dtype: object
Date      object
Deaths     int64
dtype: object
Date     object
Cases     int64
dtype: object


In [10]:
# Print the NYT frame, the merged Johns Hopkins US frame, and the merged frame's
# column dtypes, each starting on its own line.
print(
    nyt_df,
    jh_df_us,
    jh_df_us.dtypes,
    sep='\n',
)



           Date     Cases   Deaths
0    2020-01-21         1        0
1    2020-01-22         1        0
2    2020-01-23         1        0
3    2020-01-24         2        0
4    2020-01-25         3        0
...         ...       ...      ...
1045 2022-12-01  98616721  1088564
1046 2022-12-02  98690152  1088978
1047 2022-12-03  98695013  1088997
1048 2022-12-04  98699619  1088997
1049 2022-12-05  98753618  1089181

[1050 rows x 3 columns]
           Date   Deaths     Cases
0    2020-01-22        0         1
1    2020-01-23        0         1
2    2020-01-24        0         2
3    2020-01-25        0         2
4    2020-01-26        0         5
...         ...      ...       ...
1044 2022-12-01  1081147  98924207
1045 2022-12-02  1081412  98962019
1046 2022-12-03  1081431  98967906
1047 2022-12-04  1081431  98972375
1048 2022-12-05  1081638  99023619

[1049 rows x 3 columns]
Date      datetime64[ns]
Deaths             int64
Cases              int64
dtype: object
