In [1]:
import dask.dataframe as dd
import pandas as pd

In [2]:
# Lazily load the time series with dask. 'date' is parsed as datetime, and
# 'population' / 'cases' are forced to float64 rather than left to inference.
df = dd.read_csv(
    "timeseries.csv",
    parse_dates=['date'],
    dtype={'population': 'float64', 'cases': 'float64'},
)

# Keep only United States rows that carry a state name.
us_states_df = df[(df['country'] == 'United States') & (df['state'].notnull())]

# Restrict to the study window; both bounds are inclusive.
filtered_df = us_states_df[
    (us_states_df['date'] >= '2020-01-01') & (us_states_df['date'] <= '2021-02-28')
]

# Aggregate both statistics in a single groupby and a single .compute(), so the
# CSV is read and filtered once instead of once per statistic.
state_totals = (
    filtered_df.groupby('state')
    .agg({'deaths': 'sum', 'population': 'mean'})
    .compute()
)
deaths_per_state = state_totals['deaths']
avg_population_per_state = state_totals['population']

# NOTE(review): in the recorded output this ratio exceeds 1 for several states
# (more deaths than residents), which is impossible for a true per-capita rate.
# Presumably 'deaths' is a cumulative daily count and/or rows mix geographic
# levels, so summing over rows overcounts -- TODO confirm against the dataset's
# documentation before relying on the absolute values.
per_capita_mortality = (deaths_per_state / avg_population_per_state).dropna()
ranked_per_capita_mortality = per_capita_mortality.sort_values(ascending=False)
print(ranked_per_capita_mortality)

state
New York                        6.395701
Michigan                        3.204753
Louisiana                       2.735288
Illinois                        2.043863
New Jersey                      2.031200
Georgia                         2.026085
Pennsylvania                    1.831572
Virginia                        1.359063
Mississippi                     1.356705
Indiana                         1.353423
Ohio                            1.058943
Iowa                            0.958771
Massachusetts                   0.904775
Colorado                        0.894296
Minnesota                       0.768853
Kentucky                        0.751512
Texas                           0.737643
Missouri                        0.711146
Connecticut                     0.677952
Maryland                        0.658347
Alabama                         0.599759
North Carolina                  0.549190
Florida                         0.545793
Wisconsin                       0.437617
Nebraska  

In [3]:
# Compute Case Fatality Rate (CFR) per month
# Label every row with its calendar month (a monthly Period derived from 'date').
filtered_df['year_month'] = filtered_df['date'].dt.to_period('M')

# Sum deaths and cases per (state, month) in one groupby / one .compute(), so
# the underlying CSV is scanned once rather than once per column.
monthly_totals = (
    filtered_df.groupby(['state', 'year_month'])[['deaths', 'cases']]
    .sum()
    .compute()
)
deaths_per_month = monthly_totals['deaths']
cases_per_month = monthly_totals['cases']

# Mask zero case totals before dividing: deaths / 0 would yield inf, which
# dropna() does not remove and which would dominate any ranking built on this
# matrix. Masked entries become NaN and are dropped like other missing ratios.
nonzero_cases = cases_per_month.where(cases_per_month != 0)
cfr_per_month = (deaths_per_month / nonzero_cases).dropna()

# Pivot months into columns (one row per state).
# NOTE(review): fillna(0) makes a month with no data indistinguishable from a
# month with a true CFR of 0 -- confirm this is intended for downstream use.
cfr_matrix = cfr_per_month.unstack(level=1).fillna(0)
print(cfr_matrix)

year_month                     2020-03   2020-04   2020-05   2020-06  \
state                                                                  
Alabama                       0.005323  0.028309  0.038893  0.029629   
Alaska                        0.003350  0.023145  0.021969  0.013032   
Arizona                       0.000000  0.014865  0.019922  0.002115   
Arkansas                      0.009157  0.019115  0.021296  0.015152   
California                    0.020067  0.034800  0.039834  0.031787   
Colorado                      0.009393  0.026366  0.053720  0.054192   
Connecticut                   0.018148  0.064776  0.090162  0.093831   
Delaware                      0.013341  0.027340  0.035748  0.041647   
Florida                       0.008427  0.029057  0.042772  0.034043   
Georgia                       0.021856  0.039810  0.044530  0.043123   
Guam                          0.027027  0.036155  0.031863  0.027503   
Hawaii                        0.000363  0.000000  0.004811  0.01

In [4]:
# Rank states by how much their monthly CFR moved over the period.
# Month-over-month differences along the column axis; the first month has no
# predecessor, so diff() leaves NaN there and fillna(0) turns it into 0.
delta_cfr = cfr_matrix.diff(axis="columns").fillna(0)

# Score per state = total absolute month-to-month movement across all columns.
cfr_change_score = delta_cfr.abs().sum(axis="columns")

# Largest total movement first.
cfr_change_ranking = cfr_change_score.sort_values(ascending=False)
print(cfr_change_ranking)

state
Northern Mariana Islands        0.289362
Michigan                        0.173121
Connecticut                     0.171868
United States Virgin Islands    0.167598
New Jersey                      0.161011
Massachusetts                   0.143764
Pennsylvania                    0.139310
New York                        0.126554
New Hampshire                   0.126047
Puerto Rico                     0.116179
Rhode Island                    0.115809
Ohio                            0.105205
Louisiana                       0.101040
Missouri                        0.099612
Colorado                        0.098992
Maryland                        0.093513
Indiana                         0.093033
Washington, D.C.                0.092233
Minnesota                       0.083464
Illinois                        0.083174
Oklahoma                        0.083038
Wisconsin                       0.081135
New Mexico                      0.081135
Vermont                         0.080092
Kentucky  