In [2]:
from __future__ import division

# import requests
import json
import pandas as pd
import numpy as np
import math

from datetime import datetime, timedelta

## Hyperparameters

In [3]:
# Multiplier applied to reported daily case counts before modelling.
under_reporting_factor = 1
# Growth rate used by the 'fixExp' model: infected(t + n) = infected(t) * exp(a_fixed * n).
a_fixed = 0.3
# Rolling-window length (in days) over which reported cases are summed as "active".
recovery_days = 14
# Regions whose latest active count is not above this threshold are dropped.
early_growth_cut_off = 5 
# Fractions of the projected infected count used to derive medical demand.
F_hospitalized =  .26
F_need_ICU = .082
F_need_Ventilator = .05
F_fatality = .026
# Multiplier on new infections in the 'sir' projection; not used by the exponential models.
r0= 1.2 #Not Required unless SIR
# Number of lag columns (active_infected_count_-0 .. -(history_days-1)) built per date.
history_days = 5 
# Last date of every regional time series; missing days up to this date are zero-filled.
day0 = datetime.strptime('2020-03-24',"%Y-%m-%d")


## Common Functions

In [6]:
def cases_till_date(modelName, df, date_col, population = None):
    """Build the per-day case time series of one region.

    Distinct ``patientId`` values are counted per ``date_col`` value, and days
    without reports (up to ``day0``) are inserted with a count of zero.

    Parameters
    ----------
    modelName : str
        ``'fixExp'``, ``'fitExp'`` or ``'sir'``.
    df : pandas.DataFrame
        Patient-level rows containing ``patientId`` and ``date_col``.
    date_col : str
        Name of the column holding the reporting date.
    population : number, optional
        Region population; required for ``'sir'`` (used in arithmetic).

    Returns
    -------
    pandas.DataFrame or None
        * exponential models: columns ``date``, ``cases_count``, ``population``
          where ``cases_count`` is the rolling ``recovery_days`` sum of daily
          reports, scaled by ``under_reporting_factor`` and rounded.
        * ``'sir'``: columns ``date``, ``new_infected_count``,
          ``active_infected_count``, ``recovered_count``,
          ``beg_susceptible_count``, ``end_susceptible_count``, ``population``.
        * ``None`` (after printing a message) for any other ``modelName``.
    """
    grouped_df = df.groupby(date_col)['patientId'].nunique().reset_index()
    grouped_df = grouped_df.sort_values(by=date_col, ascending=True)
    grouped_df = populate_missing_days(grouped_df, date_col)
    grouped_df['date'] = grouped_df[date_col]

    if modelName == 'fixExp' or modelName == 'fitExp':
        rolling_cases = grouped_df['patientId'].rolling(min_periods=1, window=recovery_days).sum()
        grouped_df['cases_count'] = (rolling_cases * under_reporting_factor).round()
        grouped_df['population'] = population
        grouped_df = grouped_df.loc[:, ['date', 'cases_count', 'population']]
        return populate_missing_days(grouped_df, 'date')

    elif modelName == 'sir':
        #### SEMANTICS - meaning of the columns for each date
        #### new_infected_count    - new cases reported on that date
        #### active_infected_count - cases reported in the last `recovery_days` days
        #### beg_susceptible_count - susceptible people at the beginning of the day
        #### end_susceptible_count - susceptible people at the end of the day
        #### recovered_count       - cases newly reported `recovery_days` days earlier
        grouped_df['new_infected_count'] = grouped_df['patientId'] * under_reporting_factor
        # NOTE(review): unlike new_infected_count, the active count is not scaled
        # by under_reporting_factor - confirm whether that is intended.
        grouped_df['active_infected_count'] = grouped_df['patientId'].rolling(min_periods=1, window=recovery_days).sum()
        # Float column so that scaled (non-integer) counts can be stored as-is.
        grouped_df['recovered_count'] = 0.0
        grouped_df['end_susceptible_count'] = population - grouped_df['new_infected_count']
        grouped_df['beg_susceptible_count'] = population
        # Fix: this column was never created, yet it is selected below. Older
        # pandas only warned about the missing label; current pandas raises KeyError.
        grouped_df['population'] = population

        # populate_missing_days() leaves a 0..n-1 index, so labels equal positions.
        for index in range(1, len(grouped_df)):
            previous_end = grouped_df.at[index - 1, 'end_susceptible_count']
            grouped_df.at[index, 'beg_susceptible_count'] = previous_end

            lookback = index - recovery_days
            if lookback >= 0:
                recovered = grouped_df.at[lookback, 'new_infected_count']
            else:
                recovered = 0  # nothing was reported that far back
            grouped_df.at[index, 'recovered_count'] = recovered

            grouped_df.at[index, 'end_susceptible_count'] = max(
                0, previous_end - grouped_df.at[index, 'new_infected_count'])

        grouped_df = grouped_df.loc[:, ['date', 'new_infected_count', 'active_infected_count',
                                        'recovered_count', 'beg_susceptible_count',
                                        'end_susceptible_count', 'population']]

        return populate_missing_days(grouped_df, 'date')

    else:
        print("modelName should be \'fitExp\', \'fixExp\', or \'sir\', but found: " + modelName)

        return None


def cases_last_5days(df,history_days,modelName,col_name):
    """Flatten a daily count series into one row per date with lagged counts.

    For every row of ``df`` the result holds ``date`` plus
    ``active_infected_count_-0`` .. ``active_infected_count_-(history_days-1)``,
    where ``_-i`` is the value of ``df[col_name]`` ``i`` rows earlier (0 when
    there is no such row). The output column prefix is always
    ``active_infected_count`` regardless of ``col_name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date`` and ``col_name``; its index labels are used as
        row positions.
    history_days : int
        Number of lag columns to create.
    modelName : str
        ``'fixExp'``, ``'fitExp'`` or ``'sir'``.
    col_name : str
        Column of ``df`` to read the counts from.

    Returns
    -------
    pandas.DataFrame
        The flattened frame; an empty frame (after printing a message) when
        ``modelName`` is not recognised. Previously that case crashed because
        the result list was never initialised.
    """
    if modelName not in ('fixExp', 'fitExp', 'sir'):
        print("modelName should be \'fitExp\', \'fixExp\', or \'sir\', but found: " + modelName)
        return pd.DataFrame()

    series = df[col_name]
    counts = []
    for index, row in df.iterrows():
        flat = {'date': row['date']}
        for i in range(history_days):
            flat['active_infected_count_-' + str(i)] = fetch_value(series, index - i)
        counts.append(flat)

    return pd.DataFrame(counts)

In [7]:
def populate_missing_days(df,date_col, end=None):
    """Re-index ``df`` onto a gap-free daily date range, filling gaps with 0.0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one row per date in ``date_col``.
    date_col : str
        Name of the date column.
    end : datetime-like, optional
        Last date of the range. Defaults to the module-level ``day0``.
        Rows dated after ``end`` are dropped by the re-indexing.

    Returns
    -------
    pandas.DataFrame
        One row per calendar day from ``df[date_col].min()`` to ``end`` with a
        fresh 0..n-1 index; ``date_col`` is the first column.
    """
    if end is None:
        end = day0
    full_range = pd.date_range(start=df[date_col].min(), end=end)
    filled = df.set_index(date_col).reindex(full_range).fillna(0.0)
    return filled.rename_axis(date_col).reset_index()

def fetch_historical_row(current_index, relative_position, df):
    """Return the row ``relative_position`` rows before ``current_index``.

    Rows are addressed by position. When the requested position would be
    negative, a copy of the first row with every field set to 0 is returned.
    """
    placeholder = df.iloc[0].copy()
    target = current_index - relative_position
    if target >= 0:
        return df.iloc[target, :]

    # No history that far back: hand out an all-zero row with the same labels.
    for column in df:
        placeholder[column] = 0
    return placeholder

def fetch_value(df,index):
    """Return ``df[index]`` for a non-negative ``index``, otherwise 0."""
    return df[index] if index >= 0 else 0
    
def merge_dict(x, y):
    """Return a shallow copy of ``x`` updated with ``y``; inputs are left untouched.

    Keys present in both take the value from ``y``.
    """
    merged = x.copy()
    merged.update(y)
    return merged

def fit_a(row,history_days,col_prefix):
    """Fit an exponential growth rate from consecutive-day ratios.

    For each lag ``k`` in ``1 .. history_days-1`` whose value
    ``row[col_prefix + "_-" + k]`` is positive, the ratio
    ``value(k-1) / value(k)`` is accumulated. The result is the natural log of
    the mean ratio when the *sum* of ratios exceeds 1, and 0 otherwise.

    NOTE(review): the guard compares the sum, not the mean, with 1, so a mean
    ratio below 1 yields a negative rate when several ratios were collected
    but 0 when only one was - confirm the intended behaviour.
    """
    ratio_sum = 0
    ratio_count = 0
    for lag in range(1, history_days):
        earlier = row[col_prefix + "_-" + str(lag)]
        if earlier > 0:
            later = row[col_prefix + "_-" + str(lag - 1)]
            ratio_sum = ratio_sum + later / earlier
            ratio_count += 1

    if ratio_sum > 1:
        return math.log(ratio_sum / (ratio_count * 1.0))
    return 0

In [20]:
def preprocess_dataset_regionwise(df, history_days, modelName, region_name, region_col):
    """Build the lagged case-count table for every region above the cut-off.

    For each distinct value of ``df[region_col]`` the regional time series is
    computed with ``cases_till_date`` and flattened with ``cases_last_5days``.
    For ``'sir'`` the flattened frame is additionally merged with the SIR
    columns on ``date`` and de-duplicated. A region is kept only when its
    latest ``active_infected_count_-0`` exceeds ``early_growth_cut_off``.

    Parameters
    ----------
    df : pandas.DataFrame
        Patient rows with ``region_col``, ``reported_date``, ``patientId`` and
        ``population`` columns.
    history_days : int
        Number of lag columns per date.
    modelName : str
        ``'fixExp'``, ``'fitExp'`` or ``'sir'``; any other value yields an
        empty frame.
    region_name : str
        Unused; kept for interface compatibility.
    region_col : str
        Column identifying the region.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-region frames (original row indices kept), or
        an empty frame when no region qualifies.
    """
    is_exponential = modelName == 'fixExp' or modelName == 'fitExp'
    if not (is_exponential or modelName == 'sir'):
        return pd.DataFrame()

    # Collect the frames and concatenate once: DataFrame.append was removed in
    # pandas 2.0 and copied the accumulated frame on every call.
    region_frames = []
    for region in df[region_col].unique():
        region_filtered_df = df[df[region_col] == region]
        if region_filtered_df.shape[0] == 0:
            # Happens for a NaN region, which never equals itself.
            continue

        ## TODO - check semantics with spreadsheet and change code
        mean_population = np.mean(region_filtered_df.population)
        cases_df = cases_till_date(modelName, region_filtered_df, 'reported_date', mean_population)

        if is_exponential:
            cases_region_df = cases_last_5days(cases_df, history_days, modelName, 'cases_count')
        else:
            cases_last_5days_df = cases_last_5days(cases_df, history_days, modelName, 'active_infected_count')
            cases_region_df = pd.merge(cases_last_5days_df, cases_df, left_on='date', right_on='date')

        cases_region_df['population'] = round(mean_population)
        cases_region_df[region_col] = region

        current_active_count = cases_region_df.iloc[-1, cases_region_df.columns.get_loc('active_infected_count_-0')]
        if current_active_count > early_growth_cut_off:
            if not is_exponential:
                cases_region_df = cases_region_df.drop_duplicates()
            region_frames.append(cases_region_df)

    if not region_frames:
        return pd.DataFrame()
    return pd.concat(region_frames)


## Projection Logic

In [9]:
def project_1day(row_today,a,history_days):
    """Advance a lagged-count row by one day using growth rate ``a``.

    The returned copy has ``date`` moved forward one day,
    ``active_infected_count_-0`` set to the rounded value of today's count
    times ``exp(a)``, and every lag ``_-k`` (k >= 1) set to today's
    ``_-(k-1)``. ``row_today`` itself is not modified.
    """
    prefix = 'active_infected_count_-'
    projected = row_today.copy()

    projected['date'] = row_today['date'] + timedelta(days=1)
    projected[prefix + '0'] = round(row_today[prefix + '0'] * math.exp(1 * a))

    # Shift the history window: today's lag k-1 becomes tomorrow's lag k.
    for lag in range(1, history_days):
        projected[prefix + str(lag)] = row_today[prefix + str(lag - 1)]

    return projected


def _medical_demand(n, projection_date, infected):
    """Build the ``<n>days_*`` demand dict for one projected infected count.

    Hospital, ICU, ventilator and fatality figures are the infected count
    multiplied by the matching ``F_*`` fraction and rounded.
    """
    prefix = str(n) + 'days_'
    return {
        prefix + 'projection_date': projection_date,
        prefix + 'infected': infected,
        prefix + 'hospitalized': round(infected * F_hospitalized),
        prefix + 'need_icu': round(infected * F_need_ICU),
        prefix + 'need_ventilator': round(infected * F_need_Ventilator),
        prefix + 'fatality': round(infected * F_fatality),
    }


def predict_medical_demand(projection, n):
    """Medical demand for an already-projected lagged-count row.

    ``projection`` supplies ``date`` and ``active_infected_count_-0``; ``n``
    only determines the key prefix (``'<n>days_'``) of the returned dict.
    """
    return _medical_demand(n, projection['date'], projection['active_infected_count_-0'])

def predict_medical_demand_sir(projection,n):
    """Medical demand for an already-projected SIR row.

    ``projection`` supplies ``date`` and ``active_infected_count``; ``n`` only
    determines the key prefix (``'<n>days_'``) of the returned dict.
    """
    return _medical_demand(n, projection['date'], projection['active_infected_count'])

def predict_medical_demand_fixExp(projection,a,day_offset,n):
    """Medical demand ``n + day_offset`` days after ``projection`` at fixed rate ``a``.

    The infected count is ``active_infected_count_-0 * exp((n + day_offset) * a)``,
    rounded, and the projection date is ``projection['date']`` shifted by the
    same number of days. Keys are prefixed with ``'<n>days_'``.
    """
    horizon = n + day_offset
    projection_date = projection['date'] + timedelta(days=horizon)
    infected = round(projection['active_infected_count_-0'] * math.exp(horizon * a))
    return _medical_demand(n, projection_date, infected)


In [10]:
def project_ndays_fitExp(row_today,day_offset,n):
    """Project ``n + day_offset`` days ahead, re-fitting the growth rate daily.

    On each step the rate is fitted from the current lag window with
    ``fit_a`` and the row is advanced with ``project_1day``.

    Parameters
    ----------
    row_today : mapping
        Row with ``date`` and ``active_infected_count_-k`` lag fields.
    day_offset : int
        Extra days to project in addition to ``n``.
    n : int
        Horizon; also used as the key prefix of the returned demand dict.

    Returns
    -------
    tuple
        ``(demand_dict, a)`` where ``a`` is the rate used for the final step.
        When no step is taken (``n + day_offset <= 0``) ``a`` is the rate
        fitted on ``row_today``; previously that case raised because ``a``
        was never assigned.
    """
    a = None
    for _ in range(n + day_offset):
        a = fit_a(row_today, history_days, 'active_infected_count')
        row_today = project_1day(row_today, a, history_days)

    if a is None:
        a = fit_a(row_today, history_days, 'active_infected_count')

    return predict_medical_demand(row_today, n), a



# Show floats with three decimal places wherever pandas renders them.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
def project_ndays_sir(count_ts_df,day_offset,n,recovery_days):
    """Project the SIR-style series ``n + day_offset`` days past its last row.

    Each projected day is derived from the previous one:

    * ``beg_susceptible_count`` = previous ``end_susceptible_count``
    * ``new_infected_count``    = round(previous new * ``r0`` * beg_susceptible / population)
    * ``recovered_count``       = ``new_infected_count`` from ``recovery_days`` days earlier (0 if none)
    * ``active_infected_count`` = previous active - recovered + new
    * ``end_susceptible_count`` = max(0, beg_susceptible - recovered - new)

    Parameters
    ----------
    count_ts_df : pandas.DataFrame
        Daily series in chronological order with the columns named above plus
        ``date`` and ``population``. It is not modified.
    day_offset : int
        Extra days to project in addition to ``n``.
    n : int
        Horizon; also the key prefix of the returned demand dict.
    recovery_days : int
        Look-back used for ``recovered_count``. It is now honoured; the
        look-back used to be hard-coded to 14.

    Returns
    -------
    dict
        ``predict_medical_demand_sir`` of the last projected row.
    """
    ## TODO - check semantics with spreadsheet and change code
    # NOTE(review): end_susceptible_count subtracts recovered_count here, while
    # cases_till_date subtracts only the new infections - confirm which is intended.

    # Only the new-infection history and the latest row are needed, so keep a
    # plain list instead of growing the frame (DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every step).
    new_infected_history = list(count_ts_df['new_infected_count'])
    row_today = count_ts_df.iloc[-1, :]

    for _ in range(n + day_offset):
        # The next day sits at position len(history); look back from there.
        lookback = len(new_infected_history) - recovery_days
        recovered = new_infected_history[lookback] if lookback >= 0 else 0

        row_next_day = row_today.copy()
        row_next_day['date'] = row_today['date'] + timedelta(days=1)
        row_next_day['beg_susceptible_count'] = row_today['end_susceptible_count']
        row_next_day['new_infected_count'] = round(
            row_today['new_infected_count'] * r0
            * (row_next_day['beg_susceptible_count'] / (1.0 * row_today['population'])))
        row_next_day['recovered_count'] = recovered
        row_next_day['active_infected_count'] = (
            row_today['active_infected_count']
            - row_next_day['recovered_count']
            + row_next_day['new_infected_count'])
        row_next_day['end_susceptible_count'] = max(
            0,
            row_next_day['beg_susceptible_count']
            - row_next_day['recovered_count']
            - row_next_day['new_infected_count'])

        new_infected_history.append(row_next_day['new_infected_count'])
        row_today = row_next_day

    return predict_medical_demand_sir(row_today, n)

def rename_dict(proj_dict,n):
    """Strip the ``'<n>days_'`` prefix from the demand keys of ``proj_dict``.

    The six demand entries are moved, in place, to horizon-independent key
    names; the same (mutated) dict is returned. Other keys are left alone.
    """
    prefix = str(n) + 'days_'
    key_map = (
        ('projection_date', 'projection_date'),
        ('infected', 'infected_count'),
        ('hospitalized', 'hospitalized_count'),
        ('need_icu', 'need_icu_count'),
        ('need_ventilator', 'need_ventilator_count'),
        ('fatality', 'fatality_count'),
    )
    for old_suffix, new_key in key_map:
        proj_dict[new_key] = proj_dict.pop(prefix + old_suffix)
    return proj_dict
        

In [11]:
def _history_snapshot(row):
    """Return the current and lagged active counts of ``row`` as report fields.

    Produces ``current_infected_count`` followed by ``current day-k Infected``
    for k = history_days-1 .. 1, so the fields always match the lag columns
    that were actually built.
    """
    snapshot = {'current_infected_count': row['active_infected_count_-0']}
    for lag in range(history_days - 1, 0, -1):
        snapshot['current day-' + str(lag) + ' Infected'] = row['active_infected_count_-' + str(lag)]
    return snapshot


def projections(region_date_count_df, nList, modelName, region=None, region_col=""):
    """Project medical demand per region for each horizon in ``nList``.

    One output row is produced per region. It carries the region, its latest
    date, population and lagged active counts, followed by the
    ``<n>days_*`` demand fields for every horizon ``n``. Projections start at
    the latest data date and are shifted by the number of days between that
    date and today.

    Parameters
    ----------
    region_date_count_df : pandas.DataFrame
        Output of ``preprocess_dataset_regionwise``.
    nList : iterable of int or int
        Horizons in days; a single integer is treated as one horizon.
    modelName : str
        ``'fixExp'``, ``'fitExp'`` or ``'sir'``.
    region : optional
        Unused; kept for interface compatibility.
    region_col : str
        Column identifying the region.

    Returns
    -------
    pandas.DataFrame
        One row per region; empty (after printing a message) for an unknown
        ``modelName``. For ``'fitExp'`` the column ``a_fitted`` holds the rate
        fitted for the last horizon, or ``None`` when ``nList`` is empty.
    """
    horizons = [nList] if isinstance(nList, int) else list(nList)
    final_projections = []

    if modelName == 'fixExp' or modelName == 'fitExp':
        # Keep only the most recent row of every region.
        latest_dates = region_date_count_df.groupby([region_col])['date'].transform('max')
        latest_rows = region_date_count_df[latest_dates == region_date_count_df['date']]

        for _, row in latest_rows.iterrows():
            flat = {}
            flat[region_col] = row[region_col]
            flat['current_date'] = row['date']
            flat['population'] = row['population']
            flat.update(_history_snapshot(row))

            day_offset = (datetime.today() - row['date']).days
            projs_dict = {}
            if modelName == 'fitExp':
                # Reset per region so a rate never leaks from the previous row.
                a = None
                for time in horizons:
                    projection, a = project_ndays_fitExp(row, day_offset, time)
                    projs_dict = merge_dict(projs_dict, projection)
                flat['a_fitted'] = a
            else:
                for time in horizons:
                    projection = predict_medical_demand_fixExp(row, a_fixed, day_offset, time)
                    projs_dict = merge_dict(projs_dict, projection)

            final_projections.append(merge_dict(flat, projs_dict))

    elif modelName == 'sir':
        for region_value in region_date_count_df[region_col].unique():
            region_filtered_df = region_date_count_df[region_date_count_df[region_col] == region_value]
            last_row = region_filtered_df.iloc[-1, :]

            flat = {}
            flat[region_col] = region_value
            flat['population'] = region_filtered_df['population'].mean()
            flat['current_date'] = last_row['date']
            flat.update(_history_snapshot(last_row))

            day_offset = (datetime.today() - last_row['date']).days

            projs_dict = {}
            for time in horizons:
                projection = project_ndays_sir(region_filtered_df, day_offset, time, recovery_days)
                projs_dict = merge_dict(projs_dict, projection)

            final_projections.append(merge_dict(flat, projs_dict))
    else:
        print("modelName should be \'fitExp\', \'fixExp\', or \'sir\', but found: " + modelName)

    return pd.DataFrame(final_projections)


In [23]:
def projections_format2(region_date_count_df, n_days, modelName, region=None, region_col=""):
    """Project medical demand per region and per day (long format).

    For every region, one output row is produced for each of the
    ``n_days + day_offset`` days following the region's latest data date,
    where ``day_offset`` is the number of days between that date and today.
    Demand fields use horizon-independent names (see ``rename_dict``).

    Parameters
    ----------
    region_date_count_df : pandas.DataFrame
        Output of ``preprocess_dataset_regionwise``.
    n_days : int
        Number of days to project past today.
    modelName : str
        ``'fixExp'``, ``'fitExp'`` or ``'sir'``.
    region : optional
        Unused; kept for interface compatibility.
    region_col : str
        Column identifying the region.

    Returns
    -------
    pandas.DataFrame
        One row per (region, projected day); empty (after printing a message)
        for an unknown ``modelName``. ``'fitExp'`` rows also carry ``a_fitted``.
    """
    final_projections = []

    if modelName == 'fixExp' or modelName == 'fitExp':
        # Keep only the most recent row of every region.
        latest_dates = region_date_count_df.groupby([region_col])['date'].transform('max')
        latest_rows = region_date_count_df[latest_dates == region_date_count_df['date']]

        for _, row in latest_rows.iterrows():
            region_name = row[region_col]
            day_offset = (datetime.today() - row['date']).days

            for horizon in range(1, n_days + day_offset + 1):
                if modelName == 'fitExp':
                    projection, a = project_ndays_fitExp(row, 0, horizon)
                    projection[region_col] = region_name
                    projection['a_fitted'] = a
                else:
                    projection = predict_medical_demand_fixExp(row, a_fixed, 0, horizon)
                    projection[region_col] = region_name
                final_projections.append(rename_dict(projection, horizon))

    elif modelName == 'sir':
        for region_value in region_date_count_df[region_col].unique():
            region_filtered_df = region_date_count_df[region_date_count_df[region_col] == region_value]

            last_date = region_filtered_df.iloc[-1, region_filtered_df.columns.get_loc('date')]
            day_offset = (datetime.today() - last_date).days

            for horizon in range(1, n_days + day_offset + 1):
                projection = project_ndays_sir(region_filtered_df, 0, horizon, recovery_days)
                projection[region_col] = region_value
                final_projections.append(rename_dict(projection, horizon))

    else:
        print("modelName should be \'fitExp\', \'fixExp\', or \'sir\', but found: " + modelName)

    return pd.DataFrame(final_projections)




## Top-Level Function

In [24]:
def predict(modelName, nList, patient_data_file, region, region_col,output_format, output_file_path, region_population_data_file = None):
    """Run the full pipeline: load data, build regional series, project, save.

    Parameters
    ----------
    modelName : str
        ``'fixExp'``, ``'fitExp'`` or ``'sir'``.
    nList : iterable of int or int
        Horizons for ``output_format == 1``; number of days for
        ``output_format == 2``.
    patient_data_file : str
        CSV with one row per patient; must contain ``patientId``,
        ``reportedOn`` and ``region_col``.
    region : str
        Region level label, used only in file names and messages.
    region_col : str
        Column identifying the region in both input files.
    output_format : int
        1 for per-region medical demand, 2 for per-region, per-day counts.
    output_file_path : str
        Path whose stem (extension removed) prefixes the output CSV name.
    region_population_data_file : str
        CSV with ``region_col`` and ``population`` columns. Required even
        though it has a ``None`` default.

    Returns
    -------
    pandas.DataFrame or None
        De-duplicated projections, or ``None`` (after printing a message) when
        no region exceeds the active-count threshold.

    Raises
    ------
    ValueError
        If ``output_format`` is not 1 or 2, or no population file is given.
        Both are checked before any file is read or written.

    Side effects
    ------------
    Writes the regional counts CSV to the working directory and the
    projections CSV next to ``output_file_path``.
    """
    import os

    # Fail fast: an unsupported format used to surface only after the counts
    # file had already been written, as an unbound-variable error.
    if output_format not in (1, 2):
        raise ValueError("output_format should be 1 or 2, but found: " + str(output_format))
    if region_population_data_file is None:
        raise ValueError("region_population_data_file is required")

    patient_data_df = pd.read_csv(patient_data_file, na_values='')
    region_population = pd.read_csv(region_population_data_file, sep=",")

    # Strip stray whitespace so region names match across both files.
    patient_data_df[region_col] = patient_data_df[region_col].str.strip()
    region_population[region_col] = region_population[region_col].str.strip()
    region_population_df = region_population.loc[:, [region_col, 'population']]
    patient_data_df = patient_data_df.merge(region_population_df, on=region_col)

    patient_data_df['reported_date'] = pd.to_datetime(patient_data_df['reportedOn'])

    # Patients without a reporting date are treated as reported today.
    end = datetime.today()
    patient_data_df['reported_date'] = patient_data_df['reported_date'].fillna(end)

    region_wise_counts = preprocess_dataset_regionwise(patient_data_df, history_days, modelName, region, region_col)

    region_file = region + "wise_counts_model_"+ modelName +"_under_reporting_factor_" + str(under_reporting_factor) + ".csv"
    region_wise_counts.to_csv(region_file)

    # splitext leaves extension-less paths intact (slicing at rfind('.') did not).
    outfile_prefix = os.path.splitext(output_file_path)[0]
    date = datetime.today().strftime("%b%d%Y")

    if region_wise_counts.shape[0] == 0:
        print("No "+region+" with active count greater than threshold")
        return None

    if output_format == 1:
        projections_df = projections(region_wise_counts, nList, modelName, region, region_col)
        outfile_prefix = outfile_prefix +"_"+ str(date) +" "+ region + "wise_model_"+ modelName +"_under_reporting_factor_" + str(under_reporting_factor)
    else:
        projections_df = projections_format2(region_wise_counts, nList, modelName, region, region_col)
        outfile_prefix = outfile_prefix +"_"+ str(date) +" "+ region + "_and_datewise_model_"+ modelName +"_under_reporting_factor_" + str(under_reporting_factor)

    if modelName == 'sir':
        outfile_prefix = outfile_prefix + '_r0_' + str(r0)
    elif modelName == 'fixExp':
        outfile_prefix = outfile_prefix + '_a_' + str(a_fixed)

    projections_df.to_csv(outfile_prefix + ".csv")

    return projections_df.drop_duplicates()

In [30]:
#### output_format = 1 for medical demand, output_format = 2 for day-wise counts
#
#### The population file must contain a `region_col` column and a `population`
#### column, so it has to match the chosen region level:
####   district level: region = 'district', region_col = 'district', population_data_file = 'district_population.csv'
####   state level:    region = 'state',    region_col = 'state',    population_data_file = 'state_population.csv'
####   country level:  region = 'country',  region_col = 'country',  population_data_file = 'country_population.csv'


input_file = 'patient_id_data_2403_district_imputed.csv'
model = 'sir'
output_format = 2
# The literal previously began with two invisible Unicode bidi-isolate
# characters, which ended up in the name of the written file.
output_file = 'counts.csv'
population_data_file = 'state_population.csv'
region = 'state'
region_col = 'state'


In [31]:
# Run the pipeline for the configuration above; 14 is the number of days to project.
projections_df = predict(
    modelName=model,
    nList=14,
    patient_data_file=input_file,
    region=region,
    region_col=region_col,
    output_format=output_format,
    output_file_path=output_file,
    region_population_data_file=population_data_file,
)


(546, 18)
    active_infected_count_-0  active_infected_count_-1  \
0                      1.000                     0.000   
1                      1.000                     1.000   
2                      1.000                     1.000   
3                      2.000                     1.000   
4                      3.000                     2.000   
5                      3.000                     3.000   
6                      3.000                     3.000   
7                      3.000                     3.000   
8                      3.000                     3.000   
9                      3.000                     3.000   
10                     3.000                     3.000   
11                     3.000                     3.000   
12                     3.000                     3.000   
13                     3.000                     3.000   
14                     2.000                     3.000   
15                     2.000                     2.000   
16  

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the 

    active_infected_count_-0  active_infected_count_-1  \
0                     14.000                     0.000   
1                     14.000                    14.000   
2                     14.000                    14.000   
3                     14.000                    14.000   
4                     14.000                    14.000   
5                     14.000                    14.000   
6                     14.000                    14.000   
7                     14.000                    14.000   
8                     14.000                    14.000   
9                     14.000                    14.000   
10                    14.000                    14.000   
11                    14.000                    14.000   
12                    14.000                    14.000   
13                    15.000                    14.000   
14                     3.000                    15.000   
15                     3.000                     3.000   
16            

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the 

    active_infected_count_-0  active_infected_count_-1  \
0                      1.000                     0.000   
1                      1.000                     1.000   
2                      1.000                     1.000   
3                      1.000                     1.000   
4                      1.000                     1.000   
5                      1.000                     1.000   
6                      1.000                     1.000   
7                      3.000                     1.000   
8                      3.000                     3.000   
9                      5.000                     3.000   
10                     6.000                     5.000   
11                     7.000                     6.000   
12                     8.000                     7.000   

    active_infected_count_-2  active_infected_count_-3  \
0                      0.000                     0.000   
1                      0.000                     0.000   
2            

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the 

   active_infected_count_-0  active_infected_count_-1  \
0                     4.000                     0.000   
1                     4.000                     4.000   
2                     6.000                     4.000   
3                     6.000                     6.000   
4                     7.000                     6.000   

   active_infected_count_-2  active_infected_count_-3  \
0                     0.000                     0.000   
1                     0.000                     0.000   
2                     4.000                     0.000   
3                     4.000                     4.000   
4                     6.000                     4.000   

   active_infected_count_-4       date  new_infected_count  \
0                     0.000 2020-03-20               4.000   
1                     0.000 2020-03-21               0.000   
2                     0.000 2020-03-22               2.000   
3                     0.000 2020-03-23               0.000   
4   

In [32]:
# Display the projections returned by predict() in the previous cell.
projections_df

Unnamed: 0,fatality_count,hospitalized_count,infected_count,need_icu_count,need_ventilator_count,projection_date,state
0,3.000,28.000,109.000,9.000,5.000,2020-03-25,Kerala
1,3.000,33.000,127.000,10.000,6.000,2020-03-26,Kerala
2,4.000,38.000,148.000,12.000,7.000,2020-03-27,Kerala
3,5.000,46.000,177.000,15.000,9.000,2020-03-28,Kerala
4,5.000,55.000,210.000,17.000,10.000,2020-03-29,Kerala
5,6.000,65.000,249.000,20.000,12.000,2020-03-30,Kerala
6,8.000,78.000,299.000,25.000,15.000,2020-03-31,Kerala
7,9.000,93.000,359.000,29.000,18.000,2020-04-01,Kerala
8,11.000,112.000,430.000,35.000,22.000,2020-04-02,Kerala
9,13.000,131.000,504.000,41.000,25.000,2020-04-03,Kerala
