In [1]:
from collections import defaultdict 
import numpy as np
import matplotlib.pyplot as plt 
%matplotlib inline
import pandas as pd
from datetime import date, datetime, timedelta


In [2]:
import torch
from torch import nn, optim


In [3]:
def perdelta(start, end, delta):
    curr = start
    while curr < end:
        yield curr
        curr += delta
target = []
for i in perdelta(date(2020, 1, 6), date(2020, 6, 15), timedelta(days=1)):
    t =i.strftime('%Y-%m-%d')
    target.append(t)

In [4]:
cases_date = []
for i in perdelta(date(2020, 2, 1), datetime.now().date(), timedelta(days=1)):
    t =i.strftime('%Y-%m-%d')
    cases_date.append(t)
    

In [5]:
CSSE_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
cases = pd.read_csv(CSSE_url, dtype = {'FIPS' : np.str_})
cases['FIPS'] = cases['FIPS'].str[:-2].str.zfill(5)
cases['FIPS'] = cases['FIPS'].str.zfill(5)

cases = cases.dropna()
cases = cases.set_index('FIPS')
cases = cases[cases['Admin2'] != 'Unassigned']
# cases = cases[cases.index.isin(df.index)]

daily_cases = cases.iloc[:, 13:] - cases.iloc[:, 13:].shift(axis = 1)
# daily_cases_3 = daily_cases.dropna(axis = 1).rolling(window = 3, axis = 1).mean()
daily_cases = daily_cases.dropna(axis = 1).rolling(window = 7, axis = 1).mean()
daily_cases = daily_cases.dropna(axis = 1)
daily_cases.columns = cases_date

In [6]:
daily_cases_state = daily_cases.groupby(daily_cases.index.str[:2]).sum()

In [7]:
raw_cases = (cases.iloc[:, 13:] - cases.iloc[:, 13:].shift(axis = 1)).dropna(axis = 1)
raw_cases_state = raw_cases.groupby(daily_cases.index.str[:2]).sum()
raw_cases_state = raw_cases_state.iloc[:, 6:]
raw_cases_state.columns = cases_date

### Population data

In [8]:
demo = pd.read_csv('/Users/hongru/Projects/Covid_projection/data/age_US_state.csv',
                  dtype = {'FIPS' : np.str_}).set_index('FIPS')

### Load trained model

In [9]:
output_size = 7
# num_epochs = 2
input_size = 12 #input_size = number of features
sequence_length = 28 
learning_rate = 0.0001
num_layers = 1
hidden_layer_size = 512
sequence_length_features = 15

In [10]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_layer_size, num_layers, output_size):
#     input_size=3, hidden_layer_size=256, output_size=1):
        super().__init__()
#         torch.manual_seed(0)
        
        self.hidden_layer_size = hidden_layer_size
        
#         self.hidden_size = hidden_layer_size
        
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_layer_size, num_layers, batch_first=True, dropout = 0.5)

        self.linear = nn.Linear(hidden_layer_size, output_size)
    
        #hidden cell size: (hidden_size, batch_size, hidden_layer_size)
        self.hidden_cell = (torch.zeros(self.num_layers,1,self.hidden_layer_size),
                            torch.zeros(self.num_layers,1,self.hidden_layer_size))


    def forward(self, input_seq):

        lstm_out, self.hidden_cell = self.lstm(input_seq, self.hidden_cell)
        
        #only return the results for last sequence
        lstm_out = lstm_out[:, -1, :]
        predictions = self.linear(lstm_out)
        return predictions

In [11]:
m_state_dict = torch.load('/Users/hongru/Projects/Covid_projection/models/RNN-LSTM-7-day-projection_week44.pt')

In [12]:
model = LSTM(input_size, hidden_layer_size, num_layers, output_size)
model.load_state_dict(m_state_dict)

  "num_layers={}".format(dropout, num_layers))


<All keys matched successfully>

In [13]:
model

LSTM(
  (lstm): LSTM(12, 512, batch_first=True, dropout=0.5)
  (linear): Linear(in_features=512, out_features=7, bias=True)
)

### import dataframe

In [14]:
df = pd.read_pickle('/Users/hongru/Projects/Covid_projection/data/RNN_input_week44.pickle')

In [16]:
df = df.set_index(['FIPS', 'Date'])

In [17]:
df['cases'] = (df['cases']/df['total_pop'])*10000

In [18]:
from sklearn.preprocessing import MinMaxScaler


In [19]:
scaler = MinMaxScaler(feature_range = (0, 1))
scaler.fit(df.iloc[:, 1:])

MinMaxScaler(copy=True, feature_range=(0, 1))

In [20]:
train_features_normalized = scaler.transform(df.iloc[:, 1:])

In [21]:
scaler_cases = MinMaxScaler(feature_range = (0, 1))
scaler_cases.fit(np.asarray(df.iloc[:,0]).reshape(-1, 1))

MinMaxScaler(copy=True, feature_range=(0, 1))

In [22]:
train_cases_normalized = scaler_cases.transform(np.asarray(df.iloc[:,0]).reshape(-1, 1))

In [23]:
df.iloc[:, 1:] = train_features_normalized
df['cases'] = train_cases_normalized

### First round prediction

In [24]:
# test_data_size = 1
predict_data = []
# test_data = []
state_ordered = []
for i in df.index.get_level_values('FIPS').unique():
    df_state = df.iloc[df.index.get_level_values('FIPS') == i][-(sequence_length):]
    
    predict_data.append(df_state.to_numpy())
    state_ordered.append(i)


In [25]:
# validation = []
pred = []
# count = 1
result = pd.DataFrame(columns = ['State', 'prediction', 'Actual'])
with torch.no_grad():
    
    for seq in predict_data:
        
        seq = torch.tensor(seq).reshape(-1, sequence_length, input_size)
        model.hidden_cell = (torch.zeros(num_layers, 1, model.hidden_layer_size),
                        torch.zeros(num_layers, 1, model.hidden_layer_size))
        prediction = model(seq.float())
#         validation.append(labels)
        pred.append(prediction)

        

### Prediction for first week

In [26]:
df_output = pd.DataFrame(columns = ['FIPS', 'Date', 'Predicted_Cases','Week'])
j = 0
x = pd.to_datetime(daily_cases_state.loc[:,'2020-11-01':'2020-11-07'].columns)
for i in state_ordered:
    
#     plt.figure() # add this statement before your plot
    predicted = pred[j]
#     actual = validation[j]
    incidence = scaler_cases.inverse_transform(np.asarray(predicted).reshape(-1, 1))
    pred_cases = (incidence/10000)*demo.loc[i]['total_pop']
#     plt.plot(x, pred_cases, label = 'Prediction', marker = 'o')
#     plt.plot(x, raw_cases_state.loc[:,'2020-11-01':'2020-11-07'].loc[i], label = 'groundtruth', marker = 'o')
#     plt.title(i)
#     plt.legend()
#     plt.show()
    for num in range(len(x)):
        dic = {
            'FIPS' : i,
            'Date' : x[num],
            'Predicted_Cases' : pred_cases[num].item(),
#             'Ground_truth' : raw_cases_state.loc[:,'2020-11-29':'2020-12-05'].loc[i][num],
            'Week' : 'Week1'
        }
        df_output = df_output.append(dic, ignore_index = True)
    j += 1
#     print('weekly Prediction: ', np.sum(pred_cases))
#     print('weekly groundtruth: ', np.sum(raw_cases_state.loc[:,'2020-11-01':'2020-11-07'].loc[i]))


### Import feature prediction

In [27]:
m_features_state_dict = torch.load('/Users/hongru/Projects/Covid_projection/models/RNN-LSTM-7-day-features_week44.pt')

In [28]:
model_features = LSTM(input_size = 12, hidden_layer_size = 258, num_layers = 1, output_size = 9*7)
model_features.load_state_dict(m_features_state_dict)

  "num_layers={}".format(dropout, num_layers))


<All keys matched successfully>

In [29]:
model_features

LSTM(
  (lstm): LSTM(12, 258, batch_first=True, dropout=0.5)
  (linear): Linear(in_features=258, out_features=63, bias=True)
)

In [30]:
'''
Predict features for week 2
'''
pred_features_w2 = []
# count = 1
# result = pd.DataFrame(columns = ['State', 'prediction', 'Actual'])
with torch.no_grad():
    
    for seq in predict_data:
        
        seq = torch.tensor(seq).reshape(-1, 28, 12)
        model_features.hidden_cell = (torch.zeros(num_layers, 1, model_features.hidden_layer_size),
                        torch.zeros(num_layers, 1, model_features.hidden_layer_size))
        prediction = model_features(seq.float())
#         validation.append(labels)
        pred_features_w2.append(prediction)
      

In [31]:
def get_new_predict_data(pred_features, pred, predict_data, df, state_ordered):
    """
    pred_featues: get new predicted times series features
    
    pred: add new data to pred, remove extra rows
    """
    j = 0
    
    output_data = []
    for i in state_ordered:
        
        constant = (df.iloc[df.index.get_level_values('FIPS') == i].iloc[:7,-2:]).to_numpy()
        #-2 here is the number of constant features
        
        cases_pred = pred[j]
        features_pred = pred_features[j].reshape(7,9)
        #3 here is the number of time series features, not include cases
        
        new_data = np.concatenate((cases_pred.T, features_pred), axis=1)
        new_data = np.concatenate((new_data, constant), axis=1)
        
        nxt_round_data = np.concatenate((predict_data[j][7:], new_data), axis = 0)
        
        output_data.append(nxt_round_data)
        
        j += 1
        
    return output_data

In [32]:
input_data_W2 = get_new_predict_data(pred_features_w2, pred, predict_data, df, state_ordered)

In [33]:
num_layers = 1
pred_w2 = []
with torch.no_grad():
    
    for seq in input_data_W2:
        
        seq = torch.tensor(seq).reshape(-1, sequence_length, input_size)
        model.hidden_cell = (torch.zeros(num_layers, 1, model.hidden_layer_size),
                        torch.zeros(num_layers, 1, model.hidden_layer_size))
        prediction = model(seq.float())
#         validation.append(labels)
        pred_w2.append(prediction)

In [34]:
w2_date = []
for i in perdelta(date(2020, 11, 8), date(2020, 11, 15), timedelta(days=1)):
    t =i.strftime('%Y-%m-%d')
    w2_date.append(t)
    

### Prediction for next week

In [35]:
j = 0
x = pd.to_datetime(w2_date)
for i in state_ordered:
    
#     plt.figure() # add this statement before your plot
    predicted = pred_w2[j]
#     actual = validation[j]
    incidence = scaler_cases.inverse_transform(np.asarray(predicted).reshape(-1, 1))
    pred_cases_W2 = (incidence/10000)*demo.loc[i]['total_pop']
    for num in range(len(x)):
        dic = {
            'FIPS' : i,
            'Date' : x[num],
            'Predicted_Cases' : pred_cases_W2[num].item(),
#             'Ground_truth' : raw_cases_state.loc[:,'2020-11-29':'2020-12-05'].loc[i][num],
            'Week' : 'Week2'
        }
        df_output = df_output.append(dic, ignore_index = True)
    j += 1
#     print('Prediction for week 2: ', np.sum(pred_cases))

### Week 3

In [36]:
'''
Predict features for week 3
'''
pred_features_w3 = []
# count = 1
# result = pd.DataFrame(columns = ['State', 'prediction', 'Actual'])
with torch.no_grad():
    
    for seq in input_data_W2:
        
        seq = torch.tensor(seq).reshape(-1, 28, 12)
        model_features.hidden_cell = (torch.zeros(num_layers, 1, model_features.hidden_layer_size),
                        torch.zeros(num_layers, 1, model_features.hidden_layer_size))
        prediction = model_features(seq.float())
        pred_features_w3.append(prediction)
      

In [37]:
input_data_W3 = get_new_predict_data(pred_features_w3, pred_w2, input_data_W2, df, state_ordered)

In [38]:
w3_date = []
for i in perdelta(date(2020, 11, 15), date(2020, 11, 22), timedelta(days=1)):
    t =i.strftime('%Y-%m-%d')
    w3_date.append(t)
    

In [39]:
pred_w3 = []
with torch.no_grad():
    
    for seq in input_data_W3:
        
        seq = torch.tensor(seq).reshape(-1, sequence_length, input_size)
        model.hidden_cell = (torch.zeros(num_layers, 1, model.hidden_layer_size),
                        torch.zeros(num_layers, 1, model.hidden_layer_size))
        prediction = model(seq.float())
#         validation.append(labels)
        pred_w3.append(prediction)

In [40]:
j = 0
x = pd.to_datetime(w3_date)
for i in state_ordered:
    predicted = pred_w3[j]
    incidence = scaler_cases.inverse_transform(np.asarray(predicted).reshape(-1, 1))
    pred_cases_W3 = (incidence/10000)*demo.loc[i]['total_pop']
    for num in range(len(x)):
        dic = {
            'FIPS' : i,
            'Date' : x[num],
            'Predicted_Cases' : pred_cases_W3[num].item(),
#             'Ground_truth' : raw_cases_state.loc[:,'2020-11-29':'2020-12-05'].loc[i][num],
            'Week' : 'Week3'
        }
        df_output = df_output.append(dic, ignore_index = True)
    j += 1

### Week4

In [42]:
'''
Predict features for week 4
'''
pred_features_w4 = []
# count = 1
# result = pd.DataFrame(columns = ['State', 'prediction', 'Actual'])
with torch.no_grad():
    
    for seq in input_data_W3:
        
        seq = torch.tensor(seq).reshape(-1, 28, 12)
        model_features.hidden_cell = (torch.zeros(num_layers, 1, model_features.hidden_layer_size),
                        torch.zeros(num_layers, 1, model_features.hidden_layer_size))
        prediction = model_features(seq.float())
        pred_features_w4.append(prediction)
      

In [43]:
input_data_W4 = get_new_predict_data(pred_features_w4, pred_w3, input_data_W3, df, state_ordered)

In [44]:
w4_date = []
for i in perdelta(date(2020, 11, 22), date(2020, 11, 29), timedelta(days=1)):
    t =i.strftime('%Y-%m-%d')
    w4_date.append(t)
    

In [45]:
pred_w4 = []
with torch.no_grad():
    
    for seq in input_data_W4:
        
        seq = torch.tensor(seq).reshape(-1, sequence_length, input_size)
        model.hidden_cell = (torch.zeros(num_layers, 1, model.hidden_layer_size),
                        torch.zeros(num_layers, 1, model.hidden_layer_size))
        prediction = model(seq.float())
#         validation.append(labels)
        pred_w4.append(prediction)

In [46]:
j = 0
x = pd.to_datetime(w4_date)
for i in state_ordered:
    predicted = pred_w4[j]
    incidence = scaler_cases.inverse_transform(np.asarray(predicted).reshape(-1, 1))
    pred_cases_W4 = (incidence/10000)*demo.loc[i]['total_pop']
    for num in range(len(x)):
        dic = {
            'FIPS' : i,
            'Date' : x[num],
            'Predicted_Cases' : pred_cases_W4[num].item(),
#             'Ground_truth' : raw_cases_state.loc[:,'2020-11-29':'2020-12-05'].loc[i][num],
            'Week' : 'Week4'
        }
        df_output = df_output.append(dic, ignore_index = True)
    j += 1

In [47]:
# df_output.to_csv('/Users/hongru/Projects/Covid_projection/data/model_outputs_week44.csv')

In [50]:
df_output

Unnamed: 0,FIPS,Date,Predicted_Cases,Week
0,36,2020-11-01,3054.343262,Week1
1,36,2020-11-02,3117.647461,Week1
2,36,2020-11-03,3254.234619,Week1
3,36,2020-11-04,3349.499268,Week1
4,36,2020-11-05,3427.557617,Week1
...,...,...,...,...
1423,20,2020-11-24,1362.552124,Week4
1424,20,2020-11-25,1348.206177,Week4
1425,20,2020-11-26,1379.089233,Week4
1426,20,2020-11-27,1402.068604,Week4
