In [1]:
import torch

import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.preprocessing import MinMaxScaler
from pandas.plotting import register_matplotlib_converters
from torch import nn, optim

rcParams['figure.figsize'] = 14, 10
register_matplotlib_converters()

In [2]:
dict = { 
  "AP": 'Andhra Pradesh',
  "AR": 'Arunachal Pradesh',
  "AS": 'Assam',
  "BR": 'Bihar',
  "CT": 'Chhattisgarh',
  "GA": 'Goa',
  "GJ": 'Gujarat',
  "HR": 'Haryana',
  "HP": 'Himachal Pradesh',
  "JH": 'Jharkhand',
  "KA": 'Karnataka',
  "KL": 'Kerala',
  "MP": 'Madhya Pradesh',
  "MH": 'Maharashtra',
  "MN": 'Manipur',
  "ML": 'Meghalaya',
  "MZ": 'Mizoram',
  "NL": 'Nagaland',
  "OR": 'Odisha',
  "PB": 'Punjab',
  "RJ": 'Rajasthan',
  "SK": 'Sikkim',
  "TN": 'Tamil Nadu',
  "TG": 'Telangana',
  "TR": 'Tripura',
  "UT": 'Uttarakhand',
  "UP": 'Uttar Pradesh',
  "WB": 'West Bengal',
  "AN": 'Andaman and Nicobar Islands',
  "CH": 'Chandigarh',
  "DN": 'Dadra and Nagar Haveli',
  "DD": 'Daman and Diu',  
  "DL": 'Delhi',
  "JK": 'Jammu and Kashmir',
  "LA": 'Ladakh',
  "LD": 'Lakshadweep',
  "PY": 'Puducherry',
  "TT": 'India',
  "UN": 'Unassigned',
}

In [3]:
RANDOM_SEED = 2
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
df = pd.read_csv('https://api.covid19india.org/csv/latest/state_wise_daily.csv')
confirmed_df = df[df.Status == "Confirmed"]
confirmed_df = confirmed_df.reset_index(drop = True)
confirmed_df = confirmed_df.drop("Status", axis = 1)
confirmed_df

Unnamed: 0,Date,TT,AN,AP,AR,AS,BR,CH,CT,DN,...,PB,RJ,SK,TN,TG,TR,UP,UT,WB,UN
0,14-Mar-20,81,0,1,0,0,0,0,0,0,...,1,3,0,1,1,0,12,0,0,0
1,15-Mar-20,27,0,0,0,0,0,0,0,0,...,0,1,0,0,2,0,1,0,0,0
2,16-Mar-20,15,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
3,17-Mar-20,11,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,2,0,1,0
4,18-Mar-20,37,0,0,0,0,0,0,0,0,...,1,3,0,1,8,0,2,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,18-Aug-20,64999,84,9652,134,2534,3257,89,808,51,...,1705,1347,20,5709,1682,205,4218,468,3175,0
158,19-Aug-20,69196,75,9742,75,2116,2884,91,752,37,...,1683,1312,25,5795,1763,236,5076,264,3169,0
159,20-Aug-20,68518,76,9393,116,1735,2451,119,1052,34,...,1741,1330,58,5986,1724,190,4824,411,3197,0
160,21-Aug-20,68519,67,9544,60,1856,2461,116,873,53,...,1503,1335,46,5995,1457,256,4905,447,3245,0


In [4]:
def run_for_every_city(daily_conf_cases):
    daily_conf_cases.index = pd.to_datetime(confirmed_df['Date'])
    city = dict[daily_conf_cases.name]
    plt.plot(daily_conf_cases, label = city)
    plt.legend(loc="upper left")
    plt.title("Daily confirmed cases for " + city);
    plt.savefig('confirmed_cases_plot/'+ city + '.png')
    plt.close()

In [5]:
def create_seq(data, seq_len):
    x = []
    y = []
    for i in range(len(data)-seq_len-1):
        x1 = data[i:(i+seq_len)]
        y1 = data[i+seq_len]
        x.append(x1)
        y.append(y1)
    return np.array(x), np.array(y)

In [6]:
class CoronaVirusPredictor(nn.Module):

  def __init__(self, n_features, n_hidden, seq_len, n_layers=2):
    super(CoronaVirusPredictor, self).__init__()

    self.n_hidden = n_hidden
    self.seq_len = seq_len
    self.n_layers = n_layers

    self.lstm = nn.LSTM(
      input_size=n_features,
      hidden_size=n_hidden,
      num_layers=n_layers,
      dropout=0.5
    )

    self.linear = nn.Linear(in_features=n_hidden, out_features=1)

  def reset_hidden_state(self):
    self.hidden = (
        torch.zeros(self.n_layers, self.seq_len, self.n_hidden),
        torch.zeros(self.n_layers, self.seq_len, self.n_hidden)
    )

  def forward(self, sequences):
    lstm_out, self.hidden = self.lstm(
      sequences.view(len(sequences), self.seq_len, -1),
      self.hidden
    )
    last_time_step = \
      lstm_out.view(self.seq_len, len(sequences), self.n_hidden)[-1]
    y_pred = self.linear(last_time_step)
    return y_pred

In [7]:
def train_model(
  model, 
  train_data, 
  train_labels, 
  test_data=None, 
  test_labels=None
):
  loss_fn = torch.nn.MSELoss(reduction='sum')

  optimiser = torch.optim.Adam(model.parameters(), lr=1e-3)
  num_epochs = 100

  train_hist = np.zeros(num_epochs)
  test_hist = np.zeros(num_epochs)

  for t in range(num_epochs):
    model.reset_hidden_state()

    y_pred = model(train_data)

    loss = loss_fn(y_pred.float(), train_labels)

    if test_data is not None:
      with torch.no_grad():
        y_test_pred = model(test_data)
        test_loss = loss_fn(y_test_pred.float(), test_labels)
      test_hist[t] = test_loss.item()

      if t % 10 == 0:  
        print(f'Epoch {t} train loss: {loss.item()} test loss: {test_loss.item()}')
    elif t % 10 == 0:
      print(f'Epoch {t} train loss: {loss.item()}')

    train_hist[t] = loss.item()
    
    optimiser.zero_grad()

    loss.backward()

    optimiser.step()
  
  return model.eval(), train_hist, test_hist

In [8]:
def run_model(daily_conf_cases) : 
    test_data_size = 60
    city = dict[daily_conf_cases.name]
    train_data = daily_conf_cases[:-test_data_size]
    test_data = daily_conf_cases[-test_data_size:]
    scaler = MinMaxScaler()
    scaler = scaler.fit(train_data[:, np.newaxis])
    train_data = scaler.transform(train_data[:, np.newaxis])
    test_data = scaler.transform(test_data[:, np.newaxis])
    seq_len = 5
    xtrain, ytrain = create_seq(train_data, seq_len)
    xtest, ytest = create_seq(test_data, seq_len)
    xtrain = torch.from_numpy(xtrain).float()
    ytrain = torch.from_numpy(ytrain).float()
    xtest = torch.from_numpy(xtest).float()
    ytest = torch.from_numpy(ytest).float()
    model = CoronaVirusPredictor(
      n_features=1, 
      n_hidden=512, 
      seq_len=seq_len, 
      n_layers=3
    )
    model, train_hist, test_hist = train_model(
      model, 
      xtrain, 
      ytrain, 
      xtest, 
      ytest
    )
    with torch.no_grad():
      test_seq = xtest[:1]
      preds = []
      for _ in range(len(xtest)):
        y_test_pred = model(test_seq)
        pred = torch.flatten(y_test_pred).item()
        preds.append(pred)
        new_seq = test_seq.numpy().flatten()
        new_seq = np.append(new_seq, [pred])
        new_seq = new_seq[1:]
        test_seq = torch.as_tensor(new_seq).view(1, seq_len, 1).float()
        
    true_cases = scaler.inverse_transform(
    np.expand_dims(ytest.flatten().numpy(), axis=0)
    ).flatten()

    predicted_cases = scaler.inverse_transform(
      np.expand_dims(preds, axis=0)
    ).flatten()

    scaler = scaler.fit(np.expand_dims(daily_conf_cases, axis=1))
    all_data = scaler.transform(np.expand_dims(daily_conf_cases, axis=1))
    X_all, y_all = create_seq(all_data, seq_len)
    X_all = torch.from_numpy(X_all).float()
    y_all = torch.from_numpy(y_all).float()
    model = CoronaVirusPredictor(
      n_features=1, 
      n_hidden=512, 
      seq_len=seq_len, 
      n_layers=2
    )
    model, train_hist, _ = train_model(model, X_all, y_all)
    DAYS_TO_PREDICT = 50
    with torch.no_grad():
      test_seq = X_all[:1]
      preds = []
      for _ in range(DAYS_TO_PREDICT):
        y_test_pred = model(test_seq)
        pred = torch.flatten(y_test_pred).item()
        preds.append(pred)
        new_seq = test_seq.numpy().flatten()
        new_seq = np.append(new_seq, [pred])
        new_seq = new_seq[1:]
        test_seq = torch.as_tensor(new_seq).view(1, seq_len, 1).float()
    predicted_cases = scaler.inverse_transform(np.expand_dims(preds, axis=0)).flatten()
    predicted_index = pd.date_range(
      start=daily_conf_cases.index[-1],
      periods=DAYS_TO_PREDICT + 1,
      closed='right'
    )
    predicted_cases = pd.Series(data=predicted_cases,index=predicted_index)
    plt.plot(daily_conf_cases, label='Historical Daily Cases for ' + city)
    plt.plot(predicted_cases, label='Predicted Daily Cases for ' + city)
    plt.legend();
    plt.savefig('model_output/'+ city + '.png')
    plt.close()

In [9]:
total_cols=len(confirmed_df.axes[1])
for x in range(1,total_cols) :
    daily_conf_cases = confirmed_df.iloc[0:,x]
    run_for_every_city(daily_conf_cases)

In [11]:
total_cols=len(confirmed_df.axes[1])
for x in range(15,total_cols) :
    daily_conf_cases = confirmed_df.iloc[0:,x]
    daily_conf_cases.index = pd.to_datetime(confirmed_df['Date'])
    run_model(daily_conf_cases)

Epoch 0 train loss: 4.251269340515137 test loss: 127.43582153320312
Epoch 10 train loss: 3.6781013011932373 test loss: 112.69070434570312
Epoch 20 train loss: 3.5740504264831543 test loss: 107.01036071777344
Epoch 30 train loss: 3.371168613433838 test loss: 55.20926284790039
Epoch 40 train loss: 3.0252108573913574 test loss: 58.1048698425293
Epoch 50 train loss: 2.3194124698638916 test loss: 72.12226867675781
Epoch 60 train loss: 1.9592301845550537 test loss: 45.89516830444336
Epoch 70 train loss: 1.8852767944335938 test loss: 57.142948150634766
Epoch 80 train loss: 1.7876957654953003 test loss: 47.623504638671875
Epoch 90 train loss: 2.1121959686279297 test loss: 51.62776184082031
Epoch 0 train loss: 10.73287296295166
Epoch 10 train loss: 7.356053829193115
Epoch 20 train loss: 14.772233963012695
Epoch 30 train loss: 7.484115123748779
Epoch 40 train loss: 7.295403003692627
Epoch 50 train loss: 6.2574663162231445
Epoch 60 train loss: 5.665953159332275
Epoch 70 train loss: 5.628957748413

Epoch 90 train loss: 7.446242332458496 test loss: 260.8211669921875
Epoch 0 train loss: 18.76854133605957
Epoch 10 train loss: 10.074174880981445
Epoch 20 train loss: 9.218064308166504
Epoch 30 train loss: 8.957684516906738
Epoch 40 train loss: 10.983050346374512
Epoch 50 train loss: 10.263555526733398
Epoch 60 train loss: 9.620718002319336
Epoch 70 train loss: 9.417646408081055
Epoch 80 train loss: 9.158206939697266
Epoch 90 train loss: 7.839008808135986
Epoch 0 train loss: 20.59468650817871 test loss: 312.4052734375
Epoch 10 train loss: 10.14889144897461 test loss: 193.70252990722656
Epoch 20 train loss: 7.948349475860596 test loss: 19.655115127563477
Epoch 30 train loss: 13.161867141723633 test loss: 70.18724060058594
Epoch 40 train loss: 9.949705123901367 test loss: 211.75047302246094
Epoch 50 train loss: 9.97968864440918 test loss: 197.6660919189453
Epoch 60 train loss: 9.116024017333984 test loss: 115.96434020996094
Epoch 70 train loss: 49.62544250488281 test loss: 20.75411415100

Epoch 30 train loss: 2.0937769412994385 test loss: 288.65093994140625
Epoch 40 train loss: 2.159374952316284 test loss: 690.2090454101562
Epoch 50 train loss: 2.1374330520629883 test loss: 998.4773559570312
Epoch 60 train loss: 2.090484619140625 test loss: 1251.9188232421875
Epoch 70 train loss: 2.070995569229126 test loss: 1370.14794921875
Epoch 80 train loss: 2.030294895172119 test loss: 1461.480712890625
Epoch 90 train loss: 2.018136978149414 test loss: 1622.0836181640625
Epoch 0 train loss: 12.372042655944824
Epoch 10 train loss: 6.418564796447754
Epoch 20 train loss: 2.258286714553833
Epoch 30 train loss: 1.4186627864837646
Epoch 40 train loss: 1.2889403104782104
Epoch 50 train loss: 1.833652138710022
Epoch 60 train loss: 1.3374217748641968
Epoch 70 train loss: 1.1314557790756226
Epoch 80 train loss: 1.1179494857788086
Epoch 90 train loss: 0.922927737236023
Epoch 0 train loss: 18.939828872680664 test loss: 325.8379211425781
Epoch 10 train loss: 7.669549465179443 test loss: 219.139

Epoch 40 train loss: 8.746350288391113
Epoch 50 train loss: 5.303770542144775
Epoch 60 train loss: 5.829370021820068
Epoch 70 train loss: 3.3525729179382324
Epoch 80 train loss: 2.36708664894104
Epoch 90 train loss: 2.4373672008514404
Epoch 0 train loss: 20.026954650878906 test loss: 1257.0009765625
Epoch 10 train loss: 10.564562797546387 test loss: 1011.1482543945312
Epoch 20 train loss: 3.5138888359069824 test loss: 451.9433288574219
Epoch 30 train loss: 9.770078659057617 test loss: 761.3167114257812
Epoch 40 train loss: 4.040634632110596 test loss: 530.6218872070312
Epoch 50 train loss: 2.02760648727417 test loss: 728.2655639648438
Epoch 60 train loss: 1.4785369634628296 test loss: 790.4743041992188
Epoch 70 train loss: 1.2494949102401733 test loss: 891.045654296875
Epoch 80 train loss: 1.0289682149887085 test loss: 896.6912841796875
Epoch 90 train loss: 0.973457396030426 test loss: 869.7395629882812
Epoch 0 train loss: 26.016077041625977
Epoch 10 train loss: 14.717351913452148
Epoc