In [1]:
import pandas as pd
import torch
import tensorflow as tf
import numpy as np

In [4]:
data = pd.read_csv(r'../Dataset/2nd_Wave_dataset.csv', header = None)

In [5]:
data.head(50)

Unnamed: 0,0,1
0,2020-10-03,8
1,2020-10-04,75
2,2020-10-05,262
3,2020-10-06,519
4,2020-10-07,207
5,2020-10-08,29
6,2020-10-09,35
7,2020-10-10,105
8,2020-10-11,124
9,2020-10-12,92


In [7]:
data = data.iloc[:, 1]

In [8]:
## splitting data

In [9]:
test_data_size = 15
train_data = data[:-test_data_size]
test_data = data[-test_data_size:]

In [10]:
train_data.shape

(12,)

In [9]:
train_data

42       0.0
43       1.0
44       0.0
45       1.0
46       3.0
       ...  
257    105.0
258    124.0
259     92.0
260    194.0
261    132.0
Name: Total_Island, Length: 220, dtype: float64

In [10]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [11]:
scaler = scaler.fit(np.expand_dims(train_data, axis = 1))
scaled_train = scaler.transform(np.expand_dims(train_data, axis = 1))
scaled_test = scaler.transform(np.expand_dims(test_data, axis = 1))

In [12]:
scaled_train

array([[0.        ],
       [0.00192678],
       [0.        ],
       [0.00192678],
       [0.00578035],
       [0.00963391],
       [0.01348748],
       [0.01926782],
       [0.02890173],
       [0.01734104],
       [0.01348748],
       [0.02504817],
       [0.01926782],
       [0.        ],
       [0.02890173],
       [0.00963391],
       [0.        ],
       [0.00770713],
       [0.        ],
       [0.01348748],
       [0.00770713],
       [0.00963391],
       [0.04046243],
       [0.00578035],
       [0.00963391],
       [0.01541426],
       [0.01348748],
       [0.01926782],
       [0.00385356],
       [0.01348748],
       [0.00770713],
       [0.00192678],
       [0.        ],
       [0.01734104],
       [0.02119461],
       [0.01541426],
       [0.02890173],
       [0.00963391],
       [0.        ],
       [0.01156069],
       [0.01926782],
       [0.0327553 ],
       [0.06358382],
       [0.01156069],
       [0.03853565],
       [0.00963391],
       [0.07899807],
       [0.073

In [13]:
## creating sequences

In [16]:
def create_sequences(df, seq_length):
    xs = []
    ys = []
    
    for i in range(len(df) - seq_length - 1):
        x = df[i:(i+seq_length)]
        y = df[i+seq_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [17]:
len(island_data)

235

In [19]:
seq_length = 15
X_train, y_train = create_sequences(scaled_train, seq_length)
X_test, y_test = create_sequences(scaled_test, seq_length)

X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()
y_train = torch.from_numpy(y_train).float()
y_test = torch.from_numpy(y_test).float()

In [24]:
print(X_train[1])
print(y_train[0])

tensor([[0.0019],
        [0.0000],
        [0.0019],
        [0.0058],
        [0.0096],
        [0.0135],
        [0.0193],
        [0.0289],
        [0.0173],
        [0.0135],
        [0.0250],
        [0.0193],
        [0.0000],
        [0.0289],
        [0.0096]])
tensor([0.0096])


In [25]:
##Creating the model

In [29]:
import torch.nn as nn
class SeriesPredictor(nn.Module):
    def __init__(self, n_features, n_hidden, seq_len, n_layers = 2):
        super(SeriesPredictor, self).__init__()
        
        self.n_hidden = n_hidden
        self.seq_len = seq_len
        self.n_layers = n_layers
        
        self.lstm = nn.LSTM(
            input_size = n_features,
            hidden_size = n_hidden,
            num_layers = n_layers,
            dropout = 0.2)
        
        self.linear = nn.Linear(in_features = n_hidden, out_features = 1)
        
    def reset_hidden_state(self):
        self.hidden = (
            torch.zeros(self.n_layers, self.seq_len, self.n_hidden),
            torch.zeros(self.n_layers, self.seq_len, self.n_hidden)
        )
        
    def forward(self, sequences):
        lstm_out, self.hidden = self.lstm(
            sequences.view(len(sequences), self.seq_len, -1),
            self.hidden)
        last_time_step = \
            lstm_out.view(self.seq_len, len(sequences), self.n_hidden)[-1]
        y_pred = self.linear(last_time_step)
        return y_pred

In [31]:
model = SeriesPredictor(
    n_features = 1,
    n_hidden = 50,
    seq_len = seq_length,
    n_layers = 2)

In [35]:
def train_model(
    model,
    train_data,
    train_labels,
    test_data = None,
    test_labels = None):
    
    loss_fn = torch.nn.MSELoss(reduction = 'sum')
    optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
    num_epochs = 20
    
    train_hist = np.zeros(num_epochs)
    test_hist = np.zeros(num_epochs)
    
    for t in range(num_epochs):
        model.reset_hidden_state()
        y_pred = model(X_train)
        loss = loss_fn(y_pred.float(), y_train)
        
        if test_data is not None:
            with torch.no_grad():
                y_test_pred = model(X_test)
                test_loss = loss_fn(y_test_pred.float(), y_test)
            test_hist[t] = test_loss.item()
            
            print(f'Epoch {t} train loss: {loss.item()} test loss: {test_loss.item()}')
            
        train_hist[t] = loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    return model.eval(), train_hist, test_hist

In [36]:
with torch.no_grad():
    test_seq = X_test[:1]
    preds = []
    for i in range(len(X_test)):
        y_test_pred = model(test_seq)
        pred = torch.flatten(y_test_pred).item()
        preds.append(pred)
        
        new_seq = test_seq.numpy().flatten()
        new_seq = np.append(new_seq, [pred])
        new_seq = new_seq[1:]
        test_seq = torch.as_tensor(new_seq).view(1, seq_length, 1).flaot()

In [37]:
train = train_model(model, X_train, X_test, y_train, y_test)

RuntimeError: cannot reshape tensor of 0 elements into shape [0, 15, -1] because the unspecified dimension size -1 can be any value and is ambiguous