In [74]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/road-occupancy-rate/traffic.csv


In [75]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

In [76]:
def create_sequences(df, seq_length, horizon):
    xs, ys = [], []
    for i in range(len(df) - seq_length - horizon):
        x = df.iloc[i:(i+seq_length)]
        y = df.iloc[(i+seq_length):(i+seq_length+horizon)]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [77]:
seq_length = 24
horizon = 3
df = pd.read_csv("/kaggle/input/road-occupancy-rate/traffic.csv")
print(df.shape)
X, y = create_sequences(df, seq_length, horizon)
print(X.shape, y.shape)

(17543, 862)
(17516, 24, 862) (17516, 3, 862)


In [78]:
#Splitting into train-test data
train_size = int(len(y) * 0.7)
val_size = int((len(y)-train_size) * 0.5)
X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
# Create TensorDataset
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).float()
X_val = torch.from_numpy(X_val).float()
y_val = torch.from_numpy(y_val).float()
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).float()

In [79]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
print(X_val.shape)
print(y_val.shape)

torch.Size([12261, 24, 862])
torch.Size([12261, 3, 862])
torch.Size([2628, 24, 862])
torch.Size([2628, 3, 862])
torch.Size([2627, 24, 862])
torch.Size([2627, 3, 862])


In [80]:
train = TensorDataset(X_train, y_train)
train_loader = DataLoader(train, batch_size = 32, shuffle = False, drop_last = True)

val = TensorDataset(X_val, y_val)
val_loader = DataLoader(val, batch_size = 32, shuffle = False, drop_last = True)

test = TensorDataset(X_test, y_test)
test_loader = DataLoader(test, batch_size = 32, shuffle = False, drop_last = True)

In [81]:
class LSTM(nn.Module):
    def __init__(self, input_size,hidden_size,num_layers,output_size):
        super(LSTM, self).__init__()
        # Define lstm layer
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first = True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        # Initialize long-term memory
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        # Pass all inputs to lstm layer
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [82]:
input_size = 862  #number of features
hidden_size = 32 #how many features LSTM should create
num_layers = 2 #number of stacked LSTM layers
output_size = 862 #number of predicted values
batch_size = 32
model = LSTM(input_size,hidden_size,num_layers,output_size)

In [83]:
from torchinfo import summary
summary(model)

Layer (type:depth-idx)                   Param #
LSTM                                     --
├─LSTM: 1-1                              123,136
├─Linear: 1-2                            28,446
Total params: 151,582
Trainable params: 151,582
Non-trainable params: 0

In [84]:
learning_rate = 0.01 
num_epochs = 20
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

train_losses = []
test_losses = []
val_losses = []

for epoch in range(num_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.view([batch_size, -1, input_size])
        y_batch = y_batch
        outputs = model(x_batch)
        outputs = outputs.unsqueeze(1).repeat(1,3,1)
        optimizer.zero_grad()
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    training_loss = np.mean(batch_losses)
    train_losses.append(training_loss)
    
    
    with torch.no_grad():
        batch_val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.view([batch_size, -1, input_size])
            y_val = y_val
            model.eval()
            yhat = model(x_val)
            yhat = yhat.unsqueeze(1).repeat(1,3,1)
            val_loss = criterion(y_val, yhat).item()
            batch_val_losses.append(val_loss)
        validation_loss = np.mean(batch_val_losses)
        val_losses.append(validation_loss)
    
    print(f"[{epoch+1}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}")

[1] Training loss: 0.0012	 Validation loss: 0.0010
[2] Training loss: 0.0009	 Validation loss: 0.0010
[3] Training loss: 0.0008	 Validation loss: 0.0010
[4] Training loss: 0.0008	 Validation loss: 0.0009
[5] Training loss: 0.0008	 Validation loss: 0.0009
[6] Training loss: 0.0008	 Validation loss: 0.0009
[7] Training loss: 0.0008	 Validation loss: 0.0009
[8] Training loss: 0.0008	 Validation loss: 0.0009
[9] Training loss: 0.0008	 Validation loss: 0.0009
[10] Training loss: 0.0008	 Validation loss: 0.0009
[11] Training loss: 0.0008	 Validation loss: 0.0009
[12] Training loss: 0.0008	 Validation loss: 0.0009
[13] Training loss: 0.0008	 Validation loss: 0.0009
[14] Training loss: 0.0008	 Validation loss: 0.0009
[15] Training loss: 0.0008	 Validation loss: 0.0009
[16] Training loss: 0.0008	 Validation loss: 0.0009
[17] Training loss: 0.0008	 Validation loss: 0.0009
[18] Training loss: 0.0008	 Validation loss: 0.0009
[19] Training loss: 0.0008	 Validation loss: 0.0009
[20] Training loss: 0

In [87]:
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_outputs = test_outputs.unsqueeze(1).repeat(1,3,1)
    test_loss = criterion(test_outputs, y_test)
    print(f"Test Loss: {test_loss.item():.4f}")

Test Loss: 0.0012
