In [26]:
import pandas as pd

train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")

In [27]:
train.head()

Unnamed: 0,row_id,time,x,y,direction,congestion
0,0,1991-04-01 00:00:00,0,0,EB,70
1,1,1991-04-01 00:00:00,0,0,NB,49
2,2,1991-04-01 00:00:00,0,0,SB,24
3,3,1991-04-01 00:00:00,0,1,EB,18
4,4,1991-04-01 00:00:00,0,1,NB,60


In [28]:
test.head()

Unnamed: 0,row_id,time,x,y,direction
0,848835,1991-09-30 12:00:00,0,0,EB
1,848836,1991-09-30 12:00:00,0,0,NB
2,848837,1991-09-30 12:00:00,0,0,SB
3,848838,1991-09-30 12:00:00,0,1,EB
4,848839,1991-09-30 12:00:00,0,1,NB


In [29]:
train.shape, test.shape

((848835, 6), (2340, 5))

In [30]:
train = train.drop(columns=['row_id'])
train['time'] = pd.to_datetime(train['time'])
train = train.sort_values('time')

In [31]:
train=pd.get_dummies(train, columns=['direction'])
test=pd.get_dummies(test, columns=['direction'])


In [32]:
train.head()

Unnamed: 0,time,x,y,congestion,direction_EB,direction_NB,direction_NE,direction_NW,direction_SB,direction_SE,direction_SW,direction_WB
0,1991-04-01,0,0,70,True,False,False,False,False,False,False,False
35,1991-04-01,1,3,44,False,False,True,False,False,False,False,False
36,1991-04-01,1,3,37,False,False,False,False,True,False,False,False
37,1991-04-01,1,3,15,False,False,False,False,False,False,True,False
38,1991-04-01,1,3,46,False,False,False,False,False,False,False,True


In [37]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
train['congestion'] = scaler.fit_transform(train[['congestion']])

In [38]:
import numpy as np

def create_sequences(data, seq_len):
    X, y = [], []
    for i in range(len(data) - seq_len):
        X.append(data[i:i+seq_len])
        y.append(data[i+seq_len])
    return np.array(X), np.array(y)

In [35]:
SEQ_LEN = 5
X_all, y_all = [], []

group_cols = ['x', 'y', 'direction_EB', 'direction_NB', 'direction_SB']

for _, group in train.groupby(group_cols):
    values = group[['congestion']].values
    if len(values) > SEQ_LEN:
        X, y = create_sequences(values, SEQ_LEN)
        X_all.append(X)
        y_all.append(y)

X = np.concatenate(X_all)
y = np.concatenate(y_all)

In [36]:
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [39]:
import torch
import torch.nn as nn

class TrafficLSTM(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=64, batch_first=True)
        self.fc = nn.Linear(64, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]
        return self.fc(out)

In [44]:
model = TrafficLSTM()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [45]:
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader

EPOCHS = 5
BATCH_SIZE = 1024

train_dataset = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.float32),
)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0

    for xb, yb in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}", leave=False):
        optimizer.zero_grad()
        output = model(xb)
        loss = criterion(output, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * xb.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {epoch_loss:.4f}")

Epoch 1/5:   0%|          | 0/663 [00:00<?, ?it/s]

                                                            

Epoch 1/5, Loss: 0.0194


                                                            

Epoch 2/5, Loss: 0.0125


                                                            

Epoch 3/5, Loss: 0.0121


                                                            

Epoch 4/5, Loss: 0.0119


                                                            

Epoch 5/5, Loss: 0.0119




In [46]:
from torch.utils.data import TensorDataset, DataLoader

test_dataset = TensorDataset(
    torch.tensor(X_test, dtype=torch.float32),
    torch.tensor(y_test, dtype=torch.float32),
)
test_loader = DataLoader(test_dataset, batch_size=2048, shuffle=False)

model.eval()
total_loss = 0.0
with torch.no_grad():
    for xb, yb in test_loader:
        preds = model(xb)
        batch_loss = criterion(preds, yb)
        total_loss += batch_loss.item() * xb.size(0)

mse = total_loss / len(test_loader.dataset)
print("Test MSE:", mse)

Test MSE: 0.01864917203020142
