## LOAD SOLUTION FILES

In [2]:
import os
import pickle
import pandas as pd
from attention import *
from helpers import *

In [12]:
!pip install numba

Collecting numba
  Downloading numba-0.58.1-cp38-cp38-win_amd64.whl.metadata (2.8 kB)
Collecting llvmlite<0.42,>=0.41.0dev0 (from numba)
  Downloading llvmlite-0.41.1-cp38-cp38-win_amd64.whl.metadata (4.9 kB)
Downloading numba-0.58.1-cp38-cp38-win_amd64.whl (2.6 MB)
   ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.6 MB 435.7 kB/s eta 0:00:06
   - -------------------------------------- 0.1/2.6 MB 1.2 MB/s eta 0:00:03
   ------------ --------------------------- 0.8/2.6 MB 5.7 MB/s eta 0:00:01
   --------------------- ------------------ 1.4/2.6 MB 6.9 MB/s eta 0:00:01
   ------------------------------ --------- 2.0/2.6 MB 7.9 MB/s eta 0:00:01
   ---------------------------------------  2.6/2.6 MB 8.2 MB/s eta 0:00:01
   ---------------------------------------- 2.6/2.6 MB 8.0 MB/s eta 0:00:00
Downloading llvmlite-0.41.1-cp38-cp38-win_amd64.whl (28.1 MB)

### Extract OD demand and path set (X and Y)
X: OD demand, graph (link features), paths, link-path adjacency \
Y: path flow

In [72]:
%run helpers.py
%run attention.py

class Dataset():
    """Eagerly loaded set of (X, Y) sample tensors.

    Every sample is read once at construction time from a file named
    ``Output/5by5_Data<k>`` (k = start_from, start_from + 1, ...) through
    ``generate_xy`` and stored in two pre-allocated float32 tensors.

    Args:
        size: number of samples (files) to load.
        input_dim0, input_dim1: shape of a single X sample.
        output_dim0, output_dim1: shape of a single Y sample.
        standard_norm: scaling mode forwarded unchanged to ``generate_xy``.
        start_from: index of the first file to read (default 0).
    """

    def __init__(self, size, input_dim0, input_dim1, output_dim0, output_dim1, standard_norm, start_from=0):
        super().__init__()
        # Path-encoding dictionary, built once and reused for every file.
        self.path_encoded = path_encoder()
        self.entries = size

        # Allocate the full storage up front; rows are filled in the loop below.
        self.X = torch.zeros((size, input_dim0, input_dim1), dtype=torch.float32)
        self.Y = torch.zeros((size, output_dim0, output_dim1), dtype=torch.float32)

        for slot in tqdm(range(size)):
            source_file = f"Output/5by5_Data{start_from + slot}"
            features, target = generate_xy(source_file, self.path_encoded, standard_norm)
            self.X[slot] = features
            self.Y[slot] = target

    def __len__(self):
        """Return the number of samples requested at construction."""
        return self.entries

    def __getitem__(self, idx):
        """Return the ``(X[idx], Y[idx])`` pair for the given index."""
        return self.X[idx], self.Y[idx]
    

In [73]:
# %run helpers.py
# Per-sample tensor geometry: X is (sequence_leng, input_dim), Y is (sequence_leng, output_dim).
sequence_leng = 625
input_dim = 9
output_dim = 3

# Split sizes, batching and the scaling mode handed to generate_xy.
train_size = 1000
val_size = 500
batch_size = 32
standard_norm = 'standardize'

# Training samples are read from file index 0 upward (start_from keeps its default).
train_dataset = Dataset(
    size=train_size,
    input_dim0=sequence_leng,
    input_dim1=input_dim,
    output_dim0=sequence_leng,
    output_dim1=output_dim,
    standard_norm=standard_norm,
)
train_data_loader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [67]:
# Validation samples come from the files immediately after the training range
# (start_from=train_size). They deliberately reuse the `standard_norm` mode set in
# the configuration cell: scaling validation data differently from training data
# would make the validation loss incomparable to the training loss.
val_dataset = Dataset(val_size, sequence_leng, input_dim, sequence_leng, output_dim, standard_norm, start_from=train_size)
val_data_loader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

  0%|          | 0/500 [00:00<?, ?it/s]

### Test Encoder

In [61]:
# print(torch.cuda.is_available())
%run helpers.py

# Number of samples used for the smoke tests below. It is defined here so the cell
# runs on a fresh kernel (the value matches the 30-item progress bar recorded in
# this cell's output).
size = 30

path_encoded = path_encoder()
X = torch.zeros([size, sequence_leng, input_dim], dtype=torch.float32)
Y = torch.zeros([size, sequence_leng, output_dim], dtype=torch.float32)
network = []
for i in tqdm(range(size)) :
    file_name = f"Output/5by5_Data{i}"
    # Pass the scaling mode explicitly, the same way the Dataset class calls generate_xy.
    x, y = generate_xy(file_name, path_encoded, standard_norm)
    X[i] = x
    Y[i] = y

    # file = open(file_name, "rb")
    # stat = pickle.load(file)
    # file.close()
    # path_links = stat["path_flow"]
    # nodes = stat["data"]["nodes"]
    # Path_tensor = normalize_tensor(preprocess_path(path_links, nodes, path_encoded))
    # network.append(Path_tensor)

    

  0%|          | 0/30 [00:00<?, ?it/s]

In [4]:
%run attention.py

# X = torch.flatten(X, start_dim=1)
print("input shape: ", X.shape)

# Smoke test: run the whole X tensor through a freshly constructed (untrained)
# Encoder and print the resulting shape. x2 is reused by the decoder test cell.
x2 = Encoder(input_dim=input_dim, d_model=2048, N=2, heads=8, dropout=0.1)(X)
print("Encoder output shape: ", x2.shape)

input shape:  torch.Size([30, 625, 1164])
Finish calculate attention score
Finish Multihead attention
Finish calculate attention score
Finish Multihead attention
Finish encoder
Encoder output shape:  torch.Size([30, 625, 2048])


### Test Decoder

In [7]:
%run attention.py
# Smoke test: feed the targets Y together with the encoder output x2 through a
# freshly constructed Decoder and display the output shape.
# NOTE(review): the positional arguments are presumably
# (output_dim, d_model, N, heads, dropout), mirroring the Encoder call that
# produced x2 (d_model=2048) — confirm against attention.py.
decoder = Decoder(output_dim, 2048, 2, 8, 0.1)
x3 = decoder(Y, x2)
x3.shape

Finish calculate attention score
Finish Multihead attention
Finish calculate attention score
Finish Multihead attention
Finish calculate attention score
Finish Multihead attention
Finish calculate attention score
Finish Multihead attention


torch.Size([30, 625, 2048])

### Test Transformer

In [62]:
%run attention.py

# Smoke test: build an untrained Transformer and run one forward pass on the full
# X / Y tensors; the cell displays the output shape.
transformer = Transformer(input_dim=input_dim, output_dim=output_dim, d_model=512, N=2,heads=8, dropout=0.1)
out = transformer(X, Y)
out.shape

Finish calculate attention score
Finish Multihead attention
Finish calculate attention score
Finish Multihead attention
Finish encoder
Finish calculate attention score
Finish Multihead attention
Finish calculate attention score
Finish Multihead attention
Finish calculate attention score
Finish Multihead attention
Finish calculate attention score
Finish Multihead attention


torch.Size([30, 625, 3])

In [63]:
# Tabulate the untrained transformer's output for sample 0 and show its first rows.
# detach() drops the autograd graph so the tensor can be converted to a NumPy array.
df = pd.DataFrame(out[0].detach().numpy())
df.head()

Unnamed: 0,0,1,2
0,0.073931,1.032052,0.251043
1,-0.140311,-0.873479,0.17298
2,-0.150692,-0.885234,0.166668
3,-0.147159,-0.879593,0.16988
4,-0.088899,0.485946,0.074822


### TRY TRAINING AND VALIDATING

In [None]:
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Transformer(input_dim=input_dim, output_dim=output_dim, d_model=512, N=2, heads=8, dropout=0.1)
epochs = 100

model.to(device)

# Define the loss function and optimizer.
# The targets are continuous path flows stored as float tensors, so this is a
# regression problem. nn.CrossEntropyLoss expects class indices (or per-class
# probabilities) and cannot consume the float targets used here, so the mean
# squared error is used instead.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


def _shifted_batch_loss(model, criterion, batch, device):
    """Compute the loss of one (src, trg) batch with a one-step target shift.

    The model receives ``trg[:, :-1]`` as decoder input and its output is
    compared with ``trg[:, 1:]``. Prediction and target are both flattened to
    ``(-1, features)`` so they have identical shapes when passed to the criterion.

    Args:
        model: module called as ``model(src, decoder_input)``.
        criterion: loss callable taking ``(prediction, target)``.
        batch: ``(src, trg)`` pair as yielded by the data loaders.
        device: device both tensors are moved to before the forward pass.

    Returns:
        The loss tensor returned by ``criterion``.
    """
    src, trg = batch
    src, trg = src.to(device), trg.to(device)

    output = model(src, trg[:, :-1])
    prediction = output.reshape(-1, output.size(-1))
    target = trg[:, 1:].reshape(-1, trg.size(-1))
    return criterion(prediction, target)


# Training loop
for epoch in range(epochs):
    # Training phase
    model.train()
    total_train_loss = 0
    for batch in train_data_loader:
        loss = _shifted_batch_loss(model, criterion, batch, device)

        # Backpropagate and update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()

    # Validation phase: no gradients, dropout disabled via eval()
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for batch in val_data_loader:
            loss = _shifted_batch_loss(model, criterion, batch, device)
            total_val_loss += loss.item()

    # Print the training and validation losses (averaged over batches)
    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {total_train_loss/len(train_data_loader):.4f}, Val Loss: {total_val_loss/len(val_data_loader):.4f}")
