## LOAD SOLUTION FILES

In [3]:
import os
import pickle
import pandas as pd
from tf_attention import *
from tf_helpers import *

### Extract OD demand and path set (X and Y)
X: OD demand, graph (link features), path set, link-path adjacency \
Y: path flow

In [54]:
%run tf_attention.py
%run tf_helpers.py 

class Dataset:
    """In-memory collection of (X, Y) samples read from the ``Output/5by5_Data<i>`` files.

    Every sample is produced by ``generate_xy`` and the samples are stacked along a
    new leading axis, so ``X[i]`` / ``Y[i]`` belong to file index ``start_from + i``.
    """

    def __init__(self, size, standard_norm, start_from=0):
        """Load ``size`` consecutive files, beginning at file index ``start_from``.

        ``standard_norm`` is forwarded unchanged to ``generate_xy``.
        """
        # Path encoding dictionary, built once and reused for every sample.
        self.path_encoded = path_encoder()

        pairs = [
            generate_xy(f"Output/5by5_Data{start_from + offset}", self.path_encoded, standard_norm)
            for offset in tqdm(range(size))
        ]

        # Stack the per-file results along a new first (sample) axis.
        self.X = tf.stack([features for features, _ in pairs], axis=0)
        self.Y = tf.stack([targets for _, targets in pairs], axis=0)

    def __len__(self):
        """Number of loaded samples (length of ``X`` along its first axis)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(X[idx], Y[idx])`` pair."""
        features, targets = self.X[idx], self.Y[idx]
        return features, targets

    def to_tf_dataset(self, batch_size):
        """Wrap the samples in a shuffled, batched and prefetched ``tf.data.Dataset``."""
        sample_count = len(self.X)
        pipeline = tf.data.Dataset.from_tensor_slices((self.X, self.Y))
        # The shuffle buffer spans all samples.
        pipeline = pipeline.shuffle(buffer_size=sample_count)
        pipeline = pipeline.batch(batch_size)
        return pipeline.prefetch(tf.data.AUTOTUNE)

# class Dataset(tf.data.Dataset):
#     def __init__(self, size, standard_norm, start_from=0):
#         super().__init__()
#         self.path_encoded = path_encoder()  # Get path encode dictionary
#         self.standard_norm = standard_norm
#         self.start_from = start_from

#         self.data = tf.data.Dataset.range(size).map(self.generate_xy)

#     def generate_xy(self, i):
#         file_name = f"Output/5by5_Data{self.start_from + i}"
#         x, y = generate_xy(file_name, self.path_encoded, self.standard_norm)
#         return x, y

#     def __len__(self):
#         return self.data.cardinality().numpy()

#     def __getitem__(self, idx):
#         return self.data[idx]
    

In [55]:
# %run helpers.py
# Shared experiment configuration.
sequence_length = 625
sequence_leng = sequence_length  # misspelled legacy name, kept so existing references still resolve
input_dim = 9
output_dim = 3
batch_size = 32
train_size = 1000
val_size = 500
standard_norm = 'normalize'

# Training samples are read from file indices 0 .. train_size - 1.
train_dataset = Dataset(train_size, standard_norm)
train_data_loader = train_dataset.to_tf_dataset(batch_size)

# Validation samples start at file index train_size, so the two sets do not overlap.
val_dataset = Dataset(val_size, standard_norm, start_from=train_size)
val_data_loader = val_dataset.to_tf_dataset(batch_size)

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

### Test Encoder

In [24]:
# print(torch.cuda.is_available())
%run tf_helpers.py
import tensorflow as tf

size = 32
sequence_length = 625
input_dim = 9
output_dim = 3

path_encoded = path_encoder()
X = []
Y = []
network = []
for i in tqdm(range(size)):
    file_name = f"Output/5by5_Data{i}"
    x, y = generate_xy(file_name, path_encoded, ' ')
    X.append(x)
    Y.append(y)

    # The statistics cell that follows indexes network[0], so the list must be filled
    # here; leaving it empty makes that cell fail on a fresh kernel.
    # NOTE(review): pickle.load can execute arbitrary code; only read trusted data files.
    with open(file_name, "rb") as file:
        stat = pickle.load(file)
    # NOTE(review): get_graphTensor is assumed to be provided by tf_helpers.py - confirm.
    network.append(get_graphTensor(stat["data"]["network"]))

# Stack the per-file samples along a new leading axis.
X = tf.stack(X, axis=0)
Y = tf.stack(Y, axis=0)
    

  0%|          | 0/32 [00:00<?, ?it/s]

In [22]:
# Mean and standard deviation of the first entry of `network`, reduced over axis 0.
first_network = network[0]
mean = tf.reduce_mean(first_network, axis=0)
std = tf.math.reduce_std(first_network, axis=0)
# std = tf.where(tf.equal(std, 0), 1.0, std)


In [23]:
# Show the first rows of the std / mean values side by side.
column_stats = pd.DataFrame({"std": std.numpy(), "mean": mean.numpy()})
column_stats.head()

Unnamed: 0,std,mean
0,6.745369,13.0
1,6.745369,13.0
2,0.0,2000.0
3,0.0,40.0
4,3.576279e-07,0.6


In [30]:
# Tabulate the first sample of X for inspection.
a = pd.DataFrame(X[0].numpy())
a.head()  # an assignment alone renders nothing; end on an expression so the table is shown

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.041667,1.0,1.0,1.0,0.0,0.0,0.0,0.0
1,0.041667,0.0,1.0,1.0,1.0,0.089089,0.67331,0.636522,0.223377
2,0.0,0.208333,1.0,1.0,1.0,0.855856,0.401213,0.469565,0.994805
3,0.208333,0.0,1.0,1.0,1.0,0.147147,0.373484,0.372174,0.170563
4,0.041667,0.083333,1.0,1.0,1.0,0.0,0.0,0.0,0.0


In [31]:
# Binary mask: 1.0 where an entry of X is non-zero, 0.0 elsewhere.
mask = tf.cast(tf.not_equal(X, 0), dtype=tf.float32) # TensorShape([32, 625, 9])
# The mask is 0 only where X is already 0, so this product leaves the values of X unchanged.
X = X * mask # TensorShape([32, 625, 9])
# Dense layer with 512 units and no activation specified; X1 is not used again in the visible cells.
dense_layer = tf.keras.layers.Dense(units=512)
X1 = dense_layer(X)
# Last expression of the cell: displays the shape of Y.
Y.shape

TensorShape([32, 625, 3])

In [32]:
%run tf_attention.py

# Build the encoder from the configured feature width instead of repeating the literal.
encoder = Encoder(input_dim=input_dim, d_model=512, N=6, heads=8, dropout=0.1)
encoder_output, attention_scores = encoder(X)
print(encoder_output.shape)

(32, 625, 9)
(32, 625, 9)
(32, 625, 9)
(32, 625, 9)
(32, 625, 9)
(32, 625, 9)
(32, 625, 3)


In [35]:
pd.DataFrame(encoder_output[0].numpy()).head()

Unnamed: 0,0,1,2
0,0.0,2.025494,0.0
1,0.0,2.3439,0.701559
2,0.768207,0.641126,0.0
3,0.0,2.381319,0.0
4,0.0,1.976538,0.0


### Test Decoder

In [36]:
%run tf_attention.py
# Build the decoder from the configured target width instead of repeating the literal.
decoder = Decoder(output_dim=output_dim, d_model=512, N=6, heads=8, dropout=0.1)
# The decoder receives the targets together with the encoder output computed above.
decoder_out, scores = decoder(Y, encoder_output)

attn2:  (32, 625, 3)
attn2:  (32, 625, 3)
attn2:  (32, 625, 3)
attn2:  (32, 625, 3)
attn2:  (32, 625, 3)
attn2:  (32, 625, 3)


In [37]:
pd.DataFrame(decoder_out[0].numpy()).head()

Unnamed: 0,0,1,2
0,-1.403409,0.847762,0.555647
1,0.404544,-1.375199,0.970655
2,1.338848,-1.061998,-0.27685
3,0.796012,-1.409576,0.613564
4,-1.403409,0.847762,0.555647


### Test Transformer

In [63]:
%run tf_attention.py

# Keyword arguments mirror the construction used in the training cell and avoid
# relying on positional order.
transformer = Transformer(input_dim=input_dim, output_dim=output_dim, d_model=512, N=6, heads=8, dropout=0.1)
# `evsal` was a typo; `eval()` is the method the training cell calls on the same class.
transformer.eval()
out = transformer(X, Y)
pd.DataFrame(out[0].numpy()).head()

Unnamed: 0,0,1,2
0,-2.298156,0.605001,-0.69596
1,-2.339311,0.621965,-0.694398
2,-1.098205,0.323246,-0.254458
3,-2.294102,0.617391,-0.663939
4,-2.298156,0.605001,-0.69596


### TRY TRAINING AND VALIDATING

In [61]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
%run tf_attention.py

# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# device = tf.device('gpu' if tf.test.is_gpu_available() else 'cpu')
device = 'cpu'

model = Transformer(input_dim=input_dim, output_dim=output_dim, d_model=512, N=2, heads=8, dropout=0.1)

epochs = 100
# NOTE(review): declared but not used by the loop below; every batch triggers an optimizer step.
gradient_accumulation_steps = 4
learning_rate = 0.001

loss_fn = MeanSquaredError()
optimizer = Adam(learning_rate=learning_rate)

# Per-epoch mean losses, appended once at the end of every epoch.
train_losses = []
val_losses = []

# Training loop
for epoch in range(epochs):
    print(f"Epoch [{epoch+1}/{epochs}] -----")

    # Training phase
    total_train_loss = 0.0
    with tqdm(total=len(train_data_loader), unit="batch") as pbar:
        for step, (src, trg) in enumerate(train_data_loader, start=1):
            with tf.device(device):
                with tf.GradientTape() as tape:
                    output = model(src, trg)
                    loss = loss_fn(trg, output)

                # Backpropagate and update the model
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))

            # Accumulate plain Python floats so the history lists hold numbers, not tensors.
            total_train_loss += float(loss)

            pbar.update(1)
            # `step` batches have been processed so far; dividing by step + 1 would
            # under-report the running mean.
            pbar.set_description(f"Train Loss: {total_train_loss / step:.4f}")

    # Validation phase
    # NOTE(review): nothing switches the model back after eval(); if eval() toggles a
    # persistent flag, epochs after the first train in eval mode - confirm in tf_attention.py.
    model.eval()
    total_val_loss = 0.0
    with tqdm(total=len(val_data_loader), unit="batch") as pbar:
        for step, (src, trg) in enumerate(val_data_loader, start=1):
            with tf.device(device):
                # Same forward pass and loss as the training step. The previous version
                # dropped one position from the targets and flattened them to 1-D while
                # the predictions stayed 2-D, so the two no longer lined up.
                output = model(src, trg)
                loss = loss_fn(trg, output)

            total_val_loss += float(loss)

            pbar.update(1)
            pbar.set_description(f"Val Loss: {total_val_loss / step:.4f}")

    mean_train_loss = total_train_loss / len(train_data_loader)
    mean_val_loss = total_val_loss / len(val_data_loader)
    train_losses.append(mean_train_loss)
    val_losses.append(mean_val_loss)

    # Print the training and validation losses
    print(f"Train Loss: {mean_train_loss:.4f}, Val Loss: {mean_val_loss:.4f}")


Epoch [1/100] -----


  0%|          | 0/32 [00:00<?, ?batch/s]

KeyboardInterrupt: 

In [None]:
def plot_loss(train_losses, val_losses, epochs=None):
    """Plot training and validation loss per epoch on a single figure.

    Args:
        train_losses: sequence with one training loss per completed epoch.
        val_losses: sequence with one validation loss per completed epoch.
        epochs: maximum number of epochs to draw. ``None`` (the default) draws
            every recorded value. A history shorter than ``epochs`` (for example
            after an interrupted run) is drawn as far as it goes instead of
            failing on an x/y length mismatch.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    plt.figure(figsize=(12, 6))
    for label, losses in (('Training Loss', train_losses), ('Validation Loss', val_losses)):
        history = list(losses)
        if epochs is not None:
            history = history[:epochs]
        # Each curve gets its own x range so the x and y lengths always agree.
        plt.plot(range(1, len(history) + 1), history, label=label)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.grid(True)
    plt.show()

# Example usage
# Placeholder values for trying out the plot. Distinct names are used so the loss
# histories and epoch count produced by the training cell are not overwritten.
example_train_losses = [0.8, 0.6, 0.5, 0.4, 0.3]
example_val_losses = [0.9, 0.7, 0.6, 0.5, 0.4]
example_epochs = 5

plot_loss(example_train_losses, example_val_losses, example_epochs)