In [None]:
import os
import re
import math
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn import Linear, ReLU, Dropout, Conv2d, MaxPool2d
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch.optim import AdamW

import gurobipy as gb
from gurobipy import GRB
import time

### Check whether a GPU is available

In [None]:
# Expose only the first GPU (index 0) to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Use the GPU when CUDA reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# `interval` selects the preprocessed-data folder (it is the suffix of the directory name)
# and is reused later in the checkpoint file name.
interval = 15

train_test_dir = os.path.join(os.getcwd(), f"dataGeneration/preprocessed_data_nopadding_{interval}")


def _load_npy(file_name):
    """Load a single ``.npy`` array stored inside ``train_test_dir``."""
    return np.load(os.path.join(train_test_dir, file_name))


# Model inputs and targets for the training and validation splits.
X_train, X_val = _load_npy("X_train.npy"), _load_npy("X_val.npy")
y_train, y_val = _load_npy("y_train.npy"), _load_npy("y_val.npy")

# Sample indices of each split, stored as 64-bit integers.
index_train = _load_npy("indices_train.npy").astype("int64")
index_val = _load_npy("indices_val.npy").astype("int64")

# Validation-only reference arrays
# (presumably solver time / objective / schedule / model id -- TODO confirm).
solTime_val = _load_npy("solTime_val.npy")
objVal_val = _load_npy("objVal_val.npy")
schedule_val = _load_npy("schedule_val.npy").astype("int32")
model_val = _load_npy("model_val.npy").astype("int32")


In [None]:
# Sanity check: print train/validation shapes side by side for every paired array ...
for train_part, val_part in ((X_train, X_val), (y_train, y_val), (index_train, index_val)):
    print(train_part.shape, val_part.shape)

# ... followed by the shape of each validation-only array, one per line.
for val_only in (solTime_val, objVal_val, schedule_val, model_val):
    print(val_only.shape)


### Get max length of X and y for padding purposes later (only for training)

### Building the Transformer network

In [None]:
# Per-token feature width, read from the second axis of the first training sample.
dimensions = X_train[0].shape[1]

# Fixed problem sizes (presumably time steps, buses and solar units -- TODO confirm).
nbTime = 48
nbBus = 33
nbSolar = 3

# Both system tables are read from the `systemData` folder next to the notebook.
data_dir = os.path.join(os.getcwd(), 'systemData')

cs_table = pd.read_csv(os.path.join(data_dir, 'cs_params_variable.csv'))
charging_station = np.squeeze(cs_table.to_numpy())
nbCS = len(charging_station)

route_table = pd.read_csv(os.path.join(data_dir, 'EV_routes.csv'))
EV_routes = route_table.to_numpy()
nbRoute = EV_routes.shape[0]

# Size of the model's output layer: (nbTime - 1) entries per route
# plus 2 * nbTime entries per charging station.
nbOut = nbRoute * (nbTime - 1) + 2 * nbCS * nbTime

print(dimensions, nbOut)

In [None]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a batch-first sequence.

    The encoding table is precomputed once and stored in the non-trainable
    buffer ``pe`` of shape ``(1, max_len, d_model)``: even feature indices hold
    sines, odd feature indices hold cosines.

    Parameters
    ----------
    d_model : int
        Size of the feature (last) dimension of the input. May be odd.
    dropout : float
        Dropout probability applied after the encoding has been added.
    max_len : int
        Number of positions for which encodings are precomputed.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # For odd d_model there is one cosine slot fewer than sine slots, so the
        # angle table is trimmed to d_model // 2 columns (a no-op for even d_model).
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        # Stored batch-first so it broadcasts over the batch dimension in forward().
        self.register_buffer('pe', pe.transpose(0, 1))

    def forward(self, x):
        """Return ``dropout(x + pe[:, :x.size(1)])`` for ``x`` of shape (batch, seq, d_model)."""
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)

class Transformer(nn.Module):
    """Transformer encoder followed by a per-token sigmoid MLP head.

    Pipeline: linear embedding -> ``nn.TransformerEncoder`` (batch-first) ->
    drop the first ``context_len`` tokens -> three-layer MLP with a sigmoid
    output, applied to every remaining token. No positional encoding is applied.

    Parameters
    ----------
    dimensions : int
        Feature size of each input token.
    out_dim : int
        Number of sigmoid outputs produced for each retained token.
    d_model : int
        Embedding width used inside the encoder.
    nhead : int
        Number of attention heads per encoder layer.
    num_layers : int
        Number of stacked encoder layers.
    dim_feedforward : int
        Hidden width of the feed-forward sublayer inside each encoder layer.
    dropout : float
        Dropout probability inside the encoder layers.
    ffn : int
        Hidden width of the MLP head.
    context_len : int or None
        Number of leading tokens discarded after encoding. When ``None`` the
        module-level value ``nbBus * 2 + nbSolar`` is looked up at call time.

    Notes
    -----
    Earlier revisions accepted ``d_model``, ``nhead``, ``dim_feedforward``,
    ``dropout`` and ``ffn`` but ignored them in favour of hard-coded values
    (64, 4, 256, 0.3, 256). The arguments are now honoured, and the signature
    defaults were set to those effective values so that ``Transformer(dimensions,
    out_dim)`` builds exactly the same architecture (and state-dict layout) as before.
    """

    def __init__(self, dimensions: int, out_dim: int, d_model=64, nhead=4, num_layers=2, dim_feedforward=256, dropout=0.3, ffn=256, context_len=None):
        super(Transformer, self).__init__()

        self.d_model = d_model
        self.dim_feedforward = dim_feedforward
        self.nhead = nhead
        self.dp = dropout
        self.ffn = ffn
        self.context_len = context_len

        self.embedding = nn.Linear(dimensions, self.d_model)

        transformer_layer = nn.TransformerEncoderLayer(d_model=self.d_model, nhead=self.nhead, dim_feedforward=self.dim_feedforward, dropout=self.dp, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(transformer_layer, num_layers=num_layers)

        self.classifier = nn.Sequential(
            nn.Linear(self.d_model, self.ffn),
            nn.ReLU(),
            nn.Linear(self.ffn, self.ffn),
            nn.ReLU(),
            nn.Linear(self.ffn, out_dim),
            nn.Sigmoid())

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, dimensions) to (batch, seq - context_len, out_dim).

        Every output value is a sigmoid activation.
        """
        x = self.embedding(x)
        x = self.transformer_encoder(x)

        # Only the tokens after the leading context block are fed to the head.
        if self.context_len is not None:
            skip = self.context_len
        else:
            skip = nbBus * 2 + nbSolar
        x = x[:, skip:, :]

        return self.classifier(x)
        

### Create Dataset and DataLoader

In [None]:
# Training hyperparameters, gathered in one place so they are easy to tune.
config = dict(
    batch_size=4,         # samples per gradient update
    EPOCHS=300,           # full passes over the training set
    LEARNING_RATE=5e-4,   # optimizer step size
    WEIGHT_DECAY=1e-4,    # weight-decay coefficient for the optimizer
)

In [None]:
class coordinationDataset(TensorDataset):
    """Index-aligned (features, targets) dataset that converts samples lazily.

    ``X`` and ``y`` are kept as given; each lookup converts the selected sample
    to ``float32`` tensors, and the target is additionally rounded to the
    nearest integer value. The parent initializer is called without tensors,
    so samples are served from ``self.X`` / ``self.y`` only.
    """

    def __init__(self, X, y):
        super().__init__()
        self.X, self.y = X, y

    def __len__(self):
        # The dataset length is driven by the feature container.
        return len(self.X)

    def __getitem__(self, index):
        features = torch.tensor(self.X[index], dtype=torch.float32)
        targets = torch.tensor(self.y[index], dtype=torch.float32).round()
        return features, targets

In [None]:
# Wrap the numpy splits so that a DataLoader can batch them.
train_dataset = coordinationDataset(X=X_train, y=y_train)
val_dataset = coordinationDataset(X=X_val, y=y_val)


In [None]:
# Fresh model with the default architecture.
net = Transformer(dimensions=dimensions, out_dim=nbOut)

# Only the training data is shuffled; validation keeps its stored order.
batch_size = config['batch_size']
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# config['WEIGHT_DECAY'] is declared as the optimizer's weight decay but was never
# passed on, so it had no effect; it is now wired in.
# NOTE(review): this changes training dynamics relative to earlier runs -- set
# WEIGHT_DECAY to 0 in `config` to reproduce them.
optimizer = optim.Adam(
    net.parameters(),
    lr=config["LEARNING_RATE"],
    weight_decay=config["WEIGHT_DECAY"],
)


#### Define custom loss function

In [None]:
def asymmetric_loss(predict, target, gamma_neg=0.3, gamma_pos=0, clip=0.0, eps=1e-8, disable_torch_grad_focal_loss=True):
    """Asymmetric focal-style binary cross-entropy, summed over all elements.

    Parameters
    ----------
    predict : torch.Tensor
        Predicted probabilities. No sigmoid is applied here, so the values must
        already lie in [0, 1].
    target : torch.Tensor
        Multi-label binarized targets with the same shape as ``predict``.
    gamma_neg, gamma_pos : float
        Focusing exponents for negative (target == 0) and positive
        (target == 1) elements. Focusing is skipped when neither is positive.
    clip : float or None
        If positive, this margin is added to the negative-class probability
        ``1 - predict``, which is then capped at 1.
    eps : float
        Lower clamp applied to probabilities before taking the logarithm.
    disable_torch_grad_focal_loss : bool
        If True, the focusing weight is computed without gradient tracking, so
        it acts as a constant factor during back-propagation.

    Returns
    -------
    torch.Tensor
        Scalar tensor: the negated sum of the weighted log-likelihood terms.
    """

    # Probabilities of the positive and the negative class.
    xs_pos = predict
    xs_neg = 1 - predict

    # Asymmetric clipping (probability shifting) of the negative class.
    if clip is not None and clip > 0:
        xs_neg = (xs_neg + clip).clamp(max=1)

    # Plain binary cross-entropy terms.
    los_pos = target * torch.log(xs_pos.clamp(min=eps))
    los_neg = (1 - target) * torch.log(xs_neg.clamp(min=eps))
    loss = los_pos + los_neg

    # Asymmetric focusing.
    if gamma_neg > 0 or gamma_pos > 0:
        # The previous implementation switched autograd off and then unconditionally
        # back ON, which silently re-enabled gradient tracking for callers running
        # inside torch.no_grad(). The context manager restores whatever mode was
        # active before, and never enables gradients the caller had disabled.
        track_grad = torch.is_grad_enabled() and not disable_torch_grad_focal_loss
        with torch.set_grad_enabled(track_grad):
            pt = xs_pos * target + xs_neg * (1 - target)  # pt = p if t > 0 else 1-p
            one_sided_gamma = gamma_pos * target + gamma_neg * (1 - target)
            one_sided_w = torch.pow(1 - pt, one_sided_gamma)
        loss = loss * one_sided_w

    return -loss.sum()


In [None]:
# Move the model's parameters and buffers to the selected device (GPU if available, else CPU).
net = net.to(device)

### Start of Training Loop

In [None]:
# Per-epoch mean batch loss, for plotting after training.
loss_list = []
loss_list_val = []

# Make sure the checkpoint directory exists before the first save attempt.
checkpoint_path = os.path.join(os.getcwd(), f"ML_Model/transformer_coordination_nopadding_{interval}.pth")
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Starting from +inf guarantees that the first epoch is checkpointed as well;
# previously no file was written if the first epoch was the best one (or the only one).
best_val_loss = float("inf")

for epoch in range(config["EPOCHS"]):
    running_loss = 0.0
    running_loss_val = 0.0

    # ---- training pass ----
    net.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        # Flatten the per-token outputs so they line up with the flat label vectors.
        outputs = net(inputs).reshape(labels.shape[0], -1)

        loss = asymmetric_loss(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    epoch_loss = running_loss / len(train_loader)
    print('Epoch %d loss: %.3f' % (epoch + 1, epoch_loss))

    # ---- validation pass ----
    net.eval()
    for inputs, labels in valid_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # No graph is needed for validation. The guard is re-entered for every batch
        # so the forward pass stays graph-free even if a callee changes the grad mode.
        with torch.no_grad():
            outputs = net(inputs).reshape(labels.shape[0], -1)
            running_loss_val += asymmetric_loss(outputs, labels).item()
    epoch_loss_val = running_loss_val / len(valid_loader)
    print('Epoch %d val loss: %.3f' % (epoch + 1, epoch_loss_val))

    if loss_list_val:
        print("min loss: ", min(loss_list_val))

    # Checkpoint whenever the validation loss improves on the best value seen so far.
    if epoch_loss_val < best_val_loss:
        best_val_loss = epoch_loss_val
        torch.save(net.state_dict(), checkpoint_path)
        print("Model saved")

    loss_list.append(epoch_loss)
    loss_list_val.append(epoch_loss_val)


In [None]:
# Learning curves: mean per-batch loss of every epoch for both splits.
fig, ax = plt.subplots()
ax.plot(loss_list, label="train")
ax.plot(loss_list_val, label="validation")
ax.set_xlabel("Epoch")
ax.set_ylabel("Mean loss per batch")
ax.set_title("Training vs. validation loss")
ax.legend()
plt.show()

### Model Testing

In [None]:
# Rebuild the architecture and restore the best checkpoint written during training.
checkpoint_path = os.path.join(os.getcwd(), f"ML_Model/transformer_coordination_nopadding_{interval}.pth")
net = Transformer(dimensions=dimensions, out_dim=nbOut)
# map_location lets a checkpoint saved from a GPU run be restored on a CPU-only
# machine (and vice versa) instead of failing while deserializing CUDA tensors.
net.load_state_dict(torch.load(checkpoint_path, map_location=device))

In [None]:
# Prepare the reloaded model for evaluation:
# eval() switches the module to inference mode (e.g. dropout layers become inactive) ...
net.eval()
# ... and to(device) places it on the device the evaluation batches are moved to.
net.to(device)

#### Testing of bit accuracy

In [None]:
# Probabilities at or above this threshold are decoded as bit 1, all others as bit 0.
thres = 0.5

# Per-batch statistics; they are averaged over batches at the end of the cell.
one_accuracy = []
zero_accuracy = []
bit_accuracy = []
running_loss = 0
mean_one = []
mean_zero = []

loss_fn = nn.BCELoss()
net.eval()

for inputs, labels in valid_loader:
    inputs, labels = inputs.to(device), labels.to(device)

    # Pure inference: no autograd graph and no optimizer involvement are needed.
    with torch.no_grad():
        outputs = net(inputs).reshape(labels.shape[0], -1)
        running_loss += loss_fn(outputs, labels).item()

    # Flatten everything to bit level and threshold the probabilities.
    outputs_percent = outputs.reshape(-1)
    outputs = (outputs_percent >= thres).to(outputs_percent.dtype)
    labels = labels.reshape(-1)

    # Predictions at the positions whose true bit is 1 / 0.
    one_outputs = outputs[labels == 1]
    zero_outputs = outputs[labels == 0]

    # Accuracy = 1 - error rate. A batch without any bit of the respective class
    # yields NaN (0 / 0), which the NaN-aware averages below ignore.
    one_acc = 1 - torch.sum(torch.abs(1 - one_outputs)) / one_outputs.shape[0]
    zero_acc = 1 - torch.sum(torch.abs(0 - zero_outputs)) / zero_outputs.shape[0]
    bit_acc = 1 - torch.sum(torch.abs(outputs - labels)) / labels.shape[0]

    one_accuracy.append(one_acc.item())
    zero_accuracy.append(zero_acc.item())
    bit_accuracy.append(bit_acc.item())

    # Probability statistics, grouped by the decoded bit.
    pred_one = outputs == 1
    pred_zero = outputs == 0

    p_1, y_1 = outputs_percent[pred_one], labels[pred_one]
    p_0, y_0 = outputs_percent[pred_zero], labels[pred_zero]

    # Within each group: p where the true bit is 1, and (1 - p) where the true bit is 0.
    avg_1 = torch.mean(torch.cat((p_1[y_1 == 1], 1 - p_1[y_1 == 0])))
    avg_0 = torch.mean(torch.cat((p_0[y_0 == 1], 1 - p_0[y_0 == 0])))

    mean_one.append(avg_1.item())
    mean_zero.append(avg_0.item())

# nanmean equals mean whenever every batch produced a finite value.
print("Average one bit accuracy", np.nanmean(one_accuracy))
print("Average zero bit accuracy", np.nanmean(zero_accuracy))
print("Average bit accuracy", np.nanmean(bit_accuracy))
print('Loss:', running_loss / len(valid_loader))
print(np.nanmean(mean_one), np.nanmean(mean_zero))