In [27]:
from lstm_benchmark_bob import Network, predict_sequences, evaluate_predictions 
from torch.utils.data import DataLoader, TensorDataset, random_split


import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import math
import time
import matplotlib.pyplot as plt
import gurobipy as gp
from gurobipy import GRB




In [28]:
from transformer_mpc import load_data_npz, normalize_batch, normalize_tensor, global_mean, global_std
file_path = "path_to_microgrid_data.npz"
## Generate Data


cbuy_tensor, csell_tensor, cprod_tensor, power_res_tensor, power_load_tensor, x0_tensor, delta_transformed_tensor = load_data_npz(file_path)
net_power_load = power_load_tensor - power_res_tensor
src_data = torch.cat([
    cbuy_tensor.unsqueeze(-1),  # Adding an extra dimension for feature alignment
    csell_tensor.unsqueeze(-1),
    cprod_tensor.unsqueeze(-1),
    net_power_load.unsqueeze(-1),
    x0_tensor.unsqueeze(1).repeat(1, cbuy_tensor.size(1), 1)  # Repeating x0 across the sequence length
], dim=-1)  # Concatenate along the last dimension to combine features

#Create target data 
tgt_data = delta_transformed_tensor.long()

print("src_data shape" , src_data.shape)
print("tgt_data shape" , tgt_data.shape)




src_data shape torch.Size([16000, 25, 5])
tgt_data shape torch.Size([16000, 25])


In [29]:
## Create Training and Validation Dataset and Mini Batches
mini_batch_size = 32 
dataset_size = src_data.size(0)
val_size = int(dataset_size * 0.2) 
train_size = dataset_size - val_size

full_dataset = TensorDataset(src_data, tgt_data)
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=mini_batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=mini_batch_size, shuffle=False)

In [31]:
from lstm_benchmark import Network, predict_sequences
from transformer_mpc import compute_global_stats, normalize_tensor,  transform_predicted_to_binary_matrix, run_optimization, run_optimization_milp
device = torch.device('cpu')

model_lstm = Network(input_size=5, hidden_size=128, num_layers=1, lr=1e-4, n_actions=32)
model_lstm.load_state_dict(torch.load('LSTM2_network_model.pth'))
model_lstm.to(device)  # Ensure model is on the right device
model_lstm.eval() 
src_batch, tgt_batch = next(iter(val_dataloader))  # Get a batch of data
src_tensor_normalized = normalize_tensor(src_batch[0:1], global_mean, global_std)  # Normalize and use the first item as an example

def predict_single_sequence(model, src_tensor_normalized, device):
    """
    Predicts output for a single input sequence using the provided LSTM model.
    
    Parameters:
    - model: The trained LSTM model.
    - src_tensor_normalized: A normalized input tensor.
    - device: The device (CPU or GPU) the model is running on.
    
    Returns:
    - predicted: The predicted output as a numpy array.
    """
    model.eval()  # Set the model to evaluation mode
    src_tensor_normalized = src_tensor_normalized.to(device)  # Ensure the tensor is on the correct device
    
    # Initialize the hidden and cell states to zeros
    h0 = torch.zeros(model.num_layers, src_tensor_normalized.size(0), model.hidden_size).to(device)
    c0 = torch.zeros(model.num_layers, src_tensor_normalized.size(0), model.hidden_size).to(device)

    with torch.no_grad():  # Context-manager that disables gradient calculation
        outputs = model(src_tensor_normalized, h0, c0)
        _, predicted = torch.max(outputs, 2)  # Get the class with the highest probability for each timestep

    return predicted.cpu().numpy() 
    
predict_single_sequence(model_lstm, src_tensor_normalized, device)




array([[23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
        23, 23, 23, 23, 23, 23, 23, 23, 23]])

In [32]:
from transformer_mpc import gurobi_qp, hybrid_fhocp
def run_optimization(delta, src_tensor, model_type):
    try:
        # Ensure input to gurobi_qp is correctly formatted
        x0 = np.array(src_tensor[0,1,4])
        net_power_load = src_tensor[:,:,3].numpy().squeeze()
        cbuy = src_tensor[:,:,0].numpy().squeeze()
        csell = src_tensor[:,:,1].numpy().squeeze()
        cprod = src_tensor[:,:,2].numpy().squeeze()
        
        # Call gurobi_qp with validated inputs
        mdl = gurobi_qp(x0, 25, net_power_load, cbuy, csell, cprod, delta)
        

        return mdl
    except Exception as e:
        print(f"Exception during {model_type} model optimization: {e}")
        return None
    
def run_optimization_milp(src_tensor, model_type):
    try:
        # Ensure input to gurobi_qp is correctly formatted
        x0 = np.array(src_tensor[0,1,4])
        net_power_load = src_tensor[:,:,3].numpy().squeeze()
        cbuy = src_tensor[:,:,0].numpy().squeeze()
        csell = src_tensor[:,:,1].numpy().squeeze()
        cprod = src_tensor[:,:,2].numpy().squeeze()
        
        # Call gurobi_qp with validated inputs
        mdl_milp = hybrid_fhocp(x0, 25, net_power_load, cbuy, csell, cprod)


        return mdl_milp
    except Exception as e:
        print(f"Exception during {model_type} model optimization: {e}")
        return None




def evaluate_model_over_dataset(model, val_dataset, num_instances, device):
    # Ensure num_instances does not exceed the length of the dataset
    num_samples = min(num_instances, len(val_dataset))
    
    cost_actual_mem = []
    cost_pred_mem = []
    successful_optimizations = 0
    milp_times = []
    lp_times = []

    model.eval()  # Set the model to evaluation mode

    for _ in range(num_samples):
        # Randomly select an index
        idx = np.random.randint(0, len(val_dataset))

        # Fetch the data at the randomly selected index
        src_tensor, tgt_tensor = val_dataset[idx]
        src_tensor = src_tensor.unsqueeze(0)  # Add batch dimension if your model expects it
        tgt_tensor = tgt_tensor.unsqueeze(0)  # Add batch dimension if your model expects it


        # Normalize the source tensor
        global_mean, global_std = compute_global_stats(val_dataset)  # Assume this function is defined correctly
        src_tensor_normalized = normalize_tensor(src_tensor, global_mean, global_std)
        # Actual model optimization using MILP
        delta_actual = transform_predicted_to_binary_matrix(tgt_tensor)
        
        mdl_act = run_optimization(delta_actual, src_tensor, 'actual')
        mdl_milp = run_optimization_milp(src_tensor, 'milp')
        milp_times.append(mdl_milp.Runtime) 

        if mdl_act and mdl_act.status == gp.GRB.OPTIMAL:
            cost_actual_mem.append(mdl_act.ObjVal)

        # LSTM Model predictions and subsequent optimizations
        start_time_pred = time.time()
        predicted = predict_single_sequence(model, src_tensor, device)
        predicted = torch.tensor(predicted)
        prediction_time = time.time() - start_time_pred
        # Convert predicted indices to actionable decisions if necessary
        delta_predicted = transform_predicted_to_binary_matrix(predicted)  # This may need to be adjusted
        mdl_pred = run_optimization(delta_predicted, src_tensor, model.__class__.__name__)
        if mdl_pred and mdl_pred.status == gp.GRB.OPTIMAL:
            successful_optimizations += 1
            total_lp_time = prediction_time + mdl_pred.Runtime
            lp_times.append(total_lp_time)
            cost_pred_mem.append(mdl_pred.ObjVal)

    # Calculate average costs and the optimality gap
    avg_cost_actual = np.mean(cost_actual_mem) if cost_actual_mem else 0
    avg_cost_pred = np.mean(cost_pred_mem) if cost_pred_mem else 0
    optimality_gap = abs((avg_cost_pred - avg_cost_actual)) / avg_cost_actual if avg_cost_actual != 0 else 0
    
    avg_milp_time = np.mean(milp_times)
    avg_lp_time = np.mean(lp_times)

    # Output results
    print(f"Optimality gap: {optimality_gap:.2%}")
    print(f"Average MILP time: {avg_milp_time:.4f} seconds")
    print(f"Average LP time: {avg_lp_time:.4f} seconds")
    print(f"Number of successful optimizations: {successful_optimizations}/{num_samples}")


num_instances = 1
evaluate_model_over_dataset(model_lstm, val_dataset, num_instances, device)


Optimality gap: 0.00%
Average MILP time: 0.0040 seconds
Average LP time: 0.0015 seconds
Number of successful optimizations: 1/1


In [49]:
from transformer_mpc import try_model

def evaluate_model_lstm_vs_transformer_over_dataset(model_lstm, tf_models, val_dataset, num_instances, device):
    # Ensure num_instances does not exceed the length of the dataset
    num_samples = min(num_instances, len(val_dataset))
    
    cost_actual_mem = []
    cost_pred_lstm = []
    cost_pred_tf = []
    successful_optimizations_lstm = 0
    successful_optimizations_tf = 0
    milp_times = []
    lp_times_lstm = []
    lp_times_tf = []
    stage_successes_tf = [0] * len(tf_models)


    model_lstm.eval()  # Set the model to evaluation mode

    for _ in range(num_samples):
        # Randomly select an index
        idx = np.random.randint(0, len(val_dataset))

        # Fetch the data at the randomly selected index
        src_tensor, tgt_tensor = val_dataset[idx]
        src_tensor = src_tensor.unsqueeze(0)  # Add batch dimension if your model expects it
        tgt_tensor = tgt_tensor.unsqueeze(0)  # Add batch dimension if your model expects it


        # Normalize the source tensor
        global_mean, global_std = compute_global_stats(val_dataset)  # Assume this function is defined correctly
        src_tensor_normalized = normalize_tensor(src_tensor, global_mean, global_std)
        # Actual model optimization using MILP
        delta_actual = transform_predicted_to_binary_matrix(tgt_tensor)
        
        mdl_act = run_optimization(delta_actual, src_tensor, 'actual')
        mdl_milp = run_optimization_milp(src_tensor, 'milp')
        milp_times.append(mdl_milp.Runtime) 

        if mdl_act and mdl_act.status == gp.GRB.OPTIMAL:
            cost_actual_mem.append(mdl_act.ObjVal)

        # LSTM Model predictions and subsequent optimizations
        start_time_pred = time.time()
        predicted = predict_single_sequence(model_lstm, src_tensor, device)
        predicted = torch.tensor(predicted)
        prediction_time = time.time() - start_time_pred
        # Convert predicted indices to actionable decisions if necessary
        delta_predicted_lstm = transform_predicted_to_binary_matrix(predicted)  # This may need to be adjusted
        mdl_pred_lstm = run_optimization(delta_predicted_lstm, src_tensor, model.__class__.__name__)
        if mdl_pred_lstm and mdl_pred_lstm.status == gp.GRB.OPTIMAL:
            successful_optimizations_lstm += 1
            total_lp_time = prediction_time + mdl_pred_lstm.Runtime
            lp_times_lstm.append(total_lp_time)
            cost_pred_lstm.append(mdl_pred_lstm.ObjVal)
        
        optimization_found = False
        
        for i, tf_model in enumerate(tf_models):
            tf_model.eval()
            start_time_pred = time.time() 
            delta_predicted_tf = try_model(tf_model, src_tensor_normalized)
            prediction_time = time.time() - start_time_pred 

            mdl_pred_tf = run_optimization(delta_predicted_tf, src_tensor, 'Transformer')
            if mdl_pred_tf and mdl_pred_tf.status == gp.GRB.OPTIMAL:
                if not optimization_found:
                    optimization_found = True
                    total_lp_time = prediction_time + mdl_pred_tf.Runtime
                    lp_times_tf.append(total_lp_time)
                    cost_pred_tf.append(mdl_pred_tf.ObjVal)
                    successful_optimizations_tf += 1
                    for j in range(i, len(tf_models)):
                        stage_successes_tf[j] += 1
                    break
                
    # Calculate average costs and the optimality gap
    avg_cost_actual = np.mean(cost_actual_mem) if cost_actual_mem else 0
    avg_cost_tf = np.mean(cost_pred_tf) if cost_pred_tf else 0
    avg_cost_lstm = np.mean(cost_pred_lstm) if cost_pred_lstm else 0

    optimality_gap_tf = abs((avg_cost_tf - avg_cost_actual)) / avg_cost_actual if avg_cost_actual != 0 else 0
    optimality_gap_lstm = abs((avg_cost_lstm - avg_cost_actual)) / avg_cost_actual if avg_cost_actual != 0 else 0

    
    
    avg_milp_time = np.mean(milp_times)
    avg_lp_time_tf = np.mean(lp_times_tf)
    avg_lp_time_lstm = np.mean(lp_times_lstm)

    for i in range(len(tf_models)):
        success_rate_tf = (stage_successes_tf[i] / num_samples) * 100
        print(f"Success rate with up to {i+1} model(s): {success_rate_tf:.2f}%")
    

    # Output results
    print(f"Optimality gap TF: {optimality_gap_tf:.2%}")
    print(f"Optimality gap LSTM: {optimality_gap_lstm:.2%}")
    
    print(f"Average MILP time: {avg_milp_time:.4f} seconds")
    print(f"Average LP time TF: {avg_lp_time_tf:.4f} seconds")
    print(f"Average LP time LSTM: {avg_lp_time_lstm:.4f} seconds")
    
    print(f"Number of successful optimizations TF: {successful_optimizations_tf}/{num_samples}")
    print(f"Number of successful optimizations LSTM: {successful_optimizations_lstm}/{num_samples}")


tf_models = [shallow, shallow_model2, shallow_model3, backup_model4]

num_instances = 1000

evaluate_model_lstm_vs_transformer_over_dataset(model_lstm, tf_models, val_dataset, num_instances, device)




Success rate with up to 1 model(s): 92.40%
Success rate with up to 2 model(s): 96.40%
Success rate with up to 3 model(s): 97.00%
Success rate with up to 4 model(s): 99.40%
Optimality gap TF: 0.76%
Optimality gap LSTM: 0.18%
Average MILP time: 0.0037 seconds
Average LP time TF: 0.0487 seconds
Average LP time LSTM: 0.0019 seconds
Number of successful optimizations TF: 994/1000
Number of successful optimizations LSTM: 999/1000


In [50]:
from transformer_mpc import cascaded_transformer


evaluate_model_over_dataset_w_multiple_backups2(tf_models, val_dataset, num_instances)

Success rate with up to 1 model(s): 93.20%
Success rate with up to 2 model(s): 96.60%
Success rate with up to 3 model(s): 97.10%
Success rate with up to 4 model(s): 99.30%
Optimality gap: 0.62%
Average MILP time: 0.0038 seconds
Average LP time: 0.0565 seconds
