In [1]:
import torch
import torch.nn as nn
import pickle

In [2]:
class PrimalNet(nn.Module):
    """Fully connected network built from ``data.xdim`` inputs to ``data.ydim`` outputs.

    Every hidden layer is ``Linear -> ReLU``; the final ``Linear`` layer has
    no activation. All ``Linear`` weights are re-initialised with
    ``kaiming_normal_`` (biases keep PyTorch's default initialisation).

    Args:
        data: Problem object. Only its ``xdim`` and ``ydim`` attributes are
            read here; the object itself is kept on ``self._data``.
        hidden_sizes: Widths of the hidden layers, in order. Any sequence of
            ints (list or tuple) is accepted; an empty sequence yields a
            single linear map.
    """

    def __init__(self, data, hidden_sizes):
        super().__init__()
        self._data = data
        self._hidden_sizes = hidden_sizes

        # list(...) so that tuples concatenate as cleanly as lists do.
        layer_sizes = [data.xdim] + list(hidden_sizes) + [data.ydim]
        output_index = len(layer_sizes) - 2  # index of the final Linear layer
        layers = []

        # Create all layers first, then initialise them, so the order in which
        # random numbers are drawn is the same as it has always been.
        for index, (in_size, out_size) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
            layers.append(nn.Linear(in_size, out_size))
            # Decide by position, not by width: comparing out_size with
            # data.ydim would drop the activation from any hidden layer whose
            # width happens to equal ydim.
            if index != output_index:
                layers.append(nn.ReLU())

        for layer in layers:
            if isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight)

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``self.net(x)``."""
        return self.net(x)

class DualNet(nn.Module):
    """Fully connected network that outputs two groups of values, ``mu`` and ``lamb``.

    The hidden stack is ``Linear -> ReLU`` per entry of ``hidden_sizes``
    (weights initialised with ``kaiming_normal_``), followed by a
    zero-initialised output layer of width ``mu_size + lamb_size``. Because
    the output layer starts at zero, a freshly constructed network returns
    all zeros.

    Note: the output layer is registered both as ``self.out_layer`` and as
    the last entry of ``self.net``, so its parameters appear under both
    prefixes in ``state_dict()``. This is kept so that existing checkpoints
    continue to load.

    Args:
        data: Problem object. Only its ``xdim`` attribute is read here; the
            object itself is kept on ``self._data``.
        hidden_sizes: Widths of the hidden layers, in order. Any sequence of
            ints (list or tuple) is accepted; if empty, the output layer is
            applied directly to the input.
        mu_size: Number of leading output features returned as ``mu``.
        lamb_size: Number of trailing output features returned as ``lamb``.
    """

    def __init__(self, data, hidden_sizes, mu_size, lamb_size):
        super().__init__()
        self._data = data
        self._hidden_sizes = hidden_sizes
        self._mu_size = mu_size
        self._lamb_size = lamb_size

        # list(...) so that tuples concatenate as cleanly as lists do.
        layer_sizes = [data.xdim] + list(hidden_sizes)
        layers = []
        for in_size, out_size in zip(layer_sizes[:-1], layer_sizes[1:]):
            layers.append(nn.Linear(in_size, out_size))
            layers.append(nn.ReLU())

        # Initialise the hidden layers before the output layer is created.
        for layer in layers:
            if isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight)

        # layer_sizes[-1] is the last hidden width, or xdim when there are no
        # hidden layers (indexing hidden_sizes[-1] would raise in that case).
        self.out_layer = nn.Linear(layer_sizes[-1], self._mu_size + self._lamb_size)
        nn.init.zeros_(self.out_layer.weight)
        nn.init.zeros_(self.out_layer.bias)
        layers.append(self.out_layer)

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``(out_mu, out_lamb)``, the output split along the last axis.

        ``out_mu`` holds the first ``mu_size`` features and ``out_lamb`` the
        rest. Slicing the last axis gives the same result as before for
        2-D (batched) input and additionally supports unbatched 1-D input.
        """
        out = self.net(x)
        out_mu = out[..., :self._mu_size]
        out_lamb = out[..., self._mu_size:]
        return out_mu, out_lamb

In [3]:
def _load_pickled_dataset(path):
    """Open ``path`` in binary mode and return the unpickled object."""
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file, so these paths must only ever point at trusted, locally
    # generated files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


original_data = _load_pickled_dataset(
    "QP_data/Dual_QP_simple_dataset_var100_ineq50_eq50_ex10000")

varying_cm_row_data = _load_pickled_dataset(
    "QP_data/Varying_G_type=row_dataset_var100_ineq50_eq50_ex10000")

varying_cm_column_data = _load_pickled_dataset(
    "QP_data/Varying_G_type=column_dataset_var100_ineq50_eq50_ex10000")

varying_cm_random_data = _load_pickled_dataset(
    "QP_data/Varying_G_type=random_dataset_var100_ineq50_eq50_ex10000")


In [4]:
# Hidden-layer widths shared by every primal network built in this cell.
PRIMAL_HIDDEN_SIZES = (500, 500)

# Each network gets its own list so no two networks share the same object.
original_primal_net = PrimalNet(original_data, list(PRIMAL_HIDDEN_SIZES))
row_primal_net = PrimalNet(varying_cm_row_data, list(PRIMAL_HIDDEN_SIZES))
col_primal_net = PrimalNet(varying_cm_column_data, list(PRIMAL_HIDDEN_SIZES))
random_primal_net = PrimalNet(varying_cm_random_data, list(PRIMAL_HIDDEN_SIZES))

# Load state dictionaries.
# map_location="cpu" lets a checkpoint that was saved from a GPU be read on a
# CPU-only machine. The networks above are constructed without an explicit
# device, and load_state_dict copies the loaded values into their existing
# parameters, so the networks end up on the same device either way.
original_primal_state_dict = torch.load(
    "benchmark_experiment_output/original/0.006_primal_net.dict",
    map_location="cpu", weights_only=True)
row_primal_state_dict = torch.load(
    "benchmark_experiment_output/row/0.15_primal_net.dict",
    map_location="cpu", weights_only=True)
col_primal_state_dict = torch.load(
    "benchmark_experiment_output/column/0.08_primal_net.dict",
    map_location="cpu", weights_only=True)
random_primal_state_dict = torch.load(
    "benchmark_experiment_output/random/0.08_primal_net.dict",
    map_location="cpu", weights_only=True)

# Load the state dictionaries into the networks. The default strict loading
# raises if any key is missing or unexpected; the last call is left as the
# cell's final expression so its result is displayed.
original_primal_net.load_state_dict(original_primal_state_dict)
row_primal_net.load_state_dict(row_primal_state_dict)
col_primal_net.load_state_dict(col_primal_state_dict)
random_primal_net.load_state_dict(random_primal_state_dict)

<All keys matched successfully>

In [7]:
# Each network is paired with the dataset object it was constructed from.
primal_eval_pairs = [
    (original_primal_net, original_data),
    (row_primal_net, varying_cm_row_data),
    (col_primal_net, varying_cm_column_data),
    (random_primal_net, varying_cm_random_data),
]

# Printed columns, in order:
#   mean known objective, mean predicted objective, mean relative objective gap,
#   mean of per-row max ineq_dist, mean of per-row mean ineq_dist,
#   mean of per-row max |eq_resid|, mean of per-row mean |eq_resid|
for model, data in primal_eval_pairs:
    X = data.X[data.test_indices]

    # Evaluation only: put the module in eval mode (a no-op for Linear/ReLU
    # layers, but correct if dropout or normalisation is ever added) and skip
    # autograd bookkeeping for the forward pass.
    model.eval()
    with torch.no_grad():
        Y_pred = model(X)

    obj_known = data.obj_fn(data.Y[data.test_indices]).detach().cpu().numpy()
    obj_pred = data.obj_fn(Y_pred).detach().cpu().numpy()
    # Relative gap measured against the known objective value.
    # NOTE(review): the sign of this ratio depends on the sign of obj_known -
    # confirm the intended convention if objectives can be positive.
    obj_gap = ((obj_known - obj_pred)/obj_known).mean()

    ineq_dist = data.ineq_dist(X, Y_pred)
    eq_resid = data.eq_resid(X, Y_pred)

    # Reduce over dim=1 per row first, then average over rows.
    ineq_max_vals = torch.max(ineq_dist, dim=1)[0].detach().cpu().numpy().mean()
    ineq_mean_vals = torch.mean(ineq_dist, dim=1).detach().cpu().numpy().mean()
    eq_max_vals = torch.max(torch.abs(eq_resid), dim=1)[0].detach().cpu().numpy().mean()
    eq_mean_vals = torch.mean(torch.abs(eq_resid), dim=1).detach().cpu().numpy().mean()

    print(obj_known.mean(), obj_pred.mean(), obj_gap, ineq_max_vals, ineq_mean_vals, eq_max_vals, eq_mean_vals)

-15.037454694128508 -14.986555323269725 0.003388805568463467 0.0025407374717075935 0.00022056134756535966 0.005797883440416547 0.001874068337470995
-15.57003679726652 -15.565470437540043 0.00028990328345969435 0.14287585365932684 0.003998443021954674 0.059967531574818066 0.015439284315856441
-15.357705954282094 -13.367091352379102 0.1296201915871199 0.07148420878412369 0.002151673491094704 0.024722809905760316 0.0077619844199713055
-15.602404350014051 -14.55370142820193 0.06708270892571695 0.08031144552102452 0.002277259257184765 0.006866528529356717 0.00221524296613162
