In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

import time
from tqdm import tqdm

import random
from collections import defaultdict
import pickle
import os

import gurobipy as gp
from gurobipy import GRB

In [None]:
# load best models
# `pickle` is already imported in the import cell at the top of the notebook, so it is
# not re-imported here.
# SECURITY NOTE: pickle.load can execute arbitrary code during deserialization. Only
# load this file if it was produced by a trusted training run.
# The loaded object is indexed later as best_models[n][n_sample].
with open('./results/relu_best_models.pickle', 'rb') as best_models_pickle:
    best_models = pickle.load(best_models_pickle)

In [None]:
# --- experiment size ---
n_max = 5       # max dimension of the input vector
n_samples = 10  # number of sampled quadratic functions for each dimension

# --- result containers (filled by the solve loop) ---
times_n = dict()        # solution times
objs_n = dict()         # objective function values
num_binvars_n = dict()  # number of binary variables
num_constrs_n = dict()  # number of constraints

# --- reproducibility ---
# Seed both the stdlib and the numpy generators, then draw one integer seed
# in [1, 1000) per sample.
global_seed = 777
np.random.seed(global_seed)
random.seed(global_seed)
seeds = np.random.randint(low=1, high=1000, size=(1, n_samples)).ravel()
print(seeds)

In [None]:
# For every input dimension n and every sampled function: encode the trained ReLU
# network as a big-M MILP in Gurobi and minimize the network output (in original,
# unscaled units) over the input box [-2, 2]^n. Runtime, objective value and model
# size of each instance are collected per dimension.
for n in tqdm(range(1, n_max+1)):

    times = []
    objs = []
    num_binvars = []
    num_constrs = []

    for n_sample in tqdm(range(n_samples)):

        # data reading
        random.seed(seeds[n_sample])
        np.random.seed(seeds[n_sample])
        filename = './' + str(n) + '_' + str(n_sample) + '_train.csv'
        df = pd.read_csv(filename)

        # inputs are all columns but the last; the last column is the target
        X_variable_raw = df.iloc[:, 0:-1].values
        Y_variable_raw = df.iloc[:, -1].values

        # scaling (the fitted scalers provide data_min_ / data_range_ for the MILP below)
        sc_X = MinMaxScaler()
        sc_y = MinMaxScaler()

        X_variable = sc_X.fit_transform(X_variable_raw)
        Y_variable = sc_y.fit_transform(Y_variable_raw.reshape(-1,1))
        Y_variable = np.squeeze(Y_variable)

        # set random seed
        random.seed(global_seed)
        np.random.seed(global_seed)

        # use the model trained via quadratic_model_training.ipynb
        nn = best_models[n][n_sample]
        # prediction on the scaled inputs; not used by the MILP below
        Y_predict = nn.predict(X_variable)

        # obtain MILP parameters from the ReLU network structure/parameters
        # weight and bias
        weights = nn.coefs_
        biases = nn.intercepts_

        # kj_raw: number of neurons in each hidden layer; k_hidden: number of hidden layers
        # np.atleast_1d makes a scalar hidden_layer_sizes (single hidden layer given as an
        # int) indexable like the tuple form.
        kj_raw = np.atleast_1d(nn.hidden_layer_sizes).tolist()
        k_hidden = nn.n_layers_ - 2
        # indices_k_hidden: set of hidden layers; indices_kj_hidden: set of tuple (layer k, jth neuron in this layer)
        # indices_kj_dict: mapping of layer k -> list of neurons j in this layer
        indices_k_hidden = range(1, k_hidden+1)
        indices_kj_hidden = []
        indices_kj_dict = defaultdict(list)
        for k in indices_k_hidden:
            for j in range(kj_raw[k-1]):
                indices_kj_hidden.append((k,j))
                indices_kj_dict[k].append(j)
        indices_kj_dict[nn.n_layers_ - 1] = [0]
        indices_kj_dict[0] = list(range(n))
        # indices_kj_first: (k, j) tuple for input layer; indices_kj_last: (k, j) tuple for the output layer
        # indices_kj: (k, j) tuple for all the layers in the network
        # indices_kj_nofirst: (k, j) tuple for all the layers except for the first layer
        indices_kj_first = [(0,i) for i in range(n)]
        indices_kj_last = [(nn.n_layers_ - 1, 0)]
        indices_kj = indices_kj_first + indices_kj_hidden + indices_kj_last
        indices_kj_nofirst = indices_kj_hidden + indices_kj_last

        # parameters

        # lower/upper bound of variable X
        # x_lb_first/x_ub_first: lower/upper bound of variable X for the input layer
        x_lb_first = [-2] * n
        x_ub_first = [2] * n

        # x_ub/x_lb: upper/lower bound of variable X in all the layers (same order as indices_kj)
        # x_ub_dict: mapping of layer -> list of upper bound of all neurons in the layer
        bigM = 1e5

        x_ub = x_ub_first + [bigM] * len(indices_kj_hidden) + [bigM]
        x_lb = x_lb_first + [0] * len(indices_kj_hidden) + [-bigM]
        x_ub_dict = defaultdict(list)
        x_ub_dict[0] = x_ub_first
        for k in indices_k_hidden:
            x_ub_dict[k] = [bigM] * kj_raw[k-1]
        x_ub_dict[k_hidden + 1] = [bigM]

        # scaling: the network is trained using the min-max scaled variables, so we need to have scaler_coeff to do
        # (1) min_max scaling the input variable (happened at the input layer)
        # (2) min_max scaling the output variable (happened at the output layer)
        # to make the constraints consistent we still have scaler_coeff[k] for hidden layer but the values are set to 1 (no scaling)
        scaler_coeff = {}
        scaler_coeff[0] = np.reciprocal(sc_X.data_range_).tolist() # scaler_coeff = 1/(Xmax - Xmin) (i.e., the original range)
        for k in indices_k_hidden:
            scaler_coeff[k] = [1] * kj_raw[k-1] # scaler_coeff = 1 (no scaling)
        scaler_coeff[k_hidden + 1] = np.reciprocal(sc_y.data_range_).tolist() # scaler_coeff = 1/(Ymax - Ymin)

        scaler_min = {}
        scaler_min[0] = sc_X.data_min_.tolist() # Xmin before scaling
        for k in indices_k_hidden:
            scaler_min[k] = [0] * kj_raw[k-1] # Xmin = 0 (no scaling)
        scaler_min[k_hidden + 1] = sc_y.data_min_.tolist() # Ymin before scaling

        # create a new model
        m = gp.Model("RELU")
        m.Params.LogToConsole = 0
        m.setParam(GRB.Param.TimeLimit, 1800.0)

        # create variables
        # z: binary activation indicator, x: neuron value, s: non-negative slack
        # (addVars default lower bound is 0) for every non-input neuron
        z = m.addVars(indices_kj_nofirst, name = 'z', vtype = GRB.BINARY)
        x = m.addVars(indices_kj, ub = x_ub, lb = x_lb, name = 'x')
        s = m.addVars(indices_kj_nofirst, name = 's')

        # Minimization of the single output neuron
        m.setObjective(x[nn.n_layers_-1,0], GRB.MINIMIZE)

        # Add constraints
        # calc_layers: (scaled) pre-activation of neuron (k, j) == scaled x[k,j] - s[k,j]
        # constraint_x / constraint_z: big-M pair; z = 1 forces s = 0, z = 0 forces x <= 0
        # NOTE(review): indices_kj_nofirst includes the output neuron, so it also gets z/s and
        # the big-M pair. Confirm this is intended if the output layer is linear.
        m.addConstrs((gp.quicksum(weights[k-1][l][j] * (x[k-1,l] - scaler_min[k-1][l]) * scaler_coeff[k-1][l] for l in indices_kj_dict[k-1]) + biases[k-1][j]
                      == (x[k,j] - scaler_min[k][j]) * scaler_coeff[k][j] - s[k,j] for (k,j) in indices_kj_nofirst),
                     name = 'calc_layers')
        m.addConstrs((x[k,j] <= x_ub_dict[k][j] * z[k,j] for (k,j) in indices_kj_nofirst), name = "constraint_x")
        m.addConstrs((s[k,j] <= x_ub_dict[k][j] * (1 - z[k,j]) for (k,j) in indices_kj_nofirst), name = "constraint_z")

        m.update()
        # the LP file is overwritten on every iteration; only the last instance remains on disk
        m.write("m.lp")

        m.optimize()
        times.append(m.Runtime)
        # ObjVal can only be queried when the solver holds at least one solution. After a
        # time-limit stop without incumbent (or an infeasible model) reading it raises and
        # would discard all results collected so far, so record NaN instead.
        if m.SolCount > 0:
            objs.append(m.objVal)
        else:
            tqdm.write('No solution for n=%d, sample=%d (Gurobi status %d); recording NaN objective'
                       % (n, n_sample, m.Status))
            objs.append(float('nan'))
        num_binvars.append(m.NumBinVars)
        num_constrs.append(m.NumConstrs)

    times_n[n] = times
    objs_n[n] = objs
    num_binvars_n[n] = num_binvars
    num_constrs_n[n] = num_constrs

In [None]:
# Dump the collected statistics, one dictionary per output line:
# solution times, objective values, binary-variable counts, constraint counts.
print(times_n, objs_n, num_binvars_n, num_constrs_n, sep='\n')

In [None]:
def pickle_save(path, file, filename):
    """Serialize an object to ``<path>/<filename>.pickle``.

    Parameters
    ----------
    path : str
        Directory that receives the pickle file (must already exist).
    file : object
        The Python object to serialize.
    filename : str
        File name without the ``.pickle`` extension.
    """
    target = '/'.join((path, filename + '.pickle'))
    with open(target, 'wb') as sink:
        pickle.dump(file, sink, protocol=pickle.HIGHEST_PROTOCOL)

# create the directory to save the results
path = './results_opt_relu'

# Only report "already exists" when the path really is a directory. If a regular
# file occupies the name, os.mkdir raises FileExistsError here instead of the
# problem surfacing later as a confusing failure when the results are saved.
if os.path.isdir(path):
    print('Folder already exists')
else:
    os.mkdir(path)

In [None]:
# Persist each result dictionary under its own file stem inside `path`.
for _stem, _payload in (
    ('relu_objs_n', objs_n),
    ('relu_times_n', times_n),
    ('relu_binvars_n', num_binvars_n),
    ('relu_constrs_n', num_constrs_n),
):
    pickle_save(path, _payload, _stem)