In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

import time
from tqdm import tqdm

import random
import pickle
import os

import gurobipy as gp
from gurobipy import GRB

from helpers.pwl_tree import tree_to_code

In [None]:
# load best models
# `pickle` comes from the import cell at the top of the notebook; it is not
# re-imported here so that all dependencies stay declared in one place.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load a pickle that you produced yourself or otherwise trust.
# The loaded object is indexed further down as best_models[n][n_sample].
with open('./results/pwc_best_models.pickle', 'rb') as best_models_pickle:
    best_models = pickle.load(best_models_pickle)

In [None]:
# --- experiment size ---
n_max = 5  # largest dimension of the input vector
n_samples = 10  # number of sampled quadratic functions per dimension

# --- result stores, each keyed by the dimension n ---
# solution times, objective values, # binary variables, # constraints
times_n, objs_n, num_binvars_n, num_constrs_n = {}, {}, {}, {}

# --- reproducibility ---
# seed both RNGs, then draw one integer seed in [1, 1000) per sample
global_seed = 777
random.seed(global_seed)
np.random.seed(global_seed)
seeds = np.random.randint(1, 1000, size=n_samples)
print(seeds)

In [None]:
# Solve the piecewise-constant (PWC) decision-tree MIP for every
# (dimension n, sample n_sample) pair and collect solver statistics per n.
for n in tqdm(range(1, n_max+1)):

    times = []
    objs = []
    num_binvars = []
    num_constrs = []
    for n_sample in range(n_samples):

        # data reading
        # seeds[...] is a NumPy integer scalar; random.seed() only accepts
        # built-in seed types (TypeError on Python >= 3.11), so cast to int.
        sample_seed = int(seeds[n_sample])
        random.seed(sample_seed)
        np.random.seed(sample_seed)
        filename = './' + str(n) + '_' + str(n_sample) + '_train.csv'
        df = pd.read_csv(filename)

        # last column is the target, all preceding columns are the inputs
        X_variable_raw = df.iloc[:, 0:-1].values
        Y_variable_raw = df.iloc[:, -1].values

        # scaling
        sc_X = MinMaxScaler()
        sc_y = MinMaxScaler()

        X_variable = sc_X.fit_transform(X_variable_raw)
        Y_variable = sc_y.fit_transform(Y_variable_raw.reshape(-1,1))
        Y_variable = np.squeeze(Y_variable)

        # set random seed
        random.seed(global_seed)
        np.random.seed(global_seed)

        # use the model trained via quadratic_model_training.ipynb
        best_model = best_models[n][n_sample]
        # predictions on the scaled inputs; not used by the MIP built below
        Y_predict = best_model.predict(X_variable)

        # obtain queries corresponding to non-leaf node, as well as leaf node information
        # obtain tree structure information
        # NOTE(review): tree_to_code is a project helper; from its use below,
        # tree_dict values are dicts keyed by 'x_i <=' / 'x_i >' and
        # predictions is indexed by 0..n_leaves-1 -- confirm in helpers.pwl_tree.
        tree_dict, predictions = tree_to_code(best_model, feature_names=['x' + str(i) for i in range(n)])
        # revert the scaling of prediction at each leaf node
        for key in predictions.keys():
            predictions[key] = sc_y.inverse_transform(np.asarray(predictions[key]).reshape(-1,1)).item()

        # obtain the info of the subdomain corresponding to the leaf
        # map the tree keys 'x_i <=' & 'x_i >' to a column number in the tree_params 2D array
        # tree_params: each row represents a leaf; the first (last) n columns are the upper bounds (lower bounds) of each x_i (i in n)
        dict_names = {}
        for i in range(n):
            dict_names['x' + str(i) + ' <='] = i
            dict_names['x' + str(i) + ' >'] = i + n

        n_leaves = len(tree_dict)

        # defaults: upper bound 1.0 and lower bound 0.0, i.e. the full range
        # of the MinMax-scaled inputs, for every dimension a leaf does not restrict
        tree_params = np.repeat([[1.0] * n + [0.0] * n], n_leaves, axis = 0)

        # NOTE(review): row order follows tree_dict iteration order and is
        # assumed to line up with predictions[0..n_leaves-1] -- confirm.
        for row, leaf in enumerate(tree_dict.values()):
            for key, value in leaf.items():
                tree_params[row, dict_names[key]] = value

        # tree_params_less: upper bound of x for a leaf
        # tree_params_larger: lower bound of x for a leaf
        tree_params_less = tree_params[:,:n]
        tree_params_larger = tree_params[:,n:]

        # revert scaling of tree params
        tree_params_less = sc_X.inverse_transform(tree_params_less)
        tree_params_larger = sc_X.inverse_transform(tree_params_larger)

        # set of leaves; set of input variable dimensions
        rangen_leaves = range(n_leaves)
        rangen = range(n)

        # create a new model
        m = gp.Model("PWC")
        m.Params.LogToConsole = 0

        # add variables
        # z[l] = 1 iff leaf l is selected
        z = m.addVars(rangen_leaves, name = 'z', vtype = GRB.BINARY)
        # w[i] is the input in original (unscaled) units. Gurobi variables
        # default to lb = 0, which would cut off every leaf whose box lies
        # below zero, so the lower bound is freed; the two bound constraints
        # below already confine w to the selected leaf's box.
        w = m.addVars(rangen, name = 'w', lb = -GRB.INFINITY)
        y = m.addVar(obj = 1, name = 'y', lb = -float('inf'))

        # add constraints
        # exactly one leaf is active
        m.addConstr((gp.quicksum(z[l] for l in rangen_leaves) == 1), name = 'onez')
        # y equals the (unscaled) prediction of the active leaf
        m.addConstr((gp.quicksum(predictions[l] * z[l] for l in rangen_leaves) == y), name = 'calcy')
        # w lies inside the box of the active leaf
        m.addConstrs((gp.quicksum(tree_params_larger[l][i] * z[l] for l in rangen_leaves) <= w[i] for i in rangen), name = 'largerthan')
        m.addConstrs((gp.quicksum(tree_params_less[l][i] * z[l]  for l in rangen_leaves) >= w[i] for i in rangen), name = 'lessthan')
        m.update()
        # same file name on every iteration, so only the last model stays on disk
        m.write("m.lp")

        m.optimize()
        # fail with context instead of an opaque attribute error on m.objVal
        if m.SolCount == 0:
            raise RuntimeError(
                'Gurobi found no solution for n={}, n_sample={} (status {})'.format(
                    n, n_sample, m.Status))
        times.append(m.Runtime)
        objs.append(m.objVal)
        num_binvars.append(m.NumBinVars)
        num_constrs.append(m.NumConstrs)

    times_n[n] = times
    objs_n[n] = objs
    num_binvars_n[n] = num_binvars
    num_constrs_n[n] = num_constrs

In [None]:
# Show the collected statistics, one dict per line, in the order:
# solution times, objective values, # binary variables, # constraints.
for collected in (times_n, objs_n, num_binvars_n, num_constrs_n):
    print(collected)

In [None]:
def pickle_save(path, file, filename):
    """Pickle an object to ``<path>/<filename>.pickle``.

    Parameters
    ----------
    path : str
        Directory to write into; it must already exist.
    file : object
        The Python object to serialise (despite the name, not a file handle).
    filename : str
        File stem; the ``.pickle`` extension is appended here.
    """
    target = ''.join((path, '/', filename, '.pickle'))
    with open(target, 'wb') as sink:
        pickle.dump(file, sink, protocol=pickle.HIGHEST_PROTOCOL)

# create the directory to save the results
path = './results_opt_pwc'

# Only an existing *directory* counts as "already exists". If `path` is taken
# by a regular file, os.makedirs raises FileExistsError here instead of the
# problem being hidden until the save cell tries to write into it.
if os.path.isdir(path):
    print('Folder already exists')
else:
    # exist_ok=True tolerates the directory appearing between the check and the call
    os.makedirs(path, exist_ok=True)

In [None]:
# Persist each statistics dict under its own file stem inside `path`.
for stem, payload in (
    ('pwc_objs_n', objs_n),
    ('pwc_times_n', times_n),
    ('pwc_binvars_n', num_binvars_n),
    ('pwc_constrs_n', num_constrs_n),
):
    pickle_save(path, payload, stem)