In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

import time
from tqdm import tqdm
import random
import pickle
import os

import gurobipy as gp
from gurobipy import GRB

from helpers.rf import *

In [None]:
# Restore the previously selected random-forest models from disk.
# Later cells index the result as best_models[n][n_sample].
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('./results/rf_best_models.pickle', 'rb') as models_file:
    best_models = pickle.load(models_file)

In [None]:
# --- experiment size ---------------------------------------------------------
n_max = 5        # maximum dimension of the input vector
n_samples = 10   # number of sampled quadratic functions for each dimension

# --- result containers (one entry per dimension is added by the main loop) ---
# times_n: solution times, objs_n: objective values,
# num_binvars_n: number of binary variables, num_constrs_n: number of constraints
times_n, objs_n, num_binvars_n, num_constrs_n = {}, {}, {}, {}

# --- reproducibility ---------------------------------------------------------
# Seed both RNGs with the global seed, then draw one seed per sampled function.
global_seed = 777
np.random.seed(global_seed)
random.seed(global_seed)
seeds = np.random.randint(1, 1000, size=(1, n_samples))[0]
print(seeds)

In [None]:
# For every input dimension n and every sampled function, build the
# tree-ensemble MIP of the pre-trained random forest, solve it with Gurobi
# (Gurobi's default objective sense is minimisation) and record the solve
# time, objective value and model size in the *_n dictionaries.
for n in tqdm(range(1, n_max+1)):

    times = []
    objs = []
    num_binvars = []
    num_constrs = []
    for n_sample in tqdm(range(n_samples)):

        # data reading
        # `seeds` holds NumPy integers; random.seed() only accepts built-in
        # int/float/str/bytes (TypeError on Python >= 3.11), so convert first.
        sample_seed = int(seeds[n_sample])
        random.seed(sample_seed)
        np.random.seed(sample_seed)
        filename = './' + str(n) + '_' + str(n_sample) + '_train.csv'
        df = pd.read_csv(filename)

        # last column is the target, all preceding columns are the inputs
        X_variable_raw = df.iloc[:, 0:-1].values
        Y_variable_raw = df.iloc[:, -1].values

        # scaling (sc_y is needed below to map leaf predictions back to the raw scale)
        sc_X = MinMaxScaler()
        sc_y = MinMaxScaler()

        X_variable = sc_X.fit_transform(X_variable_raw)
        Y_variable = sc_y.fit_transform(Y_variable_raw.reshape(-1,1))
        Y_variable = np.squeeze(Y_variable)

        # set random seed
        random.seed(global_seed)
        np.random.seed(global_seed)

        # use the model trained via quadratic_model_training.ipynb
        rf = best_models[n][n_sample]
        # in-sample predictions; not used by the optimization model below
        Y_predict = rf.predict(X_variable)

        # extract model parameters
        flag=1 #regression tree
        # equal weighted trees
        weight_all = 1/rf.n_estimators
        # get a list of trees
        trees=get_input(rf)
        ntrees = len(trees)
        # indices of all the trees
        settrees = range(ntrees)
        # p[i, j]: weighted prediction of leaf j in tree i (on the original y scale)
        # treeleaftuples: the set contains (tree i, leaf j) tuples
        p = {}
        treeleaftuples = []
        for i in settrees:
            for j in leaves(trees, i):
                leaf_value = sc_y.inverse_transform(np.asarray(prediction(trees,i,j,1)).reshape(-1,1))
                # Store a plain float as the objective coefficient: implicit
                # conversion of a (1, 1) array to a scalar is deprecated in NumPy.
                p[i,j] = float(np.squeeze(leaf_value)) * weight_all
                treeleaftuples.append((i,j))
        # treesplits: the list of (tree i, split s) tuples
        treesplits = [(i, s) for i in settrees for s in splits(trees, i)]

        # create a new model
        m = gp.Model("tree_ensemble")
        m.Params.LogToConsole = 0
        m.setParam('TimeLimit', 600)

        # create variables
        # X_one[i, j]: one binary per (split variable i, index j < K(trees, i));
        # presumably j enumerates the split points of variable i -- see helpers.rf
        X_one = {}
        for i in total_split_variable(trees):
            for j in range(K(trees,i)):
                X_one[i,j]=m.addVar(vtype=GRB.BINARY, name='X_one'+str(i)+'_'+str(j))
        # y[t, l]: continuous, non-negative, with objective coefficient p[t, l]
        y = m.addVars(treeleaftuples, obj = p, lb = 0, name = 'y')

        # Add constraint
        # the y values of the leaves of each tree sum to one
        m.addConstrs((gp.quicksum(y[t,l] for l in leaves(trees, t)) == 1 for t in settrees), name = 'oney')
        # leaves on the left / right of a split are bounded by the split's binary
        m.addConstrs((gp.quicksum(y[t,l] for l in left_leaf(trees, t, s)) <=  X_one[V(trees,t,s),C(trees,t,s)] for t,s in treesplits), name = 'left')
        m.addConstrs((gp.quicksum(y[t,l] for l in right_leaf(trees, t, s)) <=  1 - X_one[V(trees,t,s),C(trees,t,s)] for t,s in treesplits), name = 'right')

        # the binaries of one split variable are non-decreasing in j
        for i in total_split_variable(trees):
            for j in range(K(trees,i)-1):
                m.addConstr(X_one[i,j] - X_one[i,j+1] <= 0)

        m.update()
        # dump the model for inspection; the file is overwritten on every iteration
        m.write("m.lp")

        m.optimize()
        times.append(m.Runtime)
        # ObjVal can only be queried when a feasible solution exists; without
        # this guard a run that hits the time limit with no incumbent raises
        # and discards all results collected so far.
        if m.SolCount > 0:
            objs.append(m.objVal)
        else:
            print('No feasible solution for n=' + str(n) + ', sample=' + str(n_sample)
                  + ' (Gurobi status ' + str(m.Status) + '); recording NaN')
            objs.append(float('nan'))
        num_binvars.append(m.NumBinVars)
        num_constrs.append(m.NumConstrs)

    times_n[n] = times
    objs_n[n] = objs
    num_binvars_n[n] = num_binvars
    num_constrs_n[n] = num_constrs

In [None]:
# Show the collected results, one dictionary per line:
# solve times, objective values, binary-variable counts, constraint counts.
print(times_n, objs_n, num_binvars_n, num_constrs_n, sep='\n')

In [None]:
def pickle_save(path, file, filename):
    """Pickle an object to ``<path>/<filename>.pickle``.

    Args:
        path: Directory (as a string) in which the pickle file is written.
        file: The Python object to serialize (despite the name, not a file handle).
        filename: Base name of the output file, without the ``.pickle`` suffix.

    Returns:
        None. The object is written with ``pickle.HIGHEST_PROTOCOL``; an existing
        file at the target location is overwritten.
    """
    destination = ''.join((path, '/', filename, '.pickle'))
    with open(destination, 'wb') as sink:
        pickle.dump(file, sink, protocol=pickle.HIGHEST_PROTOCOL)

# Directory (relative to the current working directory) in which the result
# pickles are saved.
path = './results_opt_rf'

# Create the directory. os.mkdir raises FileExistsError when the path already
# exists; that case is only reported so the notebook can be re-run without
# failing. Any other OSError (e.g. missing parent, permissions) still propagates.
try:
    os.mkdir(path)
except FileExistsError:
    print('Folder already exists')

In [None]:
# Persist every result dictionary as <path>/<name>.pickle.
for results, stem in (
    (objs_n, 'rf_objs_n'),
    (times_n, 'rf_times_n'),
    (num_binvars_n, 'rf_binvars_n'),
    (num_constrs_n, 'rf_constrs_n'),
):
    pickle_save(path, results, stem)