In [None]:
from mlinsights.mlmodel import PiecewiseRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression

import numpy as np
import pandas as pd
import math

from tqdm import tqdm
import random
import pickle
import os

import gurobipy as gp
from gurobipy import GRB

from helpers.pwl_tree import get_coef, get_subdomain, y_min_max_calc

In [None]:
from sklearn.datasets import make_friedman3

In [None]:
# Experiment configuration.
# n: dimension of the input vector (one decision variable x[u] per dimension in the MILP).
n = 4
# n_samples: number of datasets to generate; one seed and one optimisation run per dataset.
n_samples = 10

# Fix every random source so the per-dataset seeds are reproducible.
global_seed = 777
random.seed(global_seed)
np.random.seed(global_seed)
# Draw one integer seed in [1, 1000) per dataset and flatten the (1, n_samples) draw to 1-D.
seeds = np.random.randint(low=1, high=1000, size=(1, n_samples)).ravel()

In [None]:
# Mean value of the deterministic part of the function generating the Friedman #3 dataset.
# It is added to the raw targets in the main loop (`Y_variable_raw += mean_val`).
mean_val = 1.324

In [None]:
# Per-dataset result accumulators, filled by the main loop (one entry per dataset).

# solver run times
times = list()
# objective function values
objs = list()
# number of binary variables in each model
num_binvars = list()
# number of constraints in each model
num_constrs = list()
# trained models
best_models = list()
# MAPEs of the trained models on the held-out test split
mapes = list()

In [None]:
# Load the previously trained models from the `results_opt_friedman3_max_pwl` folder.
# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
models_pickle_path = './results_opt_friedman3_max_pwl/best_models.pickle'
with open(models_pickle_path, 'rb') as models_file:
    best_models = pickle.load(models_file)

In [None]:
# Main experiment loop: one minimisation MILP per Friedman #3 dataset.
# For every seed the loop
#   1. regenerates and scales the dataset,
#   2. takes the matching pre-trained model from `best_models` and records its test MAPE,
#   3. extracts leaf sub-domains and per-leaf linear models and maps them back to raw units,
#   4. builds and solves the Gurobi model, and
#   5. appends run time, objective, #binary variables and #constraints to the result lists.
for i in tqdm(range(n_samples)):

    # generate data using the make_friedman3 function
    # dependent variables add the mean value to enable the downstream cross dataset/method statistical tests
    X_variable_raw, Y_variable_raw = make_friedman3(n_samples = 5000, random_state = seeds[i])
    Y_variable_raw += mean_val
    X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(X_variable_raw, Y_variable_raw,
                                                                        random_state=seeds[i], test_size = 0.25)

    # scaling (fitted on the training split only)
    sc_X = MinMaxScaler()
    sc_y = MinMaxScaler()

    X_variable = sc_X.fit_transform(X_train_raw)
    Y_variable = sc_y.fit_transform(y_train_raw.reshape(-1,1))
    Y_variable = np.squeeze(Y_variable)

    # scaler parameters, needed below to map leaf models and sub-domains back to raw units
    dx = sc_X.data_range_
    dy = sc_y.data_range_
    xmin = sc_X.data_min_
    ymin = sc_y.data_min_

    # use the model trained beforehand.
    # It is deliberately NOT appended to `best_models` again: that list is the one being
    # read from, so appending would double it and duplicate every model when it is pickled.
    best_tree = best_models[i]

    # prediction accuracy on the held-out split, measured in raw (unscaled) units
    y_predict = best_tree.predict(sc_X.transform(X_test_raw))
    y_predict_inversed = sc_y.inverse_transform(y_predict.reshape(-1,1))
    mapes.append(mean_absolute_percentage_error(y_test_raw, y_predict_inversed))

    # obtain queries corresponding to non-leaf node, as well as leaf node information
    # revert the scaling of prediction at each leaf node
    y_train_predict = np.transpose(sc_y.inverse_transform(best_tree.predict(X_variable).reshape(-1,1))).tolist()[0]
    # ymin_in_bins (ymax_in_bins) contains min/max predicted y values for each leaf
    ymin_in_bins, ymax_in_bins = y_min_max_calc(best_tree, X_variable, y_train_predict)

    # linear regression model for each leaf node
    lincoeff = get_coef(best_tree)
    # obtain subdomain information for each leaf node; cannot directly obtain it from the PiecewiseRegressor using this version of mlinsights;
    # Train the decision tree to obtain the binner used by the PiecewiseRegressor
    dctree = DecisionTreeRegressor(random_state=0,min_samples_leaf=best_tree.binner.min_samples_leaf)
    dctree.fit(X_variable, Y_variable)
    # obtain tree structure information
    tree_dict, tree_params = get_subdomain(dctree, n)

    # tree_params_less: upper bound of x for a leaf
    # tree_params_larger: lower bound of x for a leaf
    tree_params_less = tree_params[:,:n]
    tree_params_larger = tree_params[:,n:]
    # number of leaves
    n_leaves = best_tree.n_estimators_

    # inverse transform coefficients and intercepts of linear regressors of each leaf
    # (last column of lincoeff is the intercept, the others are the slopes)
    lincoeff_coeffs_raw = lincoeff[:,:-1]
    lincoeff_intecept_raw = lincoeff[:,-1]

    # With x_s = (x - xmin) / dx and y_s = (y - ymin) / dy, the scaled model
    # y_s = a . x_s + b becomes y = (dy * a / dx) . x + dy * (b - sum(a * xmin / dx)) + ymin.
    lincoeff_coeffs = lincoeff_coeffs_raw * dy
    lincoeff_coeffs = np.divide(lincoeff_coeffs,dx)
    lincoeff_intecept = lincoeff_intecept_raw - np.sum(np.divide(np.multiply(lincoeff_coeffs_raw, xmin), dx), axis=1)
    lincoeff_intecept *= dy
    lincoeff_intecept += ymin

    # revert scaling of tree params
    tree_params_less = sc_X.inverse_transform(tree_params_less)
    tree_params_larger = sc_X.inverse_transform(tree_params_larger)

    # set generation
    # set for each input variable dimension
    rangen = range(n)
    # set for leaf nodes
    rangen_leaves = range(n_leaves)
    # (u, m) tuple in the paper; dedicated index names avoid clobbering the outer loop index
    rangeleaven = [(u, leaf) for u in rangen for leaf in rangen_leaves]
    # set containing (m, k) tuple for constraints for truncation
    rangek = range(3)
    rangemk = [(leaf, k) for leaf in rangen_leaves for k in rangek]

    #create a new model
    m = gp.Model("PWL")
    m.Params.LogToConsole = 0
    m.setParam('TimeLimit', 60)

    # create variables
    # binary variable Z_m
    z = m.addVars(rangen_leaves, name = 'z', vtype = GRB.BINARY)
    # independent variable X_u, bounded by the bounds given here for the four inputs
    x = m.addVars(rangen, ub = [100, 280*2*math.pi, 1, 11], lb = [0, 20*2*math.pi, 0, 1], name = 'x')
    # disaggregate variable \bar{X}_{u, m}
    x_bar = m.addVars(rangeleaven, name = 'x_bar')
    # disaggregate variable \bar{Y}_m
    y_bar = m.addVars(rangen_leaves, name = 'y_bar', lb = -10)
    # dependent variable Y
    y = m.addVar(obj = 1, name = 'y', lb = -10)
    # disaggregate variable \tilde{Y}_{m, k}
    y_tilde = m.addVars(rangemk, name = 'y_tilde', lb = -10)
    # binary disaggregate variable \bar{Z}_{m, k}
    w = m.addVars(rangemk, name = 'w', vtype = GRB.BINARY)
    # disaggregate variable \hat{Y}
    y_hat = m.addVars(rangen_leaves, name = 'y_hat', lb = -10)

    # obj
    m.setObjective(y, GRB.MINIMIZE)

    # Add constraint
    # Constraint 3B-1: exactly one leaf is active
    m.addConstr((gp.quicksum(z[l] for l in rangen_leaves) == 1), name = 'onez')

    # Constraint 3B-2: x_bar lies inside the sub-domain of its leaf when the leaf is active
    m.addConstrs((tree_params_larger[l][k] * z[l] <= x_bar[k,l] for k,l in rangeleaven), name = 'largerthan')
    m.addConstrs((tree_params_less[l][k] * z[l] >= x_bar[k,l] for k,l in rangeleaven), name = 'lessthan')

    # Constraint 3B-3
    m.addConstrs((gp.quicksum(x_bar[k,l] for l in rangen_leaves) == x[k] for k in rangen), name = 'calcx')

    # Constraint 3B-4
    m.addConstr((gp.quicksum(y_bar[l] for l in rangen_leaves) == y), name = 'calcy')

    # Constraint 3B-5: untruncated linear prediction of each leaf
    m.addConstrs((gp.quicksum(lincoeff_coeffs[l][k] * x_bar[k,l] for k in rangen) + lincoeff_intecept[l] * z[l] == y_hat[l] \
                  for l in rangen_leaves), name = 'calcy_hat')

    # Constraint 3B-6: truncated prediction of each leaf.
    # The below-range branch (w[l,0]) clips to the leaf minimum, matching constraint 3B-9,
    # which bounds y_tilde[l,0] by ymin_in_bins; the above-range branch (w[l,2]) clips to the
    # leaf maximum. (The previous code used ymax_in_bins for both branches.)
    m.addConstrs((y_bar[l] == ymin_in_bins[l] * w[l,0] + y_tilde[l, 1] + ymax_in_bins[l] * w[l,2] for l in rangen_leaves),
                     name = 'calcy_bar')

    # Constraint 3B-7
    m.addConstrs((gp.quicksum(y_tilde[l,k] for k in rangek) == y_hat[l] for l in rangen_leaves), name = 'calcy_hat1')

    # Constraint 3B-8
    m.addConstrs((gp.quicksum(w[l,k] for k in rangek) == z[l] for l in rangen_leaves), name = 'calcw')

    # NOTE(review): y_tilde[l,0] (3B-9) and y_tilde[l,2] (3B-11) are bounded by w on one side
    # only, so they are not forced to zero when their w is 0 - confirm against the paper.
    # Constraint 3B-9
    m.addConstrs((y_tilde[l, 0] <= ymin_in_bins[l] * w[l,0] for l in rangen_leaves), name = 'calcy_tilde0')

    # Constraint 3B-10
    m.addConstrs((y_tilde[l, 1] >= ymin_in_bins[l] * w[l,1] for l in rangen_leaves), name = 'calcy_tilde1')
    m.addConstrs((y_tilde[l, 1] <= ymax_in_bins[l] * w[l,1] for l in rangen_leaves), name = 'calcy_tilde11')

    # Constraint 3B-11
    m.addConstrs((y_tilde[l, 2] >= ymax_in_bins[l] * w[l,2] for l in rangen_leaves), name = 'calcy_tilde2')

    m.update()
    # dump the model for inspection; the file is overwritten on every iteration
    m.write("m.lp")

    m.optimize()

    times.append(m.Runtime)
    # objVal is only readable when the solver holds at least one feasible solution; record NaN
    # otherwise (infeasible model, or time limit reached without an incumbent) instead of crashing.
    objs.append(m.objVal if m.SolCount > 0 else float('nan'))
    num_binvars.append(m.NumBinVars)
    num_constrs.append(m.NumConstrs)

In [None]:
# Show the collected results, one list per line:
# run times, objective values, #binary variables, #constraints and test MAPEs.
for collected in (times, objs, num_binvars, num_constrs, mapes):
    print(collected)

In [None]:
def pickle_save(path, file, filename):
    """Pickle an object to ``<path>/<filename>.pickle``.

    Parameters
    ----------
    path : str
        Directory that receives the pickle file; it must already exist.
    file : object
        The Python object to serialise (despite the name, not a file handle).
    filename : str
        Base name of the output file, without the ``.pickle`` extension.
    """
    target = '/'.join((path, filename + '.pickle'))
    with open(target, 'wb') as out_stream:
        # Highest protocol: most compact/fastest format supported by this interpreter.
        pickle.dump(file, out_stream, protocol=pickle.HIGHEST_PROTOCOL)

# Create the directory that receives this notebook's pickled results.
path = './results_opt_friedman3_min_pwl'

try:
    os.mkdir(path)
except FileExistsError:
    # Re-running the notebook is fine: the existing folder is reused and only reported.
    print('Folder already exists')

In [None]:
# Persist every result list as `<path>/<name>.pickle`, in the order listed here.
for result_name, result_values in (
    ('objs', objs),
    ('times', times),
    ('num_binvars', num_binvars),
    ('num_constrs', num_constrs),
    ('best_models', best_models),
):
    pickle_save(path, result_values, result_name)