In [1]:
import pandas as pd
import numpy as np
import gurobipy as gp
import re
from time import process_time
import instance_generation as ig
import pre_processing as p
import implement_heuristic as ih
import heuristic_program as hp
import evaluate_predictions as ep

In [None]:
# Build a dataframe of solved instances for the TBR model with Clusters and
# export it to Excel.
n_pr = 5  # number of trees considered for the TBR model (5 in this example)
env = {}  # Gurobi environment details for an individual license; empty for the example

instance_df = ig.generate_df(n_pr, env)
instance_df.to_excel('instance_data.xlsx', index=False)

# The exported Excel file is the input to the accompanying R script, which
# trains the random forest model and extracts rules from it.

In [None]:
# Column names used when matching the extracted rules to the data.
label_column = 'Cluster'  # outcome label of this dataset
R_column = 'conditions'  # column of the rules file that holds the rule text

# Read the training data, testing data, extracted rules, original training
# parameters and feature-importance table.
train_df = pd.read_excel('training_data.xlsx')
test_df = pd.read_excel('testing_data.xlsx')
R_rules_df = pd.read_excel('extracted_rules.xlsx')
original_parm_df = pd.read_excel('original_param_train.xlsx')
feature_importance = pd.read_excel('feature_importance.xlsx')

# Column indices in the order given by the feature-importance file.
column_indices_list = feature_importance['ordered_column_indices'].tolist()

# Pre-process the rules and collect the corresponding rules for each outcome label.
rules_dict = p.rule_selection(
    train_df,
    R_rules_df,
    R_column,
    label_column,
    coverage_threshold=0.95,
)

In [None]:
# Build the heuristic from the selected rules, the training data and the
# feature-importance ordering.
#
# The three optional inputs below were previously passed to
# hp.initiate_heuristic without ever being assigned, so this cell raised
# NameError on a fresh kernel. They are now set explicitly to the values the
# notebook documents as the function's defaults.

# branch is a string; documented default is '1'.
branch = '1'

# parent_key; documented default is None.
parent_key = None

# feature_limit_input is a dict keyed by integers 0-8, each entry giving a
# lower limit, an upper limit and an interval.
# NOTE(review): the keys presumably index features in the same order as
# column_indices_list -- confirm against hp.initiate_heuristic.
feature_limit_input = {
    0: {"lower_limit": 0, "upper_limit": 4000, "interval": 50},
    1: {"lower_limit": 0, "upper_limit": 600, "interval": 10},
    2: {"lower_limit": 0, "upper_limit": 100, "interval": 5},
    3: {"lower_limit": 0, "upper_limit": 4100, "interval": 25},
    4: {"lower_limit": 0, "upper_limit": 200, "interval": 5},
    5: {"lower_limit": 0, "upper_limit": 1, "interval": 0.05},
    6: {"lower_limit": 0, "upper_limit": 4, "interval": 0.1},
    7: {"lower_limit": 0, "upper_limit": 1, "interval": 0.05},
    8: {"lower_limit": 0, "upper_limit": 1, "interval": 0.05},
}

(
    heuristic_branches,
    value_ranges_branch,
    leaf_nodes,
    terminating_rules,
    terminating_rules_outcome,
    branch_rules,
) = hp.initiate_heuristic(
    rules_dict,
    train_df,
    column_indices_list,
    branch,
    parent_key,
    feature_limit_input,
)

In [None]:
# Apply the heuristic to the testing data.
# test_df is rebound to the function's return value, so re-running this cell
# passes the previously returned frame back in.
test_df = ih.implement_heur_test(
    test_df,
    heuristic_branches,
    terminating_rules,
    value_ranges_branch,
    column_indices_list,
    leaf_nodes,
    terminating_rules_outcome,
    level=0,
)

# A new column with the predicted outcomes is expected to be added to test_df.

In [None]:
# Evaluate the heuristic's predictions on the testing data.
original_solution = 'Cluster' # column name for the original solution
# Column name for the predicted solution.
# NOTE(review): presumably the column added by ih.implement_heur_test -- confirm.
predicted_solution = 'predicted_cluster'

# NOTE(review): `original_parameters` is not assigned in any cell shown in this
# notebook, so this call raises NameError on a fresh kernel unless it is
# defined elsewhere. Its expected value cannot be determined from here --
# define it before running this cell.
wrong_pred_df,final_opt_gap,resolve_time = ep.results_eval(test_df,original_solution, predicted_solution,n_pr,original_parm_df,original_parameters,env)