In [None]:
from XMTR import MTR
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor

In [3]:
def calc_al_error(instance, perc):
  # the error should be non zero
  return (abs(instance)+0.1)*perc 


# load data
slump_data = arff.loadarff('slump.arff')
slump_df = pd.DataFrame(slump_data[0])

# get column names
column_names = slump_df.columns

# get data/target values
f_n = column_names[:7]
t_n = column_names[7:]

X = slump_df[f_n]
y = slump_df[t_n]

# convert to numpy
X = X.to_numpy()
y = y.to_numpy()

# scale target values
scaler = MinMaxScaler(feature_range=(0,10))
y = scaler.fit_transform(y)


# split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=42)
allowed_error = 0.5

max_ds = [2]
min_s_ls = [5]
m_fs = [None]
ests = [i for i in range(100, 1001, 50)]
total = (len(max_ds)*len(min_s_ls)*len(m_fs)*len(ests))
counter = 1
all_errors = []
min_error = 10**10 # just a ref
for max_d in max_ds:
  for min_s_l in min_s_ls:
    for m_f in m_fs:
      for est in ests:
        # create/train model
        RFmodel = RandomForestRegressor(n_estimators=est, max_depth=max_d, min_samples_leaf=min_s_l, max_features=m_f, n_jobs=-1, random_state=42)
        RFmodel.fit(X_train, y_train)
        MTR_obj = MTR(model=RFmodel, X_train = X_train, X_test=X_test, y_train=y_train, y_test=y_test, feature_names=f_n, target_names=t_n)
        avgEstimators = []
        MTRpreds = []
        # get results
        for i in range(len(X_test)):
          error = calc_al_error(y_test[i], allowed_error)
          rule = MTR_obj.explain(X_test[i], error) # explain instance
          
          decisionsAndErrors = MTR_obj.getDecisionsAndErros() # get preds/errors
          MTRpreds.append(decisionsAndErrors)

          estimators = MTR_obj.getAmountOfReduction() # get estimators
          avgEstimators.append(estimators[0])

        MTRpreds = np.array(MTRpreds)

        # local error
        MTRerrors = np.array([subarray[:,1] for subarray in MTRpreds])
        total_error = np.mean(MTRerrors, axis=0) # error per target
        final_error = np.array(total_error).mean() # avg error
        all_errors.append(final_error)
        if final_error < min_error:
          min_error = final_error
          best_par = {"max_d=":max_d, "min_s_l=":min_s_l, "m_f=": m_f, "est=": est, "local error=": final_error}
        print(counter, '/', total, "| max_depth=",max_d," | ", "min_sample_leaf=",min_s_l," | ", "max_f=", m_f," | ", "est=", est," | ", "local error=", final_error, " | ", "estimators=", round(np.array(avgEstimators).mean(),3),"/",est)
        counter += 1

1 / 19 | max_depth= 2  |  min_sample_leaf= 5  |  max_f= None  |  est= 100  |  local error= 2.01814370502376  |  estimators= 58.742 / 100
2 / 19 | max_depth= 2  |  min_sample_leaf= 5  |  max_f= None  |  est= 150  |  local error= 2.0288186264210495  |  estimators= 88.0 / 150
3 / 19 | max_depth= 2  |  min_sample_leaf= 5  |  max_f= None  |  est= 200  |  local error= 2.037926706649111  |  estimators= 117.0 / 200
4 / 19 | max_depth= 2  |  min_sample_leaf= 5  |  max_f= None  |  est= 250  |  local error= 2.03485711884485  |  estimators= 147.194 / 250
5 / 19 | max_depth= 2  |  min_sample_leaf= 5  |  max_f= None  |  est= 300  |  local error= 2.0420722677913976  |  estimators= 176.71 / 300
6 / 19 | max_depth= 2  |  min_sample_leaf= 5  |  max_f= None  |  est= 350  |  local error= 2.0342577349167055  |  estimators= 207.387 / 350
7 / 19 | max_depth= 2  |  min_sample_leaf= 5  |  max_f= None  |  est= 400  |  local error= 2.035503049291747  |  estimators= 236.774 / 400
8 / 19 | max_depth= 2  |  min_sam

In [4]:
all_errors

[2.01814370502376,
 2.0288186264210495,
 2.037926706649111,
 2.03485711884485,
 2.0420722677913976,
 2.0342577349167055,
 2.035503049291747,
 2.039513753835292,
 2.044352748042723,
 2.0452754655043868,
 2.044316089420582,
 2.0448407285092736,
 2.0427553024255443,
 2.0445947411786025,
 2.0455358050983343,
 2.045487563071102,
 2.04548728690282,
 2.0455153210430024,
 2.0440639101501263]

In [5]:
for e in all_errors:
  print(e)

2.01814370502376
2.0288186264210495
2.037926706649111
2.03485711884485
2.0420722677913976
2.0342577349167055
2.035503049291747
2.039513753835292
2.044352748042723
2.0452754655043868
2.044316089420582
2.0448407285092736
2.0427553024255443
2.0445947411786025
2.0455358050983343
2.045487563071102
2.04548728690282
2.0455153210430024
2.0440639101501263
