In [6]:
from XMTR import MTR
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
import time
import warnings
warnings.simplefilter(action='ignore')

In [7]:
def calc_al_error(instance, perc):
  """Return the allowed error for `instance` as a fraction `perc` of its magnitude.

  A constant 0.1 is added to the absolute value before scaling, so the
  result stays non-zero even when `instance` is 0 (for a non-zero `perc`).
  `abs` is applied as-is, so anything supporting `abs`, `+` and `*`
  (plain numbers, NumPy arrays) is handled the same way.
  """
  padded_magnitude = abs(instance) + 0.1
  return padded_magnitude * perc

# Variables under test.
# total_instances: dataset sizes, each passed as n_samples to make_regression.
# allowed_errors: fractions passed as `perc` to calc_al_error to derive the
#                 allowed error handed to MTR.explain.
total_instances = [500, 5000, 7000]
allowed_errors = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3]

# Default values used for every parameter that is not the one being tested.
# NOTE(review): n_s is not referenced in the visible cells (the sample count
# comes from total_instances) - confirm whether another cell still needs it.
n_s = 500   # number of samples
n_f = 10    # number of features (make_regression n_features)
n_t = 5     # number of targets (make_regression n_targets)
n_e = 500   # number of estimators (RandomForestRegressor n_estimators)
m_d = 2     # max depth (RandomForestRegressor max_depth)
m_s_l = 5   # min_samples_leaf (RandomForestRegressor min_samples_leaf)
m_f = None  # max_features (RandomForestRegressor max_features)

# number of instances

In [9]:
# Timing experiment.
# For every allowed-error fraction, repeat `n_repeats` times: for each dataset
# size build a regression problem, fit a random forest on all rows but the
# first, and time how long MTR needs to explain that held-out first row.
#
# total_results[allowed_error][n_instances] is a list with one entry per
# repetition: [explain_seconds, reduction[0], reduction[1], rule_length, n_f].
#
# The names `time` and `iter` are deliberately NOT rebound anywhere in this
# cell, so it can be re-run without restarting the kernel.
n_repeats = 10

total_results = {}
for allowed_error in allowed_errors:
  print("for allowed error =", allowed_error)
  # Keys are derived from total_instances so the two cannot drift apart.
  time_results = {inst: [] for inst in total_instances}
  for rep in range(n_repeats):
    print("   iteration:", rep+1)
    for inst in total_instances:
      # create regression data (fixed seed: identical data on every repetition)
      X, y = make_regression(n_samples=inst, n_features=n_f, n_targets=n_t, random_state=42)

      # give names to the features
      f_n = pd.Index(["f_"+str(x) for x in range(n_f)])

      # give names to the targets
      t_n = pd.Index(np.array(["t_"+str(x) for x in range(n_t)]))

      # the first row is the single instance to explain; the rest is training data
      X_train = X[1:]
      y_train = y[1:]
      X_test = X[:1]
      y_test = y[:1]

      RFmodel = RandomForestRegressor(n_estimators=n_e, max_depth=m_d, min_samples_leaf=m_s_l, max_features=m_f, n_jobs=-1, random_state=42)
      RFmodel.fit(X_train, y_train)
      MTR_obj = MTR(model=RFmodel, X_train = X_train, X_test=X_test, y_train=y_train, y_test=y_test, feature_names=f_n, target_names=t_n)

      # allowed error per target, proportional to the true target values
      error = calc_al_error(y_test[0], allowed_error)

      # Only the explain() call is timed; perf_counter is monotonic and has
      # the highest available resolution for short intervals.
      ts = time.perf_counter()
      MTRrule = MTR_obj.explain(X_test[0], error) # explain instance
      te = time.perf_counter() - ts

      rule_length = len(MTR_obj.getFeatureLimits().keys())
      reduction = MTR_obj.getAmountOfReduction()
      print("       instances: ", inst, "est:",reduction, "RL:", rule_length, "/", n_f, " time -> ", te)
      time_results[inst].append([te, reduction[0], reduction[1], rule_length, n_f])

  total_results[allowed_error] = time_results

print("")
print("#######")
print("RESULTS")
print("#######")
for allowed_error in allowed_errors:
  print("allowed error: ", allowed_error)
  for inst in total_instances:
    # rows = repetitions; columns = [time, reduction[0], reduction[1], rule length, n_f]
    runs = np.array(total_results[allowed_error][inst])
    avg_time = runs[:,0].mean()
    avg_estimators = runs[:,1].mean()
    avg_rule_length = runs[:,3].mean()
    print("   instances:", inst, "avg time:", avg_time, " avg est:", avg_estimators, "avg RL:", avg_rule_length)

for allowed error = 0.05
   iteration: 1
       instances:  500 est: [482, 500] RL: 6 / 10  time ->  0.8837895393371582
       instances:  5000 est: [483, 500] RL: 4 / 10  time ->  0.3917717933654785
       instances:  7000 est: [483, 500] RL: 3 / 10  time ->  0.6603405475616455
   iteration: 2
       instances:  500 est: [482, 500] RL: 6 / 10  time ->  0.8750722408294678
       instances:  5000 est: [483, 500] RL: 4 / 10  time ->  0.5743169784545898
       instances:  7000 est: [483, 500] RL: 3 / 10  time ->  0.669647216796875
   iteration: 3
       instances:  500 est: [482, 500] RL: 6 / 10  time ->  0.8636224269866943
       instances:  5000 est: [483, 500] RL: 4 / 10  time ->  0.5484492778778076
       instances:  7000 est: [483, 500] RL: 3 / 10  time ->  0.6626770496368408
   iteration: 4
       instances:  500 est: [482, 500] RL: 6 / 10  time ->  0.8433377742767334
       instances:  5000 est: [483, 500] RL: 4 / 10  time ->  0.38266491889953613
       instances:  7000 est: [483, 