Load data

In [1]:
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.model_selection import train_test_split

# Read the ARFF file; only element [0] of what loadarff returns (the records)
# is used to build the DataFrame.
slump_data = arff.loadarff('slump.arff')
slump_df = pd.DataFrame(slump_data[0])

# Column layout: the first 7 columns are used as features, the rest as targets.
column_names = slump_df.columns
f_n, t_n = column_names[:7], column_names[7:]

# Feature and target matrices as plain NumPy arrays.
X = slump_df[f_n].to_numpy()
y = slump_df[t_n].to_numpy()

# Hold out 10% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# The test row that the following cells explain.
instance = X_test[10]

Using MTR

In [2]:
# MTR and the two surrogate-tree explainers are imported from the XMTR package.
from XMTR import MTR, GlobalSurrogateTree, LocalSurrogateTree
# Build the MTR explainer from the train/test split plus the feature and target names.
MTR_obj = MTR(X_train, X_test, y_train, y_test, f_n, t_n)
# NOTE(review): the meaning of the second argument (1) is not visible here - confirm against XMTR.
rule = MTR_obj.explain(instance, 1) # the allowed error can be added as a last argument
print(rule)

# The model returned by getModel() is reused by the global/local surrogate cells:
# its predictions on the training data are what the surrogate trees receive.
model = MTR_obj.getModel()
predictions = model.predict(X_train)

reduced_rules:  98 / 100
if 164.0<=Water<=182.25 & 0.0<=Slag<=66.4 & 896.0<=Coarse_Aggr<=1049.45 & 307.0<=Cemment<=329.0 & 9.0<=SP<=19.0 & 705.0<=Fine_Aggr<=902.0 then SLUMP_cm: 13.045 +/- 0.34 error, FLOW_cm: 39.33 +/- 0.78 error, Compressive_Strength_Mpa: 34.7788 +/- 0.5192 error


Using the global surrogate

In [5]:
# Build the global surrogate tree from the training features, the model's
# predictions on them, and the feature names.
GS = GlobalSurrogateTree(X_train, predictions, f_n)
# Prints the rule for the examined instance. The captured output is a tuple of
# (dict of per-feature threshold conditions, array of three values).
# NOTE(review): presumably the array holds the predicted targets - confirm against XMTR.
print(GS.rule(instance))

({'Cemment': [['<=', 179.8000030517578], ['>', 150.5]], 'Water': [['<=', 205.9000015258789], ['>', 182.25]], 'Fly_ash': [['>', 98.5]]}, array([15.5 , 35.  , 32.71]))




Using the local surrogate

In [6]:
# Build the local surrogate tree from the training features, the model's
# predictions on them, the feature names, and a final argument of 10.
# NOTE(review): the last argument is presumably the number of neighbours - confirm against XMTR.
LS = LocalSurrogateTree(X_train, predictions, f_n, 10) # neighbours (last argument) should be >= 10
# Prints the rule for the examined instance, in the same (conditions, values) shape
# as the global surrogate output.
print(LS.rule(instance))

({'Cemment': [['<=', 356.0916409800256]], 'Coarse_Aggr': [['>', 857.907831305595]], 'Fly_ash': [['>', 151.14604087599835]]}, array([23.5 , 60.  , 45.69]))


