# Testing

In [13]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from pyexplainer_pyexplainer import PyExplainer
import pickle
import os


# Resolve the directory one level above the current working directory.
# NOTE(review): assumes the notebook is launched from a direct sub-directory
# of the repository root - confirm.
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
print(parent_dir)

# Build paths with os.path.join instead of concatenating "/" by hand so the
# separators are always correct for the host operating system.
test_data_dir = os.path.join(parent_dir, "tests", "pyexplainer_test_data")
path_train = os.path.join(test_data_dir, "activemq-5.0.0.zip")
data = pd.read_csv(path_train, index_col='File')

# The fourth column from the end is the dependent variable; every column
# before the last four is used as an independent variable (feature).
dep = data.columns[-4]
indep = data.columns[:-4]

X_train = data.loc[:, indep]
y_train = data.loc[:, dep]

# Shallow random forest with a fixed seed so the fitted model is repeatable.
blackbox_model = RandomForestClassifier(max_depth=3, random_state=0)
blackbox_model.fit(X_train, y_train)

class_label = ['Clean', 'Defect']
pyExp = PyExplainer(X_train,
                    y_train,
                    indep,
                    dep,
                    blackbox_model,
                    class_label=class_label)

# The test split comes from a different archive but uses the same columns.
path_test = os.path.join(test_data_dir, "activemq-5.1.0.zip")
sample_test_data = pd.read_csv(path_test, index_col='File')
X_test = sample_test_data.loc[:, indep]
y_test = sample_test_data.loc[:, dep]

# Attach the instance to be explained directly to the explainer. The double
# brackets in .iloc[[...]] keep a one-row container instead of a scalar/row.
sample_explain_index = 0
pyExp.X_explain = X_test.iloc[[sample_explain_index]]
pyExp.y_explain = y_test.iloc[[sample_explain_index]]


# Util functions for reading and writing data
def save_object(object_i, filename):
    """Serialise ``object_i`` with pickle and write it to ``filename``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten. Returns ``None``.
    """
    with open(filename, 'wb') as sink:
        pickle.dump(object_i, sink)

def load_object(filename):
    """Read the pickle stored at ``filename`` and return the restored object.

    Only use this on files from a trusted source: unpickling can execute
    arbitrary code.
    """
    with open(filename, 'rb') as source:
        return pickle.load(source)

# Report the number of feature columns and the number of labels in the
# train and test splits as a quick sanity check.
print(len(X_train.columns), " ", len(X_test.columns))
print(len(y_train), " ", len(y_test))

# Load the pickled rule object. Fail loudly when the file is missing instead
# of silently leaving `load_pyExp_rule_obj` undefined, which would only show
# up later as a NameError in the cell that uses it.
rule_obj_path = '../tests/pyExplainer_obj.pyobject'
if not os.path.isfile(rule_obj_path):
    raise FileNotFoundError(
        "Rule object file not found: " + os.path.abspath(rule_obj_path)
    )
load_pyExp_rule_obj = load_object(rule_obj_path)

C:\Users\micha\Documents\GitHub\pyExplainer
65   1970


AttributeError: 'Series' object has no attribute 'columns'

In [5]:
# Pass the rule object loaded from '../tests/pyExplainer_obj.pyobject' to the
# explainer's visualise method.
pyExp.visualise(load_pyExp_rule_obj)

HBox(children=(Label(value='Risk Score: '), FloatProgress(value=0.0, bar_style='info', layout=Layout(width='40…

FloatSlider(value=10.0, continuous_update=False, description='#1 Increase the values of CountStmt to more than…

FloatSlider(value=1.0, continuous_update=False, description='#2 Decrease the values of MAJOR_COMMIT to less th…

FloatSlider(value=1.0, continuous_update=False, description='#3 Decrease the values of COMM to less than 1', l…

Output(layout=Layout(border='3px solid black'))

In [6]:
# Inspect the explainer's `risk_data` attribute.
# NOTE(review): presumably populated by the preceding visualise() call - confirm.
print(pyExp.risk_data)

[{'riskScore': ['8%'], 'riskPred': ['Clean']}]


In [7]:
# Inspect the explainer's `bullet_data` attribute.
# NOTE(review): presumably populated by the preceding visualise() call - confirm.
print(pyExp.bullet_data)

[{'title': '#1 Increase the values of CountStmt to more than 10', 'subtitle': 'Actual = 10', 'ticks': [2.0, 196.0], 'step': [1], 'startPoints': [0, 222.0], 'widths': [222.0, 228.0], 'colors': ['#d7191c', '#a6d96a'], 'markers': [10], 'varRef': 'CountStmt'}, {'title': '#2 Decrease the values of MAJOR_COMMIT to less than 1', 'subtitle': 'Actual = 1', 'ticks': [1.0, 2.0], 'step': [0.1], 'startPoints': [0, 248.0], 'widths': [248.0, 202.0], 'colors': ['#a6d96a', '#d7191c'], 'markers': [1], 'varRef': 'MAJOR_COMMIT'}, {'title': '#3 Decrease the values of COMM to less than 1', 'subtitle': 'Actual = 1', 'ticks': [1, 8.0], 'step': [0.1], 'startPoints': [0, 289.0], 'widths': [289.0, 161.0], 'colors': ['#a6d96a', '#d7191c'], 'markers': [1], 'varRef': 'COMM'}]


In [3]:
%%time
import warnings

# Indices of instances whose explanation raised a warning. The original
# handler appended to this list without ever defining it (and used an
# undefined `i`), so any warning would have surfaced as a NameError.
not_converge_index = []

explain_index = 1
X_explain = X_test.iloc[[explain_index]]
y_explain = y_test.iloc[[explain_index]]

with warnings.catch_warnings():
    # Promote every warning to an exception so that it is caught by the
    # `except Warning` clause below; the filter is restored on exit.
    warnings.filterwarnings('error')
    try:
        print("executed")
        pyExp_rule_obj = pyExp.explain(X_explain,
                                       y_explain,
                                       search_function='crossoverinterpolation',
                                       top_k=3,
                                       max_rules=30,
                                       max_iter=5,
                                       cv=5,
                                       debug=False)
        print(explain_index, " completed")
    except Warning:
        # Record the instance that triggered the warning and carry on.
        not_converge_index.append(explain_index)
        print("Index ", explain_index, " cannot be coverged!")

executed
1  completed
CPU times: user 8.04 s, sys: 345 ms, total: 8.39 s
Wall time: 20.5 s


### 3.4 Load Sample Rule Object

#### 3.4.1 Create Reading and Writing Functions

In [13]:
import pickle
import os.path

# Util functions for reading and writing data
def save_object(object_i, filename):
    """Serialise ``object_i`` with pickle and write it to ``filename``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten. Returns ``None``.
    """
    with open(filename, 'wb') as sink:
        pickle.dump(object_i, sink)

def load_object(filename):
    """Read the pickle stored at ``filename`` and return the restored object.

    Only use this on files from a trusted source: unpickling can execute
    arbitrary code.
    """
    with open(filename, 'rb') as source:
        return pickle.load(source)

#### 3.4.2 Load Sample Rule Object

In [14]:
# Load the sample rule object from disk when the pickle file is present;
# otherwise `pyExp_rule_obj` is left as it was.
rule_obj_path = '../tests/pyExplainer_obj.pyobject'
if os.path.isfile(rule_obj_path):
    pyExp_rule_obj = load_object(rule_obj_path)

## 4. Pass Rule Object to .visualise(rule_obj) to Generate the Bullet Chart and Interactive Slider
#### Note: The interactive slider is not available in this version.

In [15]:
# Pass the rule object to the explainer's visualise method.
pyExp.visualise(pyExp_rule_obj)

Min 2 Max 987 threshold 97.83 Actual 10 Plot_min 2.0 Plot_max 196.0
Min 1 Max 4 threshold 1.55 Actual 1 Plot_min 1.0 Plot_max 2.0
Min 1 Max 23 threshold 5.5 Actual 1 Plot_min 1 Plot_max 8.0


HBox(children=(Label(value='Risk Score: '), FloatProgress(value=0.0, bar_style='info', layout=Layout(width='40…

FloatSlider(value=10.0, continuous_update=False, description='#1 Increase the values of CountStmt to more than…

FloatSlider(value=1.0, continuous_update=False, description='#2 Decrease the values of MAJOR_COMMIT to less th…

FloatSlider(value=1.0, continuous_update=False, description='#3 Decrease the values of COMM to less than 1', l…

Output(layout=Layout(border='3px solid black'))

## Create a Rule Object Manually 
#### Note: This cell may take a while to execute.

In [1]:
from pyexplainer_pyexplainer import PyExplainer
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
# Load the training data, indexed by file name.
data = pd.read_csv('../tests/pyexplainer_test_data/activemq-5.0.0.csv', index_col = 'File')

# The fourth column from the end is the dependent variable; every column
# before the last four is used as an independent variable (feature).
dep = data.columns[-4]
indep = data.columns[0:(len(data.columns) - 4)]
X_train = data.loc[:, indep]
y_train = data.loc[:, dep]

# Shallow random forest with a fixed seed so the fitted model is repeatable.
blackbox_model = RandomForestClassifier(max_depth=3, random_state=0)
blackbox_model.fit(X_train, y_train)
class_label = ['Clean', 'Defect']

# Argument order fixed to match the other PyExplainer(...) call in this
# notebook: the model is the fifth positional argument and class_label is
# passed by keyword. The original call had the two swapped.
pyExp = PyExplainer(X_train, y_train, indep, dep, blackbox_model, class_label=class_label)

# NOTE(review): this reads the same 5.0.0 file that was used for training,
# whereas the first cell of the notebook tests on activemq-5.1.0 - confirm
# whether that is intended.
sample_test_data = pd.read_csv('../tests/pyexplainer_test_data/activemq-5.0.0.csv', index_col = 'File')
X_test = sample_test_data.loc[:, indep]
y_test = sample_test_data.loc[:, dep]

# Explain a single instance; .iloc[[...]] keeps a one-row container.
sample_explain_index = 0
X_explain = X_test.iloc[[sample_explain_index]]
y_explain = y_test.iloc[[sample_explain_index]]

# Left as the last expression so the returned rule object is displayed.
pyExp.explain(X_explain,
              y_explain,
              search_function = 'crossoverinterpolation',
              top_k = 3,
              max_rules=30,
              max_iter =5,
              cv=5,
              debug = False)

{'synthetic_data':       CountDeclMethodPrivate  AvgLineCode  CountLine  MaxCyclomatic  \
 0                        0.0         6.00     124.00           5.00   
 1                        0.0         6.00     262.00           7.00   
 2                        0.0         6.00     252.00           4.00   
 3                        0.0         7.00     201.00           6.00   
 4                        0.0         3.00      86.00           1.00   
 ...                      ...          ...        ...            ...   
 2235                     0.0         4.14     159.48           5.86   
 2236                     0.0         5.56      92.78           2.82   
 2237                     1.0         9.65     352.30           5.00   
 2238                     0.0         4.22     165.03           5.05   
 2239                     0.0         7.78      82.90           2.59   
 
       CountDeclMethodDefault  AvgEssential  CountDeclClassVariable  \
 0                       0.00           1.0  

## Introduction to Rule Object

### 1. Basic Data Check

In [19]:
# Show the container type of the rule object, then list its keys with a
# 1-based position. enumerate() replaces the hand-maintained counter and
# avoids rebinding the notebook-global name `i`.
print("Type of Rule Object: ", type(pyExp_rule_obj))
print()
print("All of the keys in Rule Object")
for key_number, key_name in enumerate(pyExp_rule_obj.keys(), start=1):
    print("Key ", key_number, " - ", key_name)

Type of Rule Object:  <class 'dict'>

All of the keys in Rule Object
Key  1  -  synthetic_data
Key  2  -  synthetic_predictions
Key  3  -  X_explain
Key  4  -  y_explain
Key  5  -  indep
Key  6  -  dep
Key  7  -  top_k_positive_rules
Key  8  -  top_k_negative_rules
