# Welcome to the PyExplainer Quickstart Guide

## 1. Build a blackbox model (Here we use Random Forest as an example)

### 1.1 Import Libraries Needed

In [3]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from pyexplainer_pyexplainer import PyExplainer
import pickle
import os.path

# Load the example dataset, using the 'File' column as the row index.
data = pd.read_csv('../tests/example-datasets/activemq-5.0.0.csv', index_col='File')

# The fourth-from-last column is used as the dependent variable; every column
# before it is used as an independent variable.
dep = data.columns[-4]
indep = data.columns[:-4]

X_train = data.loc[:, indep]
y_train = data.loc[:, dep]

# Fit the black-box model that is handed to PyExplainer below.
blackbox_model = RandomForestClassifier(max_depth=3, random_state=0)
blackbox_model.fit(X_train, y_train)

class_label = ['Clean', 'Defect']

pyExp = PyExplainer(
    X_train,
    y_train,
    indep,
    dep,
    class_label,
    blackbox_model=blackbox_model,
)

# The same CSV is read a second time to supply the instances to explain.
sample_files = pd.read_csv('../tests/example-datasets/activemq-5.0.0.csv', index_col='File')

X_test = sample_files.loc[:, indep]
y_test = sample_files.loc[:, dep]

# Indexing with a one-element list keeps the selection as a one-row
# DataFrame / one-element Series instead of collapsing it to a scalar row.
explain_index = 0
pyExp.X_explain = X_test.iloc[[explain_index]]
pyExp.y_explain = y_test.iloc[[explain_index]]

# Util functions for reading and writing data
def save_object(object_i, filename):
    """Pickle ``object_i`` and write it to ``filename``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten. Returns ``None``.
    """
    with open(filename, mode='wb') as out_stream:
        pickle.dump(object_i, out_stream)

def load_object(filename):
    """Open ``filename`` in binary mode and return the unpickled object.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be used on files from a trusted source.
    """
    with open(filename, mode='rb') as in_stream:
        return pickle.load(in_stream)

# Load the pre-computed sample rule object from disk.
# Fail loudly when the file is missing: the next cell needs
# `load_pyExp_rule_obj`, and silently skipping the load would only surface
# there as a confusing NameError.
rule_obj_path = '../tests/pyExplainer_obj.pyobject'
if os.path.isfile(rule_obj_path):
    load_pyExp_rule_obj = load_object(rule_obj_path)
else:
    raise FileNotFoundError(
        "Sample rule object not found at '" + rule_obj_path + "'"
    )

In [4]:
# Visualise the rule object that the previous cell loaded from disk.
pyExp.visualise(load_pyExp_rule_obj)

Min 2 Max 987 threshold 97.83 Actual 10 Plot_min 2.0 Plot_max 196.0
Min 1 Max 4 threshold 1.55 Actual 1 Plot_min 1.0 Plot_max 2.0
Min 1 Max 23 threshold 5.5 Actual 1 Plot_min 1 Plot_max 8.0


HBox(children=(Label(value='Risk Score: '), FloatProgress(value=0.0, bar_style='info', layout=Layout(width='40…

FloatSlider(value=10.0, continuous_update=False, description='#1 Increase the values of CountStmt to more than…

FloatSlider(value=1.0, continuous_update=False, description='#2 Decrease the values of MAJOR_COMMIT to less th…

FloatSlider(value=1.0, continuous_update=False, description='#3 Decrease the values of COMM to less than 1', l…

Output(layout=Layout(border='3px solid black'))

In [2]:
%%time

import warnings

# Imported here so this cell is self-contained; needed to target only
# convergence failures below.
from sklearn.exceptions import ConvergenceWarning

# Indices of the instances whose explanation did not converge.
not_converge_index = []

for i in range(5):
    print(i)
    explain_index = i
    # One-element list indexing keeps a one-row DataFrame / one-element Series.
    X_explain = X_test.iloc[[explain_index]]
    y_explain = y_test.iloc[[explain_index]]

    with warnings.catch_warnings():
        # Escalate only ConvergenceWarning to an exception. Escalating every
        # warning would make unrelated ones (deprecations, future warnings)
        # abort the explanation and be misreported as a convergence failure.
        warnings.filterwarnings('error', category=ConvergenceWarning)
        try:
            print("executed")
            pyExp_rule_obj = pyExp.explain(X_explain,
                                           y_explain,
                                           search_function='crossoverinterpolation',
                                           top_k=3,
                                           max_rules=30,
                                           max_iter=5,
                                           cv=5,
                                           debug=False)
            print(i, " completed")
        except ConvergenceWarning:
            not_converge_index.append(i)
            print("Index ", i, " cannot be converged!")


0
executed
0  completed
1
executed
1  completed
2
executed


KeyboardInterrupt: 

### 3.4 Load Sample Rule Object

#### 3.4.1 Create Reading and Writing Functions

In [13]:
import pickle
import os.path

# Util functions for reading and writing data
def save_object(object_i, filename):
    """Pickle ``object_i`` and write it to ``filename``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten. Returns ``None``.

    NOTE(review): this helper is also defined in the first code cell of the
    notebook; this definition shadows it.
    """
    with open(filename, mode='wb') as out_stream:
        pickle.dump(object_i, out_stream)

def load_object(filename):
    """Open ``filename`` in binary mode and return the unpickled object.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be used on files from a trusted source. This helper is also defined in
    the first code cell of the notebook; this definition shadows it.
    """
    with open(filename, mode='rb') as in_stream:
        return pickle.load(in_stream)

#### 3.4.2 Load Sample Rule Object

In [14]:
# Load the sample rule object from disk when the file is present.
# NOTE(review): if the file is missing, pyExp_rule_obj keeps whatever value an
# earlier cell assigned (or stays undefined) - confirm that is intended.
if os.path.isfile('../tests/pyExplainer_obj.pyobject'):
    pyExp_rule_obj = load_object('../tests/pyExplainer_obj.pyobject')

## 4. Pass Rule Object to .visualise(rule_obj) to Generate the Bullet Chart and Interactive Slider
#### Note: the interactive slider is not available in this version.

In [15]:
# Pass the rule object to visualise() to render it.
pyExp.visualise(pyExp_rule_obj)

Min 2 Max 987 threshold 97.83 Actual 10 Plot_min 2.0 Plot_max 196.0
Min 1 Max 4 threshold 1.55 Actual 1 Plot_min 1.0 Plot_max 2.0
Min 1 Max 23 threshold 5.5 Actual 1 Plot_min 1 Plot_max 8.0


HBox(children=(Label(value='Risk Score: '), FloatProgress(value=0.0, bar_style='info', layout=Layout(width='40…

FloatSlider(value=10.0, continuous_update=False, description='#1 Increase the values of CountStmt to more than…

FloatSlider(value=1.0, continuous_update=False, description='#2 Decrease the values of MAJOR_COMMIT to less th…

FloatSlider(value=1.0, continuous_update=False, description='#3 Decrease the values of COMM to less than 1', l…

Output(layout=Layout(border='3px solid black'))

## Create a Rule Object Manually 
#### Note: this may take a while to execute.

In [16]:
# Create Rule Object
# X_explain / y_explain are the single-row selections assigned in the loop
# cell earlier in this notebook, so that cell must have been run first.
pyExp_rule_obj = pyExp.explain(X_explain,
                               y_explain,
                               search_function = 'crossoverinterpolation',
                               top_k = 3, 
                               max_rules=30, 
                               max_iter =5, 
                               cv=5,
                               debug = False)

# Errors noted for this call (presumably observed during development -
# TODO confirm whether they still occur):
# err 01: unexpected keyword argument - max_iter
# err 02: unexpected keyword argument - n_jobs
# err 03: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.



KeyboardInterrupt: 

## Introduction to Rule Object

### 1. Basic Data Check

In [19]:
# Report the container type, then list every key with its 1-based position.
print("Type of Rule Object: ", type(pyExp_rule_obj))
print()
print("All of the keys in Rule Object")
for position, key in enumerate(pyExp_rule_obj.keys(), start=1):
    print("Key ", position, " - ", key)

Type of Rule Object:  <class 'dict'>

All of the keys in Rule Object
Key  1  -  synthetic_data
Key  2  -  synthetic_predictions
Key  3  -  X_explain
Key  4  -  y_explain
Key  5  -  indep
Key  6  -  dep
Key  7  -  top_k_positive_rules
Key  8  -  top_k_negative_rules


### Key 1 - synthetic_data

In [26]:
# Show the Python type of the value stored under the 'synthetic_data' key.
print("Type of pyExp_rule_obj['synthetic_data'] - ", type(pyExp_rule_obj['synthetic_data']))

Type of pyExp_rule_obj['synthetic_data'] -  <class 'pandas.core.frame.DataFrame'>


### Key 2 - synthetic_predictions

In [27]:
# Show the Python type of the value stored under the 'synthetic_predictions' key.
print("Type of pyExp_rule_obj['synthetic_predictions'] - ", type(pyExp_rule_obj['synthetic_predictions']))

Type of pyExp_rule_obj['synthetic_predictions'] -  <class 'numpy.ndarray'>


In [2]:
# NOTE: this attempt fails (see the recorded output below): with Test PyPI as
# the only index, pip cannot find the required numpy<2.0.0,>=1.20.1.
!pip3 install -i https://test.pypi.org/simple/ pyexplainer

Looking in indexes: https://test.pypi.org/simple/
Collecting pyexplainer
  Downloading https://test-files.pythonhosted.org/packages/b5/2d/d80098c4b1ce1f3d6ba9b8bd75d691a6a08568608fd91f616c816394f447/pyexplainer-0.1.0-py3-none-any.whl (356 kB)
[K     |████████████████████████████████| 356 kB 2.8 MB/s eta 0:00:01
[31mERROR: Could not find a version that satisfies the requirement numpy<2.0.0,>=1.20.1 (from pyexplainer)[0m
[31mERROR: No matching distribution found for numpy<2.0.0,>=1.20.1[0m


In [6]:
!pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pyexplainer

Looking in indexes: https://test.pypi.org/simple/, https://pypi.org/simple
Collecting pyexplainer
  Downloading https://test-files.pythonhosted.org/packages/b5/2d/d80098c4b1ce1f3d6ba9b8bd75d691a6a08568608fd91f616c816394f447/pyexplainer-0.1.0-py3-none-any.whl (356 kB)
[K     |████████████████████████████████| 356 kB 3.2 MB/s eta 0:00:01
Collecting numpy<2.0.0,>=1.20.1
  Downloading numpy-1.20.1-cp38-cp38-macosx_10_9_x86_64.whl (16.0 MB)
[K     |████████████████████████████████| 16.0 MB 10.3 MB/s eta 0:00:01   |███▌                            | 1.7 MB 5.1 MB/s eta 0:00:03     |█████████████████████████████   | 14.5 MB 10.3 MB/s eta 0:00:01
[?25hCollecting pandas<2.0.0,>=1.2.3
  Downloading pandas-1.2.3-cp38-cp38-macosx_10_9_x86_64.whl (10.5 MB)
[K     |████████████████████████████████| 10.5 MB 7.3 MB/s eta 0:00:01    |██████                          | 1.9 MB 7.3 MB/s eta 0:00:02     |███████████                     | 3.6 MB 7.3 MB/s eta 0:00:01     |████████████████████▍           | 

Installing collected packages: numpy, pandas, pyexplainer
  Attempting uninstall: numpy
    Found existing installation: numpy 1.19.2
    Uninstalling numpy-1.19.2:
      Successfully uninstalled numpy-1.19.2
  Attempting uninstall: pandas
    Found existing installation: pandas 1.2.2
    Uninstalling pandas-1.2.2:
      Successfully uninstalled pandas-1.2.2
Successfully installed numpy-1.20.1 pandas-1.2.3 pyexplainer-0.1.0
