In [1]:
import pytest
import pandas as pd
import numpy as np
import ipywidgets as widgets
from sklearn.ensemble import RandomForestClassifier
from pyexplainer import __version__
from pyexplainer import pyexplainer_pyexplainer
from pyexplainer.pyexplainer_pyexplainer import PyExplainer
from sklearn.utils import check_random_state
import os
import sys
import pickle
from bs4 import BeautifulSoup


# Locate the fixtures relative to the current working directory, so the
# script has to be launched from the folder that contains
# `pyexplainer_test_data/`, `rf_models/` and `rule_objects/`.
# os.path.join is used instead of "+"-concatenation so the separator is
# always correct (no doubled "/" when cwd is the filesystem root, no mixed
# separators on Windows).
cwd = os.getcwd()
file_path = os.path.join(cwd, "pyexplainer_test_data", "activemq-5.0.0.zip")
model_file_path = os.path.join(cwd, "rf_models", "rf_model1.pkl")
test_file_path = os.path.join(cwd, "pyexplainer_test_data", "activemq-5.1.0.zip")
rule_object_path = os.path.join(cwd, "rule_objects", "pyExplainer_obj.pyobject")

# Training set, indexed by the CSV's 'File' column.
train_data = pd.read_csv(file_path, index_col='File')

# The dependent variable is picked positionally: 4th column from the end.
# NOTE(review): presumably the defect label of the dataset -- confirm
# against the CSV schema, since a column reorder would silently change it.
dep = train_data.columns[-4]
selected_features = ["ADEV", "AvgCyclomaticModified", "AvgEssential", "AvgLineBlank", "AvgLineComment",
                     "CountClassBase", "CountClassCoupled", "CountClassDerived", "CountDeclClass",
                     "CountDeclClassMethod", "CountDeclClassVariable", "CountDeclInstanceVariable",
                     "CountDeclMethodDefault", "CountDeclMethodPrivate", "CountDeclMethodProtected",
                     "CountDeclMethodPublic", "CountInput_Mean", "CountInput_Min", "CountOutput_Min", "MAJOR_LINE",
                     "MaxInheritanceTree", "MaxNesting_Min", "MINOR_COMMIT", "OWN_COMMIT", "OWN_LINE",
                     "PercentLackOfCohesion", "RatioCommentToCode"]

# Keep only the selected feature columns, in the order they appear in the
# CSV. A single boolean mask replaces the previous "drop one column per
# iteration" loop, which rebuilt the Index for every column and raised
# KeyError if a non-selected label occurred twice. Names listed in
# `selected_features` but absent from the CSV are simply not selected.
all_cols = train_data.columns[train_data.columns.isin(selected_features)]
indep = all_cols
X_train = train_data.loc[:, indep]
y_train = train_data.loc[:, dep]

# Black-box model to be explained. It is re-trained on every run with a fixed
# random_state; a pickled copy could be loaded from `model_file_path` instead:
#     with open(model_file_path, 'rb') as file:
#         blackbox_model = pickle.load(file)
# NOTE: only unpickle files from a trusted source -- pickle.load can execute
# arbitrary code.
blackbox_model = RandomForestClassifier(max_depth=3, random_state=0)
blackbox_model.fit(X_train, y_train)
# Writing the model to pickle has already been done once; to refresh it:
#     with open(model_file_path, 'wb') as file:
#         pickle.dump(obj=blackbox_model, file=file)

# NOTE(review): presumably display names for the two target classes; not
# referenced in the visible part of this file.
class_label = ['clean', 'defect']

# The explainer receives the training features and labels, the feature names,
# the label name and the fitted model.
py_explainer = PyExplainer(X_train, y_train, indep, dep, blackbox_model)
# Load the second dataset (`test_file_path`) and pick the instance to explain.
# The redundant `cwd = os.getcwd()` that used to sit here was dropped: every
# path is already built above and nothing reads the recomputed value.
sample_test_data = pd.read_csv(test_file_path, index_col='File')
X_test = sample_test_data.loc[:, indep]
y_test = sample_test_data.loc[:, dep]

# Position of the row that will be explained.
sample_explain_index = 0
# The list-style indexer ([[i]]) keeps the selection as a one-row DataFrame /
# one-element Series instead of collapsing it to a Series / scalar.
testing_X_explain = X_test.iloc[[sample_explain_index]]
testing_y_explain = y_test.iloc[[sample_explain_index]]
# Hand-written list of two bullet-chart entries, one dict per rule.
# NOTE(review): appears to be a fixture describing the expected shape of
# bullet data; it is not referenced in the visible part of this file.
testing_bullet_data = [
    dict(
        title='#1 Increase the values of CountStmt to more than 10',
        subtitle='Actual = 10',
        ticks=[2.0, 196.0],
        step=[1],
        startPoints=[0, 222.0],
        widths=[222.0, 228.0],
        colors=['#d7191c', '#a6d96a'],
        markers=[10],
        varRef='CountStmt',
    ),
    dict(
        title='#2 Decrease the values of MAJOR_COMMIT to less than 1',
        subtitle='Actual = 1',
        ticks=[1.0, 2.0],
        step=[0.1],
        startPoints=[0, 248.0],
        widths=[248.0, 202.0],
        colors=['#a6d96a', '#d7191c'],
        markers=[1],
        varRef='MAJOR_COMMIT',
    ),
]
# Ask the explainer for an explanation of the single selected instance.
# The returned object is indexed below by 'top_k_positive_rules' and
# 'top_k_negative_rules'.
test_rule_object = py_explainer.explain(
    X_explain=testing_X_explain,
    y_explain=testing_y_explain,
    search_function='crossoverinterpolation',
    top_k=3,
    max_rules=30,
    max_iter=10000,
    cv=5,
    debug=False,
)

# Turn the positive and negative top-k rule sets into the parsed form that
# generate_bullet_data() takes as its argument.
top_rules = py_explainer.parse_top_rules(
    top_k_positive_rules=test_rule_object['top_k_positive_rules'],
    top_k_negative_rules=test_rule_object['top_k_negative_rules'],
)

# Attach the explained instance to the explainer before building the chart
# data (X first, then y, as before).
# NOTE(review): presumably generate_bullet_data() reads these attributes -- confirm.
py_explainer.X_explain, py_explainer.y_explain = testing_X_explain, testing_y_explain

b = py_explainer.generate_bullet_data(top_rules)
# Bare expression: echoes the value when run as the last line of a notebook cell.
b

[{'title': '#1 Decrease the values of CountClassCoupled to less than 0',
  'subtitle': 'Actual = 0',
  'ticks': [0, 19.0],
  'step': [1],
  'startPoints': [0, 276.0],
  'widths': [276.0, 174.0],
  'colors': ['#a6d96a', '#d7191c'],
  'markers': [0],
  'varRef': 'CountClassCoupled'}]