In [2]:
from pyexplainer_explainer import Explainer
from time import sleep

# Hand-written demo input: one dict per bullet chart, plus a single risk summary.
# NOTE(review): the 760/200 factor presumably rescales a 200-unit tick range onto
# a 760-unit-wide chart -- confirm against the Explainer implementation.
bullet_data = [
    dict(
        title="#1 Decrease the number of class and method declaration lines to less than 500 lines",
        subtitle="Actual = 520 lines",
        ticks=[400, 600],
        startPoints=[0, 50*760/200, 130*760/200],
        widths=[380, 380, 70*760/200],
        colors=["#00FF00", "#FA8128", "#00FF00"],
        markers=[520],
        step=[1],
    ),
    dict(
        title="#2 Decrease the number of distinct developers to less than 2 developers",
        subtitle="Actual = 3.2 developers",
        ticks=[0.0, 5.0],
        startPoints=[0.0, 304],
        widths=[304, 456],
        colors=["#00FF00", "#FA8128"],
        markers=[3.2],
        step=[1],
    ),
]

risk_data = [dict(riskScore=["12%"], riskPred=["No"])]

# Render the widgets for the hand-written demo data.
ex = Explainer(risk_data=risk_data, bullet_data=bullet_data)
ex.show_visualisation()

HBox(children=(Label(value='Risk Score: '), FloatProgress(value=0.0, bar_style='info', layout=Layout(width='40…

FloatSlider(value=520.0, continuous_update=False, description='#1 Decrease the number of class and method decl…

FloatSlider(value=3.2, continuous_update=False, description='#2 Decrease the number of distinct developers to …

Output(layout=Layout(border='3px solid black'))

# Build a blackbox model (RF) and a pyExplainer object

In [2]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from pyexplainer_model import pyExplainer
import pickle
import os.path

In [3]:
# Util functions
def save_object(object_i, filename):
    """Pickle ``object_i`` into the file at ``filename``, overwriting it.

    Args:
        object_i: Any picklable Python object.
        filename: Path of the file to write (opened in binary write mode).
    """
    with open(filename, 'wb') as sink:
        pickle.dump(object_i, sink)

def load_object(filename):
    """Read and return the pickled object stored in the file at ``filename``.

    Args:
        filename: Path of a file previously written with ``pickle.dump``.

    Returns:
        The unpickled Python object.

    Note:
        Unpickling can execute arbitrary code; only load files you trust.
    """
    with open(filename, 'rb') as source:
        return pickle.load(source)

In [4]:
# Read the example dataset; rows are indexed by the 'File' column.
Data = pd.read_csv('../tests/example-datasets/activemq-5.0.0.csv', index_col='File')

# The fourth-from-last column is used as the label; every column before the
# last four is used as a feature.
dep = Data.columns[-4]
indep = Data.columns[:-4]

X_train, y_train = Data.loc[:, indep], Data.loc[:, dep]

# Example black-box model: a shallow random forest with a fixed seed.
blackbox_model = RandomForestClassifier(max_depth=3, random_state=0).fit(X_train, y_train)

class_label = ['Clean', 'Defect']

In [5]:
# Wrap the training data, column names, class labels and fitted model in a pyExplainer.
pyExp = pyExplainer(X_train, y_train, indep, dep, class_label,
                    blackbox_model=blackbox_model)

# Load testing data to get an example instance to explain

In [6]:
# Pick one row to explain.
# NOTE(review): this reads the same CSV the model was trained on, so the
# "test" instance is a training row -- confirm whether another file was intended.
Data_2 = pd.read_csv('../tests/example-datasets/activemq-5.0.0.csv', index_col='File')

X_test, y_test = Data_2.loc[:, indep], Data_2.loc[:, dep]

explain_index = 0

# List-style iloc keeps the selection as a one-row DataFrame / one-element Series.
X_explain, y_explain = X_test.iloc[[explain_index]], y_test.iloc[[explain_index]]

In [7]:
# Reuse a previously pickled explanation when one exists; otherwise compute it
# and write it to the same path so the next run can skip the slow rule search.
# NOTE(review): the cache is not keyed on explain_index or the model, so delete
# the file after changing either one. Only unpickle cache files you created.
pyexp_cache_path = '../tests/pyExplainer_obj.pyobject'

if os.path.isfile(pyexp_cache_path):
    pyExp_rule_obj = load_object(pyexp_cache_path)
else:
    # This can take a long time to execute
    pyExp_rule_obj = pyExp.explain(X_explain,
                                   y_explain,
                                   search_function='crossoverinterpolation',
                                   top_k=3,
                                   max_rules=30,
                                   max_iter=5,
                                   cv=5,
                                   debug=False)
    # Persist the result; without this the cache branch above is never populated.
    save_object(pyExp_rule_obj, pyexp_cache_path)

In [8]:
# Display the top-k positive (Defect) rules stored under 'top_k_positive_rules'
pyExp_rule_obj['top_k_positive_rules']

Unnamed: 0,rule,type,coef,support,importance,Class
92,MaxNesting_Mean > 0.1850000023841858 & CountDe...,rule,1.898139,0.101828,0.574038,Defect
69,CountLineCodeDecl <= 46.209999084472656 & SumC...,rule,1.352392,0.028721,0.225877,Defect
87,SumCyclomaticModified > 24.84500026702881 & DD...,rule,0.471191,0.130548,0.158747,Defect


In [9]:
# Display the top-k negative (Clean) rules stored under 'top_k_negative_rules'
pyExp_rule_obj['top_k_negative_rules']

Unnamed: 0,rule,type,coef,support,importance,Class
85,CountStmt > 97.83000183105469 & MAJOR_COMMIT <...,rule,-7.757467,0.049608,1.684413,Clean
81,COMM <= 5.5 & CountStmt <= 97.83000183105469,rule,-2.193372,0.796345,0.883305,Clean
79,SumCyclomatic <= 31.5 & CountLineCodeDecl <= 4...,rule,-1.491272,0.817232,0.576341,Clean


In [10]:
# Turn the positive and negative rule tables into the structure used for visualisation.
parsed_rules_obj = pyExp.parse_top_rules(pyExp_rule_obj['top_k_positive_rules'],
                                         pyExp_rule_obj['top_k_negative_rules'])
parsed_rules_obj

{'top_tofollow_rules': [{'variable': 'CountStmt',
   'lessthan': False,
   'value': '97.83000183105469'},
  {'variable': 'MAJOR_COMMIT',
   'lessthan': True,
   'value': '1.5450000166893005'},
  {'variable': 'COMM', 'lessthan': True, 'value': '5.5'}],
 'top_toavoid_rules': [{'variable': 'MaxNesting_Mean',
   'lessthan': False,
   'value': '0.1850000023841858'},
  {'variable': 'CountDeclFunction',
   'lessthan': False,
   'value': '17.800000190734863'},
  {'variable': 'CountLineCodeDecl',
   'lessthan': True,
   'value': '46.209999084472656'}]}

In [21]:
# Generate bullet data for the instance being explained from the parsed rules.
# NOTE(review): in the recorded output the titles quote the actual value
# ("more than 63", "less than 1") rather than the printed thresholds
# (97.83, 1.55, 5.5) -- presumably a get_bullet_data issue; confirm.
bullet_data = pyExp.get_bullet_data(parsed_rules_obj, X_explain)
bullet_data

Min 2 Max 987 threshold 97.83 Actual 63 Plot_min 2.0 Plot_max 196.0
Min 1 Max 4 threshold 1.55 Actual 1 Plot_min 1.0 Plot_max 2.0
Min 1 Max 23 threshold 5.5 Actual 1 Plot_min 3.0 Plot_max 8.0


[{'title': '#1 Increase the values of CountStmt to more than 63',
  'subtitle': 'Actual = 63',
  'ticks': [2.0, 196.0],
  'step': [1],
  'startPoints': [0, 375.0],
  'widths': [375.0, 385.0],
  'colors': ['#d7191c', '#a6d96a'],
  'markers': [63],
  'tickFormat': [2.0, 196.0]},
 {'title': '#2 Decrease the values of MAJOR_COMMIT to less than 1',
  'subtitle': 'Actual = 1',
  'ticks': [1.0, 2.0],
  'step': [1],
  'startPoints': [0, 418.0],
  'widths': [418.0, 342.0],
  'colors': ['#a6d96a', '#d7191c'],
  'markers': [1],
  'tickFormat': [1.0, 2.0]},
 {'title': '#3 Decrease the values of COMM to less than 1',
  'subtitle': 'Actual = 1',
  'ticks': [3.0, 8.0],
  'step': [1],
  'startPoints': [0, 380.0],
  'widths': [380.0, 380.0],
  'colors': ['#a6d96a', '#d7191c'],
  'markers': [1],
  'tickFormat': [3.0, 8.0]}]

In [22]:
# Reference: a hand-written bullet-data entry, kept (commented out) for comparison with the generated bullet_data
#           "title":
#             "#1 Decrease the number of class and method declaration lines to less than 500 lines",
#           "subtitle": "Actual = 520 lines",
#           "ticks": [400.0, 600.0],
#           "startPoints": [0, 50*760/200, 130*760/200],
#           "widths": [380, 380, 70*760/200],
#           "colors": ["#00FF00", "#FA8128", "#00FF00"],
#           "markers": [520],
#           "step": [1]

SyntaxError: invalid syntax (<ipython-input-22-ac806acd6c4b>, line 1)

In [23]:
# Generate risk data (risk score and predicted class) for the instance being explained
risk_data = pyExp.get_risk_data(X_explain)
risk_data

[{'riskScore': ['8%'], 'riskPred': ['Clean']}]

In [24]:
# Render the visualisation from the generated risk and bullet data.
# NOTE(review): the recorded run of this cell ends in KeyError: '#3'. The
# generated bullet_data has three entries -- presumably Explainer fails on the
# third one; confirm in pyexplainer_explainer.
ex = Explainer(risk_data=risk_data, bullet_data=bullet_data)
ex.show_visualisation()

HBox(children=(Label(value='Risk Score: '), FloatProgress(value=0.0, bar_style='info', layout=Layout(width='40…

KeyError: '#3'