Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ __pycache__/
*.csv
*.xlsx
# Distribution / packaging
.idea/
.Python
build/
develop-eggs/
Expand Down
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/explainx.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

152 changes: 152 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 3 additions & 14 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,15 @@
language: python # this works for Linux but is an error on macOS or Windows
language: python # this works for Linux but is an error on macOS or Windows
python:
- "2.7"
- "3.4"
- "3.5"
- "3.6" # current default Python on Travis CI
- "3.7"
- "3.8"
jobs:
include:
- name: "Python 3.8.0 on Xenial Linux"
- name: "Python 3.6.0 on Xenial Linux"
python: 3.8 # this works for Linux but is ignored on macOS or Windows
- name: "Python 3.7.4 on macOS"
- name: "Python 3.6.0 on macOS"
os: osx
osx_image: xcode11.2 # Python 3.7.4 running on macOS 10.14.4
language: shell # 'language: python' is an error on Travis CI macOS
- name: "Python 3.8.0 on Windows"
os: windows # Windows 10.0.17134 N/A Build 17134
language: shell # 'language: python' is an error on Travis CI Windows
before_install:
- choco install python --version 3.8.0
- python -m pip install --upgrade pip
env: PATH=/c/Python38:/c/Python38/Scripts:$PATH
install:
- pip3 install --upgrade pip # all three OSes agree about 'pip3'
- pip install -r requirements.txt
Expand Down
2 changes: 2 additions & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from explainx.explain import *

from explainx.main import *
Binary file modified demo-explainx-with-sound.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
77 changes: 36 additions & 41 deletions explain.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import os
import sys

import re

from pathlib import Path
from sys import platform
import subprocess
Expand All @@ -19,33 +17,25 @@
from calculate_shap import *
from analytics import Analytics

"""
This class calculates feature importance

Input:


"""


class explain():
def __init__(self):
super(explain, self).__init__()
self.param = {}

# is classification function?

def is_classification_given_y_array(self, y_test):
    """Guess whether a target/prediction array holds class labels.

    The decision is a heuristic on the number of distinct values:

    * fewer than 30 samples: classification if there are fewer than
      10 distinct values;
    * 30 samples or more: classification if there are fewer than
      20 distinct values.

    Args:
        y_test: A sized iterable of hashable values (the distinct values
            are counted with ``set``, so unhashable elements raise
            ``TypeError``).

    Returns:
        True if the values look like class labels, False otherwise.
        An empty input yields True, because zero distinct values is
        below both thresholds.
    """
    sample_count = len(y_test)
    distinct_count = len(set(y_test))

    # Small samples get a tighter cap on the number of distinct values.
    max_distinct = 10 if sample_count < 30 else 20
    return distinct_count < max_distinct
# def is_classification_given_y_array(self, y_test):
# is_classification = False
# total = len(y_test)
# total_unique = len(set(y_test))
# if total < 30:
# if total_unique < 10:
# is_classification = True
# else:
# if total_unique < 20:
# is_classification = True
# return is_classification


def random_string_generator(self):
random_str = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
Expand All @@ -54,6 +44,8 @@ def random_string_generator(self):
def ai(self, df, y, model, model_name="xgboost", mode=None):
y_variable = "y_actual"
y_variable_predict = "y_prediction"

#Code for Analytics
instance_id = self.random_string_generator()
analytics = Analytics()
analytics['ip'] = analytics.finding_ip()
Expand All @@ -69,11 +61,6 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):
analytics['finish_time'] = ''
analytics.insert_data()

# If yes, then different shap functions are required.
# get the shap value based on prediction and make a new dataframe.

# find predictions first as shap values need that.

prediction_col = []

if model_name == "xgboost":
Expand All @@ -88,40 +75,48 @@ def ai(self, df, y, model, model_name="xgboost", mode=None):
prediction_col = model.predict(df.to_numpy())

else:
prediction_col = model.predict(df.to_numpy())
prediction_col = model.predict(df)

# is classification?
is_classification = self.is_classification_given_y_array(prediction_col)
#is_classification = self.is_classification_given_y_array(prediction_col)
ModelType = lambda model: True if is_classifier(model) else False
is_classification = ModelType(model)

# shap
c = calculate_shap()
self.df_final, self.explainer = c.find(model, df, prediction_col, is_classification, model_name=model_name)

# prediction col
#Append Model Decision & True Labels Columns into the dataset.
self.df_final[y_variable_predict] = prediction_col

self.df_final[y_variable] = y

# additional inputs.
if is_classification == True:
# find and add probabilities in the dataset.
prediction_col_prob = model.predict_proba(df.to_numpy())
pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)
#prediction_col_prob = model.predict_proba(df)
#pd_prediction_col_prob = pd.DataFrame(prediction_col_prob)

for c in pd_prediction_col_prob.columns:
self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c])
probabilities = model.predict_proba(df)

classes = []
for c in pd_prediction_col_prob.columns:
classes.append(str(c))
self.param["classes"] = classes
for i in range(len(np.unique(prediction_col))):
self.df_final['Probability: {}'.format(np.unique(prediction_col)[i])] = probabilities[:,i]

self.param['classes'] = np.unique(prediction_col)

#for c in pd_prediction_col_prob.columns:
# self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c])

#classes = []
#for c in pd_prediction_col_prob.columns:
# classes.append(str(c))
#self.param["classes"] = classes

try:
expected_values_by_class = self.explainer.expected_value
except:
expected_values_by_class = []
for c in range(len(classes)):
expected_values_by_class.append(1 / len(classes))
for c in range(len(np.unique(prediction_col))):
expected_values_by_class.append(1 / len(np.unique(prediction_col)))

self.param["expected_values"] = expected_values_by_class
else:
Expand Down
Loading