diff --git a/.gitignore b/.gitignore index abf2a0a..a75727b 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ *.csv *.xlsx # Distribution / packaging +.idea/ .Python build/ develop-eggs/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/explainx.iml b/.idea/explainx.iml new file mode 100644 index 0000000..8a05c6e --- /dev/null +++ b/.idea/explainx.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..b6e4ae7 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,152 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..8a902d2 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..0fbe114 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 49bc6fa..c05f53e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,26 +1,15 @@ -language: python # this works for Linux but is an error on macOS or Windows +language: python # this works for Linux but is an error on macOS or Windows python: - - "2.7" - - "3.4" - - "3.5" - "3.6" # current default Python on Travis CI - "3.7" - - "3.8" jobs: include: - - name: "Python 3.8.0 on Xenial Linux" + - name: "Python 3.6.0 on Xenial Linux" python: 3.8 # this works for Linux but is ignored on macOS or Windows - - name: "Python 3.7.4 on macOS" + - name: "Python 3.6.0 on macOS" os: osx osx_image: xcode11.2 # Python 3.7.4 running on macOS 10.14.4 language: shell # 'language: python' is an error on Travis CI macOS - - name: "Python 3.8.0 on Windows" - os: windows # Windows 10.0.17134 N/A Build 17134 - language: shell # 'language: python' is an error on Travis CI Windows - before_install: - - choco install python --version 3.8.0 - - python -m pip install --upgrade pip - env: PATH=/c/Python38:/c/Python38/Scripts:$PATH install: - pip3 install --upgrade pip # all three OSes agree about 'pip3' - pip install -r requirements.txt diff --git a/__init__.py b/__init__.py index 6fc38a4..1de6659 100644 --- a/__init__.py +++ b/__init__.py @@ -1 +1,3 @@ from explainx.explain import * + +from explainx.main import * diff --git a/demo-explainx-with-sound.gif b/demo-explainx-with-sound.gif index 1a83e44..e69de29 100644 Binary files a/demo-explainx-with-sound.gif and b/demo-explainx-with-sound.gif differ diff --git a/explain.py b/explain.py index 8cba40d..42dd6fc 100644 --- a/explain.py +++ b/explain.py @@ -1,8 +1,6 @@ import os import sys - import re - from pathlib import Path from sys import platform import subprocess @@ -19,15 +17,6 @@ from calculate_shap import * from analytics import Analytics -""" -This class 
calculates feature importance - -Input: - - -""" - - class explain(): def __init__(self): super(explain, self).__init__() @@ -35,17 +24,18 @@ def __init__(self): # is classification function? - def is_classification_given_y_array(self, y_test): - is_classification = False - total = len(y_test) - total_unique = len(set(y_test)) - if total < 30: - if total_unique < 10: - is_classification = True - else: - if total_unique < 20: - is_classification = True - return is_classification + # def is_classification_given_y_array(self, y_test): + # is_classification = False + # total = len(y_test) + # total_unique = len(set(y_test)) + # if total < 30: + # if total_unique < 10: + # is_classification = True + # else: + # if total_unique < 20: + # is_classification = True + # return is_classification + def random_string_generator(self): random_str = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10)) @@ -54,6 +44,8 @@ def random_string_generator(self): def ai(self, df, y, model, model_name="xgboost", mode=None): y_variable = "y_actual" y_variable_predict = "y_prediction" + + #Code for Analytics instance_id = self.random_string_generator() analytics = Analytics() analytics['ip'] = analytics.finding_ip() @@ -69,11 +61,6 @@ def ai(self, df, y, model, model_name="xgboost", mode=None): analytics['finish_time'] = '' analytics.insert_data() - # If yes, then different shap functuions are required. - # get the shap value based on predcton and make a new dataframe. - - # find predictions first as shap values need that. - prediction_col = [] if model_name == "xgboost": @@ -88,40 +75,48 @@ def ai(self, df, y, model, model_name="xgboost", mode=None): prediction_col = model.predict(df.to_numpy()) else: - prediction_col = model.predict(df.to_numpy()) + prediction_col = model.predict(df) # is classification? - is_classification = self.is_classification_given_y_array(prediction_col) + #is_classification = self.is_classification_given_y_array(prediction_col) + ModelType = lambda model: True if is_classifier(model) else False + is_classification = ModelType(model) # shap c = calculate_shap() self.df_final, self.explainer = c.find(model, df, prediction_col, is_classification, model_name=model_name) - # prediction col + #Append Model Decision & True Labels Columns into the dataset. self.df_final[y_variable_predict] = prediction_col - self.df_final[y_variable] = y # additional inputs. if is_classification == True: # find and add probabilities in the dataset. 
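Reviewer aside: the branch that follows appends one probability column per predicted class. A minimal, self-contained sketch of that pattern (illustrative only, not part of the diff; it keys the columns off model.classes_, the column order scikit-learn guarantees for predict_proba):

from sklearn.base import is_classifier
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

X, y = load_iris(return_X_y=True, as_frame=True)
model = RandomForestClassifier(random_state=0).fit(X, y)

df_final = X.copy()
df_final['y_prediction'] = model.predict(X)
df_final['y_actual'] = y

if is_classifier(model):                    # same check as the ModelType lambda above
    probabilities = model.predict_proba(X)  # columns ordered by model.classes_
    for i, cls in enumerate(model.classes_):
        df_final['Probability: {}'.format(cls)] = probabilities[:, i]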
- prediction_col_prob = model.predict_proba(df.to_numpy()) - pd_prediction_col_prob = pd.DataFrame(prediction_col_prob) + #prediction_col_prob = model.predict_proba(df) + #pd_prediction_col_prob = pd.DataFrame(prediction_col_prob) - for c in pd_prediction_col_prob.columns: - self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c]) + probabilities = model.predict_proba(df) - classes = [] - for c in pd_prediction_col_prob.columns: - classes.append(str(c)) - self.param["classes"] = classes + for i in range(len(np.unique(prediction_col))): + self.df_final['Probability: {}'.format(np.unique(prediction_col)[i])] = probabilities[:,i] + + self.param['classes'] = np.unique(prediction_col) + + #for c in pd_prediction_col_prob.columns: + # self.df_final["probability_of_predicting_class_" + str(c)] = list(pd_prediction_col_prob[c]) + + #classes = [] + #for c in pd_prediction_col_prob.columns: + # classes.append(str(c)) + #self.param["classes"] = classes try: expected_values_by_class = self.explainer.expected_value except: expected_values_by_class = [] - for c in range(len(classes)): - expected_values_by_class.append(1 / len(classes)) + for c in range(len(np.unique(prediction_col))): + expected_values_by_class.append(1 / len(np.unique(prediction_col))) self.param["expected_values"] = expected_values_by_class else: diff --git a/lib/analytics.py b/lib/analytics.py index 0fb0e75..6f0c175 100644 --- a/lib/analytics.py +++ b/lib/analytics.py @@ -9,27 +9,18 @@ def __init__(self): @staticmethod def finding_address(): - try: - val = get_mac() - return val - - except Exception as e : - return None + val = get_mac() + return val @staticmethod def finding_ip(): - try: - val = socket.gethostbyname(socket.gethostname()) - return val - except Exception as e: - return None + val = socket.gethostbyname(socket.gethostname()) + return val @staticmethod def finding_system(): - try: - return platform.system() - except Exception as e: - return None + return platform.system() + def __setitem__(self, key, val): self.dict[key] = val diff --git a/lib/calculate_shap.py b/lib/calculate_shap.py index 3500b1a..c497fde 100644 --- a/lib/calculate_shap.py +++ b/lib/calculate_shap.py @@ -9,7 +9,6 @@ """ - class calculate_shap(): def __init__(self): super(calculate_shap, self).__init__() diff --git a/lib/dashboard.py b/lib/dashboard.py index 6f95ad2..adfdf79 100644 --- a/lib/dashboard.py +++ b/lib/dashboard.py @@ -464,7 +464,6 @@ def toggle_collapse(n, is_open): return is_open #Cohort Analysis - Callbacks - @app.callback( Output("modal", "is_open"), [Input("open", "n_clicks"), Input("close", "n_clicks")], @@ -675,6 +674,7 @@ def update_graph(xaxis_column_name, third_axis_name, sql_query): g = plotly_graphs() graph_type = 'pdp' df3 = self.caching_data_manager(df, sql_query, graph_type, g.partial_dependence_plot) + print(df3) fig = g.pdp_plot(df3, df3[xaxis_column_name], df3[xaxis_column_name+"_impact"], df3[third_axis_name]) return fig diff --git a/lib/encode_decode_cat_col.py b/lib/encode_decode_cat_col.py index b958119..b3af243 100644 --- a/lib/encode_decode_cat_col.py +++ b/lib/encode_decode_cat_col.py @@ -1,13 +1,7 @@ from imports import * from sklearn.preprocessing import OneHotEncoder import numpy as np -""" -This class calculates feature importance -Input: - - -""" class encode_decode_cat_col(): diff --git a/lib/feature_impact.py b/lib/feature_impact.py index e59891f..e80297b 100644 --- a/lib/feature_impact.py +++ b/lib/feature_impact.py @@ -1,14 +1,5 @@ from imports import * -""" -This class 
calculates feature impact - -Input: - - -""" - - class feature_impact(): def __init__(self): super(feature_impact, self).__init__() @@ -16,7 +7,8 @@ def __init__(self): def find(self, df): - + df = pd.DataFrame(df) + print(df) variables = [col for col in df.columns if '_impact' in col] y = [] for i in range(len(variables)): diff --git a/lib/feature_impact_classification.py b/lib/feature_impact_classification.py index b996ae0..fbe5344 100644 --- a/lib/feature_impact_classification.py +++ b/lib/feature_impact_classification.py @@ -1,14 +1,5 @@ from imports import * -""" -This class calculates feature impact - -Input: - - -""" - - class feature_impact_classification(): def __init__(self): super(feature_impact_classification, self).__init__() diff --git a/lib/feature_importance.py b/lib/feature_importance.py index fd858d1..0b68164 100644 --- a/lib/feature_importance.py +++ b/lib/feature_importance.py @@ -1,14 +1,5 @@ from imports import * -""" -This class calculates feature importance - -Input: - - -""" - - class feature_importance(): def __init__(self): super(feature_importance, self).__init__() diff --git a/lib/feature_importance_classification.py b/lib/feature_importance_classification.py index 228d98f..61d7037 100644 --- a/lib/feature_importance_classification.py +++ b/lib/feature_importance_classification.py @@ -1,13 +1,5 @@ from imports import * -""" -This class calculates feature importance - -Input: - - -""" - class feature_importance_classification(): def __init__(self): diff --git a/lib/frameworks/__init__.py b/lib/frameworks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/frameworks/shapley_pdp.py b/lib/frameworks/shapley_pdp.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/frameworks/shapley_values.py b/lib/frameworks/shapley_values.py new file mode 100644 index 0000000..439f62f --- /dev/null +++ b/lib/frameworks/shapley_values.py @@ -0,0 +1,318 @@ +from operator import is_ +import shap +from explainx.lib.utils import is_classification +import pandas as pd +import numpy as np +class ShapleyValues(): + + def __init__(self, model,input_data, target_data, ct): + #super().__init__(model, input_data, ct) + + self.model = model + self.input_data = input_data + self.actual_data = target_data + self.ct = ct + #self.row_number = 0 + + def tree_explainer(self): + explainer = shap.TreeExplainer(self.model) + try: + #classification case + predictions = self.model.predict + probabilities = self.model.predict_proba + return explainer, predictions, probabilities + except: + #regression case + predictions = self.model.predict + return explainer, predictions + + def kernel_explainer(self): + try: + #classification case + explainer = shap.KernelExplainer(self.model.predict_proba, + shap.sample(self.input_data, 100), + link='logit', + feature_names=self.input_data.columns, + seed=0) + predictions = self.model.predict + prediction_probabilities = self.model.predict_proba + return explainer, predictions, prediction_probabilities + except: + #regression case + explainer = shap.KernelExplainer(self.model.predict, + shap.sample(self.input_data, 100), + link='identity', + feature_names=self.input_data.columns, + seed=0) + predictions = self.model.predict + return explainer, predictions + + def kernel_explainer_with_ct(self): + try: + #classification case + pred_fcn = lambda x : self.model.predict_proba(self.ct.transform(x)) + explainer = shap.KernelExplainer(pred_fcn, shap.sample(self.input_data, 100), + link='logit', + feature_names=self.input_data.columns, + 
seed=0) + pred = lambda x : self.model.predict(self.ct.transform(x)) + return explainer, pred, pred_fcn + + except: + pred_fcn = lambda x : self.model.predict(self.ct.transform(x)) + explainer = shap.KernelExplainer(pred_fcn, shap.sample(self.input_data, 100), + link='identity', + feature_names=self.input_data.columns, + seed=0) + return explainer, pred_fcn + + def shap_explainer(self): + if is_classification(self.model): + if self.ct == None: + try: + explainer, pred, pred_prob = self.tree_explainer() + return explainer, pred, pred_prob + except: + try: + explainer, pred, pred_prob = self.kernel_explainer() + return explainer, pred, pred_prob + except: + raise Exception(("{} not supported. Please create an issue on Github").format(self.model)) + else: + try: + explainer, pred, pred_fcn = self.kernel_explainer_with_ct() + return explainer, pred, pred_fcn + except: + raise Exception(("{} not supported. Please create an issue on Github").format(self.model)) + else: + if self.ct == None: + try: + explainer, pred = self.tree_explainer() + return explainer, pred + except: + try: + explainer, pred = self.kernel_explainer() + return explainer, pred + except: + raise Exception(("{} not supported. Please create an issue on Github").format(self.model)) + else: + try: + explainer, pred_fcn = self.kernel_explainer_with_ct() + return explainer, pred_fcn + except: + raise Exception(("{} not supported. Please create an issue on Github").format(self.model)) + + + + def append_shap_values_to_df(self, input_sv, in_data, scope): + + df_shap = pd.DataFrame(input_sv) + + features = list(self.input_data.columns) + shap_columns = [] + for i in features: + shap_columns.append(i + "_impact") + + try: + df_shap.columns = shap_columns + except: + df_shap = df_shap.T + df_shap.columns = shap_columns + + input_data = in_data + + if scope == 'global': + for i in shap_columns: + input_data[i] = list(df_shap[i]) + return input_data + else: + for i in shap_columns: + input_data[i] = list(df_shap[i])[0] + return input_data + + def global_shap_plotting(self): + if is_classification(self.model): + explainer, pred, pred_fcn = self.shap_explainer() + if type(explainer) == shap.explainers._tree.Tree: + global_shap_values = explainer.shap_values(self.input_data) + data_with_shap = self.append_shap_values_to_df(input_sv = global_shap_values[0], + in_data=self.input_data.copy(), scope="global") + prediction = pred(self.input_data) + probabilities = pred_fcn(self.input_data) + + data_with_shap['Model Decision'] = prediction + data_with_shap['True Values'] = self.actual_data + for i in range(len(np.unique(prediction))): + data_with_shap['Probability: {}'.format(np.unique(prediction)[i])] = probabilities[:,i] + return explainer, global_shap_values, data_with_shap + else: + predictions = pred(shap.sample(self.input_data,100)) + global_shap_values = explainer.shap_values(shap.sample(self.input_data,100)) + data_with_shap = self.append_shap_values_to_df(input_sv = global_shap_values[0], + in_data=shap.sample(self.input_data,100).copy(), + scope='global') + prediction = pred(shap.sample(self.input_data,100)) + probabilities = pred_fcn(shap.sample(self.input_data,100)) + data_with_shap['Model Decision'] = prediction + data_with_shap['True Values'] = self.actual_data + for i in range(len(np.unique(self.actual_data))): + data_with_shap['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i] + + return explainer, global_shap_values, data_with_shap + else: + explainer, pred = self.shap_explainer() + if type(explainer) 
== shap.explainers._tree.Tree: + #Complete! Do not change. + global_shap_values = explainer.shap_values(self.input_data) + data_with_shap = self.append_shap_values_to_df(input_sv = global_shap_values, + in_data=self.input_data.copy(), + scope="global") + data_with_shap['Model Decision'] = pred(self.input_data) + data_with_shap['True Values'] = self.actual_data + return explainer, global_shap_values, data_with_shap + + else: + global_shap_values = explainer.shap_values(shap.sample(self.input_data,100)) + predictions = pred(shap.sample(self.input_data,100)) + data_with_shap = self.append_shap_values_to_df(input_sv = global_shap_values, + in_data=shap.sample(self.input_data,100).copy(), + scope="global") + data_with_shap['Model Decision'] = pred(shap.sample(self.input_data,100)) + data_with_shap['True Values'] = shap.sample(self.actual_data,100) + return explainer, global_shap_values, data_with_shap + + def add_shap_row(self, input_data, row_number): + if is_classification(self.model): + explainer, pred, pred_prob = self.shap_explainer() + else: + explainer, pred = self.shap_explainer() + + if type(explainer) == shap.explainers._tree.Tree: + shap_values = explainer.shap_values(input_data) + + + shap_row = self.append_shap_values_to_df(input_sv = shap_values[0], + in_data= input_data.copy(), + scope='local') + + shap_row['Model Decision'] = pred(pd.DataFrame(input_data))[0] + shap_row['Actual Decision'] = self.actual_data[row_number] + + if is_classification(self.model): + probabilities = pred_prob(np.array(input_data)) + for i in range(len(np.unique(self.actual_data))): + shap_row['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0] + + return shap_row + else: + shap_values = explainer.shap_values(input_data) + + shap_row = self.append_shap_values_to_df(input_sv = shap_values, + in_data= input_data.copy(), + scope='local') + + shap_row['Model Decision'] = pred(pd.DataFrame(input_data).T)[0] + if is_classification(self.model): + probabilities = pred_prob([np.array(input_data)]) + for i in range(len(np.unique(self.actual_data))): + shap_row['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0] + return shap_row + + + def shap_local(self, row_number): + if is_classification(self.model): + explainer, pred, pred_prob = self.shap_explainer() + + else: + explainer, pred = self.shap_explainer() + + if row_number > len(self.input_data): + raise IndexError(f"index {row_number} is out of bounds for axis 0 with size {len(self.input_data)}") + else: + if type(explainer) == shap.explainers._tree.Tree: + local_shap_values = explainer.shap_values(self.input_data.iloc[row_number,:]) + row_with_shap = self.append_shap_values_to_df(input_sv = local_shap_values, + in_data=self.input_data.iloc[row_number,:].copy(), + scope='local') + + row_with_shap['Model Decision'] = pred(pd.DataFrame(self.input_data.iloc[row_number]).T)[0] + row_with_shap['True Values'] = pd.DataFrame(self.actual_data).iloc[row_number][0] + + if is_classification(self.model): + probabilities = pred_prob([np.array(self.input_data)[row_number]]) + for i in range(len(np.unique(self.actual_data))): + row_with_shap['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0] + + return explainer, local_shap_values, row_with_shap + else: + local_shap_values = explainer.shap_values(self.input_data.iloc[row_number,:]) + row_with_shap = self.append_shap_values_to_df(input_sv = local_shap_values, + in_data= self.input_data.iloc[row_number,:].copy(), + scope='local') + + 
row_with_shap['Model Decision'] = pred(pd.DataFrame(self.input_data.iloc[row_number]).T)[0] + row_with_shap['True Values'] = pd.DataFrame(self.actual_data).iloc[row_number][0] + + if is_classification(self.model): + probabilities = pred_prob(pd.DataFrame(self.input_data.iloc[row_number]).T) + for i in range(len(np.unique(self.actual_data))): + row_with_shap['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0] + + return explainer, local_shap_values, row_with_shap + + + def data_for_shap(self, input_data): + if is_classification(self.model): + explainer, pred, pred_fcn = self.shap_explainer() + if type(explainer) == shap.explainers._tree.Tree: + global_shap_values = explainer.shap_values(input_data) + data_with_shap = self.append_shap_values_to_df(input_sv = global_shap_values[0], + in_data=input_data.copy(), scope="local") + prediction = pred([input_data]) + probabilities = pred_fcn([input_data]) + + data_with_shap['Model Decision'] = prediction[0] + #data_with_shap['True Values'] = self.actual_data + + + for i in range(len(np.unique(self.actual_data))): + data_with_shap['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0] + return data_with_shap + else: + predictions = pred(shap.sample(input_data,100)) + global_shap_values = explainer.shap_values(shap.sample(input_data,100)) + data_with_shap = self.append_shap_values_to_df(input_sv = global_shap_values[0], + in_data=shap.sample(input_data,100).copy(), + scope='local') + prediction = pred(shap.sample(input_data,100)) + probabilities = pred_fcn(shap.sample(input_data,100)) + data_with_shap['Model Decision'] = prediction[0] + #data_with_shap['True Values'] = self.actual_data + + for i in range(len(np.unique(self.actual_data))): + data_with_shap['Probability: {}'.format(np.unique(self.actual_data)[i])] = probabilities[:,i][0] + + return data_with_shap + else: + explainer, pred = self.shap_explainer() + if type(explainer) == shap.explainers._tree.Tree: + #Complete! Do not change. 
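Reviewer aside: for orientation, a usage sketch of the new ShapleyValues wrapper. The import path is inferred from the repo layout and the exact SHAP return shapes vary with the installed shap version, so treat this as illustrative only:

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from explainx.lib.frameworks.shapley_values import ShapleyValues  # path assumed from repo layout

data = load_breast_cancer(as_frame=True)
X, y = data.data, data.target
model = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)

sv = ShapleyValues(model, X, y, ct=None)   # ct=None routes tree models to shap.TreeExplainer
explainer, shap_values, df_with_shap = sv.global_shap_plotting()  # *_impact columns plus decisions/probabilities
_, local_values, row = sv.shap_local(row_number=0)                # single-row explanation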
+ global_shap_values = explainer.shap_values(input_data) + data_with_shap = self.append_shap_values_to_df(input_sv = global_shap_values, + in_data=self.input_data.copy(), + scope="local") + data_with_shap['Model Decision'] = pred(input_data) + #data_with_shap['True Values'] = self.actual_data + + return data_with_shap + + else: + global_shap_values = explainer.shap_values(shap.sample(input_data,100)) + predictions = pred(shap.sample(input_data,100)) + data_with_shap = self.append_shap_values_to_df(input_sv = global_shap_values, + in_data=shap.sample(input_data,100).copy(), + scope="local") + data_with_shap['Model Decision'] = pred(shap.sample(self.input_data,100)) + #data_with_shap['True Values'] = self.actual_data + + return data_with_shap \ No newline at end of file diff --git a/lib/imports.py b/lib/imports.py index 6171ad8..7faee4c 100644 --- a/lib/imports.py +++ b/lib/imports.py @@ -31,6 +31,8 @@ from config_det import data_det from collections import deque from sklearn import metrics +from sklearn.base import is_classifier, is_regressor +import pytest firebase_app = pyrebase.initialize_app(data_det) ref = firebase_app.database() \ No newline at end of file diff --git a/lib/models/__init__.py b/lib/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/models/modelprocessor.py b/lib/models/modelprocessor.py new file mode 100644 index 0000000..ac62a89 --- /dev/null +++ b/lib/models/modelprocessor.py @@ -0,0 +1,322 @@ +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +import numpy as np +import pandas as pd +from sklearn.preprocessing import LabelEncoder +from typing import Dict, List, Union +from sklearn.impute import SimpleImputer +from explainx.lib.utils import is_classification + +class ModelProcessor(): + def __init__(self, model, x_test, y_test, ct): + + super(ModelProcessor, self).__init__() + + self.predicted_columns = {} + self.model = model + self.input_data = x_test + self.target_data = y_test + self.ct = ct + + def num_cat_variables(self, data): + """ [Get categorical & numerical columns] + + Args: + x_test + + Return: + category_columns + numerical_columns + """ + is_cat = np.array([dt.kind == "O" for dt in data.dtypes]) + cat_cols = data.columns.values[is_cat] + num_cols = data.columns.values[~is_cat] + return cat_cols, num_cols + + + + def gen_category_map(input_data: Union[pd.DataFrame, np.ndarray], + categorical_columns: Union[List[int], List[str], None] = None) -> Dict[int, list]: + """ + Parameters + ---------- + data + 2-dimensional pandas dataframe or numpy array. + categorical_columns + A list of columns indicating categorical variables. Optional if passing a pandas dataframe as inference will + be used based on dtype 'O'. If passing a numpy array this is compulsory. + Returns + ------- + category_map + A dictionary with keys being the indices of the categorical columns and values being lists of categories for + that column. Implicitly each category is mapped to the index of its position in the list. 
+ """ + + if input_data.ndim != 2: + raise TypeError('Expected a 2-dimensional dataframe or array') + n_features = input_data.shape[1] + + if isinstance(input_data, np.ndarray): + # if numpy array, we need categorical_columns, otherwise impossible to infer + if categorical_columns is None: + raise ValueError('If passing a numpy array, `categorical_columns` is required') + elif not all(isinstance(ix, int) for ix in categorical_columns): + raise ValueError('If passing a numpy array, `categorical_columns` must be a list of integers') + input_data = pd.DataFrame(input_data) + + # infer categorical columns + if categorical_columns is None: + try: + categorical_columns = [i for i in range(n_features) if input_data.iloc[:, i].dtype == 'O'] # NB: 'O' + except AttributeError: + raise + + # create the map + category_map = {} + for col in categorical_columns: + if not isinstance(col, int): + col = int(input_data.columns.get_loc(col)) + le = LabelEncoder() + try: + _ = le.fit_transform(input_data.iloc[:, col]) + except (AttributeError, IndexError): + raise + + category_map[col] = list(le.classes_) + + return category_map + + def columnsTransformer(self): + ''' + Input: + data : dataframe or x_test + Column Transformer: Sklearn ColumnTransformer that applies transformers to columns of an array or pandas DataFrame. + + Returns: + Column Transformer function after fit. + ''' + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer + from sklearn.compose import ColumnTransformer + + category_cols, numerical_cols = self.num_cat_variables(self.input_data) + category_map = self.gen_category_map(self.input_data, category_cols) + + if self.ct == None: + pass + elif self.ct == "default": + ordinal_features = [x for x in range(len(self.input_data.columns)) if x not in list(category_map.keys())] + ordinal_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), + ('scaler', StandardScaler())]) + + categorical_features = list(category_map.keys()) + categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')), + ('onehot', OneHotEncoder(drop='first', handle_unknown='error'))]) + + #pass the pipeline into the transformer + preprocessor = ColumnTransformer(transformers=[('num', ordinal_transformer, ordinal_features), + ('cat', categorical_transformer, categorical_features)]) + preprocessor.fit(self.input_data) + return preprocessor + else: + transformerFunction = ct.fit(self.input_data) + #transformed_xtest = ct.transform(x_test) + return transformerFunction + + def inbuiltColumnsTransformer(data): + ''' + Input: + data : dataframe or x_test + Column Transformer: Sklearn ColumnTransformer that applies transformers to columns of an array or pandas DataFrame. + + Returns: + Column Transformer function after fit. 
+ ''' + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer + from sklearn.compose import ColumnTransformer + + category_cols, numerical_cols = ModelProcessor.num_cat_variables(data) + category_map = ModelProcessor.gen_category_map(data, category_cols) + + + ordinal_features = [x for x in range(len(data.columns)) if x not in list(category_map.keys())] + ordinal_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), + ('scaler', StandardScaler())]) + + categorical_features = list(category_map.keys()) + categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')), + ('onehot', OneHotEncoder(drop='first', handle_unknown='error'))]) + + #pass the pipeline into the transformer + preprocessor = ColumnTransformer(transformers=[('num', ordinal_transformer, ordinal_features), + ('cat', categorical_transformer, categorical_features)]) + #preprocessor.fit(data) + return preprocessor + + + def data_into_model(self): + ''' [Transformed data that is directly passed into the model] + Args: + data : dataframe or x_test + Column Transformer: Sklearn ColumnTransformer that applies transformers to columns of an array or pandas DataFrame. + + Returns: + x_test_proc: transformed dataset to be passed into the model predict function + ''' + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer + from sklearn.compose import ColumnTransformer + + if self.ct == None: + pass + + elif self.ct=="default": + x_test_proc = self.columnsTransformer().transform() + return x_test_proc + + else: + #transformerFunction = ct.fit(x_test) + x_test_proc = self.ct.transform(self.input_data) + return x_test_proc + + def make_predictions(self): + """ [Initiate the prediction function] + + Args: + Model + input_data + target_data + + Return: + prediction column + probabilities [if is_classifier] + + """ + if is_classification(self.model): + if self.ct == None: + prediction = self.model.predict(self.input_data.to_numpy()) + probabilities = self.model.predict_proba(self.input_data.to_numpy()) + return prediction, probabilities + elif self.ct != None: + prediction = self.model.predict(self.data_into_model()) + probabilities = self.model.predict_proba(self.data_into_model()) + return prediction, probabilities + else: + raise Exception(("{} not supported. Please create an issue on Github").format(self.model)) + + else: + if self.ct == None: + prediction = self.model.predict(self.input_data) + return prediction + elif self.ct != None: + prediction = self.model.predict(self.data_into_model()) + return prediction + else: + raise Exception(("{} not supported. 
Please create an issue on Github").format(self.self.model)) + + + def create_prediction_columns(self): + """ [Create prediction columns and add them to the self.predicted_columns dictionary] + Args: + model + x_test: + y_test + ColumnTransformer + """ + if is_classification(self.model) == True: + prediction, probabilities = self.make_predictions() + self.predicted_columns['Model Decision'] = prediction + self.predicted_columns['True Values'] = self.target_data + for i in range(len(np.unique(prediction))): + self.predicted_columns['Probability: {}'.format(np.unique(prediction)[i])] = probabilities[:,i] + + else: + prediction = self.make_predictions() + self.predicted_columns['Model Decision'] = prediction + self.predicted_columns['True Values'] = self.target_data + + + def log_metrics(self): + if is_classification(self.model) == True: + predict, _ = self.make_predictions() + metrics = self.classification_metrics(self.target_data, predict) + return metrics + else: + predict = self.make_predictions() + metrics = self.regression_metrics(self.target_data, predict) + return metrics + + def classification_metrics(self, target_data, predicted): + """[Calculates the metrics for classification problems] + + Args: + y_true ([type]): [True labels from the dataset] + predicted ([type]): [Predicted values from the model] + + Returns: + Accuracy metric of the model + Precision value of the model + Recall value of the model + False Positive Rate + False Negative Rate + """ + from sklearn import preprocessing + from sklearn import metrics + + y_true_copy, predictions = pd.DataFrame(self.target_data), predicted + #y_true_copy.unique() + np.unique(y_true_copy) + encode = {} + for i in range(len(np.unique(y_true_copy))): + encode[np.unique(y_true_copy)[i]] = i + + predicted_copy = [encode[i] for i in predictions] + + y_true_copy.replace(encode, inplace=True) + + if len(y_true_copy) != 0: + #Accuracy + accuracy = round(metrics.accuracy_score(y_true_copy, predicted_copy),2) + #Precision + precision = round(metrics.precision_score(y_true_copy, predicted_copy, zero_division=1),2) + #Recall + recall = round(metrics.recall_score(y_true_copy, predicted_copy, zero_division=1),2) + tn, fp, fn, tp = metrics.confusion_matrix(y_true_copy, predicted_copy).ravel() + #False Positive Rate (FPR) + fpr = round((fp/(fp+tn)),2) + #Flase Negative Rate (FNR) + fnr = round((fn/(tp+fn) if (tp+fn) else 0),2) + results = {'accuracy':accuracy, 'precision':precision, 'recall':recall, 'fpr': fpr, 'fnr':fnr} + return results + else: + raise Exception("Metrics calculation failed") + + def regression_metrics(self, target_data, predicted): + """[Calculates the metrics for regression problems] + + Args: + y_true ([type]): [True labels from the dataset] + predicted ([type]): [Predicted values from the model] + + Returns: + Mean Absolute Error + Mean Squared Error + R-Squared Value + """ + from sklearn import metrics + if len(target_data) != 0: + #Mean Absolute Error + mae = round(metrics.mean_absolute_error(target_data, predicted),2) + #Mean Squared Error + mse = round(metrics.mean_squared_error(target_data, predicted),2) + #R2 + r2 = round(metrics.r2_score(target_data, predicted),2) + results = {'mae':mae, 'mse':mse, 'R2':r2} + return results + else: + raise Exception("Metrics calculation failed") + + +# if __name__ == '__main__': +# a_game = ModelProcessor() +# a_game.run() \ No newline at end of file diff --git a/lib/models/test/__init__.py b/lib/models/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/lib/models/test/test_model.py b/lib/models/test/test_model.py new file mode 100644 index 0000000..81f2943 --- /dev/null +++ b/lib/models/test/test_model.py @@ -0,0 +1,30 @@ +# Copyright (c) 2020 explainX.ai +# Distributed under the MIT software license + +import pytest +from modelprocessor import ModelProcessor + +from sklearn.base import is_classifier, is_regressor +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor +from sklearn.neural_network import MLPClassifier, MLPRegressor + +classifiers = [RandomForestClassifier(), MLPClassifier()] +regressors = [RandomForestRegressor(),AdaBoostRegressor(), MLPRegressor(), GradientBoostingRegressor()] + +def test_classification(): + try: + for model in classifiers: + assert ModelProcessor().is_classification(model) == True + except Exception: + pytest.fail("Not a classifier!") + +def test_not_classification(): + try: + for model in regressors: + assert ModelProcessor().is_classification(model) == False + except Exception: + pytest.fail("Not a regressor!") + + + + diff --git a/lib/models/transformer_default.py b/lib/models/transformer_default.py new file mode 100644 index 0000000..8d7c7a7 --- /dev/null +++ b/lib/models/transformer_default.py @@ -0,0 +1,106 @@ +import numpy as np +import pandas as pd +from sklearn.preprocessing import LabelEncoder +from typing import Dict, List, Union +from sklearn.impute import SimpleImputer + +class defaultTransformer(): + def __init__(self, data): + self.data = data + + def num_cat_variables(self): + """ [Get categorical & numerical columns] + + Args: + x_test + + Return: + category_columns + numerical_columns + """ + is_cat = np.array([dt.kind == "O" for dt in self.data.dtypes]) + cat_cols = self.data.columns.values[is_cat] + num_cols = self.data.columns.values[~is_cat] + return cat_cols, num_cols + + def gen_category_map(self, input_data: Union[pd.DataFrame, np.ndarray], + categorical_columns: Union[List[int], List[str], None] = None) -> Dict[int, list]: + """ + Parameters + ---------- + data + 2-dimensional pandas dataframe or numpy array. + categorical_columns + A list of columns indicating categorical variables. Optional if passing a pandas dataframe as inference will + be used based on dtype 'O'. If passing a numpy array this is compulsory. + Returns + ------- + category_map + A dictionary with keys being the indices of the categorical columns and values being lists of categories for + that column. Implicitly each category is mapped to the index of its position in the list. 
+ """ + + if input_data.ndim != 2: + raise TypeError('Expected a 2-dimensional dataframe or array') + n_features = input_data.shape[1] + + if isinstance(input_data, np.ndarray): + # if numpy array, we need categorical_columns, otherwise impossible to infer + if categorical_columns is None: + raise ValueError('If passing a numpy array, `categorical_columns` is required') + elif not all(isinstance(ix, int) for ix in categorical_columns): + raise ValueError('If passing a numpy array, `categorical_columns` must be a list of integers') + input_data = pd.DataFrame(input_data) + + # infer categorical columns + if categorical_columns is None: + try: + categorical_columns = [i for i in range(n_features) if input_data.iloc[:, i].dtype == 'O'] # NB: 'O' + except AttributeError: + raise + + # create the map + category_map = {} + for col in categorical_columns: + if not isinstance(col, int): + col = int(input_data.columns.get_loc(col)) + le = LabelEncoder() + try: + _ = le.fit_transform(input_data.iloc[:, col]) + except (AttributeError, IndexError): + raise + + category_map[col] = list(le.classes_) + + return category_map + + def column_processor(self): + ''' + Input: + data : dataframe or x_test + Column Transformer: Sklearn ColumnTransformer that applies transformers to columns of an array or pandas DataFrame. + + Returns: + Column Transformer function after fit. + ''' + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer + from sklearn.compose import ColumnTransformer + + category_cols, numerical_cols = self.num_cat_variables() + category_map = self.gen_category_map(self.data, category_cols) + + + ordinal_features = [x for x in range(len(self.data.columns)) if x not in list(category_map.keys())] + ordinal_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), + ('scaler', StandardScaler())]) + + categorical_features = list(category_map.keys()) + categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')), + ('onehot', OneHotEncoder(drop='first', handle_unknown='error'))]) + + #pass the pipeline into the transformer + preprocessor = ColumnTransformer(transformers=[('num', ordinal_transformer, ordinal_features), + ('cat', categorical_transformer, categorical_features)]) + #preprocessor.fit(data) + return preprocessor \ No newline at end of file diff --git a/lib/modules/__init__.py b/lib/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/modules/cohort_analysis/__init__.py b/lib/modules/cohort_analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/modules/cohort_analysis/apps/cohort_app.py b/lib/modules/cohort_analysis/apps/cohort_app.py new file mode 100644 index 0000000..e88a271 --- /dev/null +++ b/lib/modules/cohort_analysis/apps/cohort_app.py @@ -0,0 +1,185 @@ +from .cohort_class import cohortAnalysis +import dash_core_components as dcc +import dash_html_components as html +from explainx.lib.utils import is_classification +from explainx.lib.imports import * + +def test_func(x_test, model, app): + + ca = cohortAnalysis(model) + + var_name_dropdown = html.Div([ + html.P("Choose Variable"), + dcc.Dropdown( + id='demo-dropdown', + options=[{'label': i, 'value': i} for i in x_test.columns], + value= "", + clearable=False + ) + ]) + + + operators_list = ["==","=",">","<",">=","<="] + + operators_dropdown = html.Div([ + html.P("Choose Operator"), + dcc.Dropdown(id="demo-operators", + options=[{"label":i, "value":i} for i 
in operators_list], + value = "", + clearable=False + ) + ]) + + value_input = html.Div([ + html.P("Enter Value"), + html.Div(id="demo-test") + ]) + + def signal(is_classification): + if is_classification(model) == True: + return x_test.columns[-4:] + else: + return x_test.columns[-2:] + + x_axis_dropdown = html.Div([ + html.P("Choose X-Axis Variable"), + dcc.Dropdown(id="x-axis", + options = [{"label":i, "value":i} for i in signal(is_classification)], + value = x_test.columns[-2], + clearable=False) + ], style={"width":"30%", "padding-left":"20px"}) + + + modal = html.Div( + [ + dbc.Modal( + [ + dbc.ModalHeader("Cohort Analysis"), + dbc.ModalBody( + html.Div( + [var_name_dropdown, + operators_dropdown, + value_input + ], id="modal_body") + ), + dbc.ModalFooter([ + dbc.Button("Add Cohort", id="add-cohort", n_clicks=3), + dbc.Button("Close", id="close", className="ml-auto") + ])], + id="modal", + ), + ], id="modal-parent" + ) + + button = dbc.Button("Add Cohort", id="open") + + + remove_button = dbc.Button("Remove Cohort", id="remove-cohort", style={"margin-left":"20px"}) + + cohort_details = html.Div(id="cohort-details", children=[], style={"display":"flex"}) + + cohort_metrics_div = html.Div(id="cohort-metrics-div", children = [], style={"display":"flex"}) + + heading = html.H3("Evaluate Model Performance - Cohort Analysis", style={"padding-left":"20px", "padding-top":"20px"}) + details = html.P("Evaluate the performance of your model by exploring the distribution of your prediction value and the values of your model performance metrics. You can further investigate your model by looking at a comparative analysis of its performance across different cohorts or subgroups of your dataset. Select filters along y-value and x-value to cut across different dimensions.", style={"padding-left":'20px'}) + + card = dbc.Card( + [ + dbc.CardHeader( + dbc.Tabs( + [ + dbc.Tab(label="Cohort Distribution", tab_id="tab-1"), + dbc.Tab(label="Dataset Explorer", tab_id="tab-2"), + ], + id="card-tabs", + card=True, + active_tab="tab-1", + ) + ), + dbc.CardBody(html.P(id="card-content", className="card-text")), + ] + ) + + + layout = html.Div( + [ + heading, + details, + html.Div([button, remove_button],style={"padding":"20px", "display":"flex"}), + cohort_details, + cohort_metrics_div, + modal, + card, + + + ], id="main" + ) + + + + @app.callback( + Output("card-content", "children"), + [Input("card-tabs", "active_tab")] + ) + def tab_content(active_tab): + if active_tab == 'tab-1': + div = html.Div([ + html.Div(x_axis_dropdown), + html.Div(id="cohort-graph")], style={"display":"block"}) + return div + else: + return "This is tab {}".format(active_tab) + + @app.callback( + Output("modal", "is_open"), + [Input("open", "n_clicks"), Input("close", "n_clicks")], + [State("modal", "is_open")], + ) + def toggle_modal(n1, n2, is_open): + if n1 or n2: + return not is_open + return is_open + + + #new callback added. 
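Reviewer aside: test_func builds the cohort-analysis layout and registers its callbacks on the Dash app it receives. A sketch of how it might be mounted (illustrative; the import path and the pre-appended 'Model Decision'/'True Values'/probability columns are assumptions, and newer Dash releases expose app.run instead of app.run_server):

import dash
import dash_bootstrap_components as dbc
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from explainx.lib.modules.cohort_analysis.apps.cohort_app import test_func  # path assumed

data = load_breast_cancer(as_frame=True)
X, y = data.data, data.target
model = RandomForestClassifier(random_state=0).fit(X, y)

x_test = X.copy()                              # the app expects the prediction columns already appended
x_test['Model Decision'] = model.predict(X)
x_test['True Values'] = y
for i, cls in enumerate(model.classes_):
    x_test['Probability: {}'.format(cls)] = model.predict_proba(X)[:, i]

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.layout = test_func(x_test, model, app)     # returns the layout and wires the cohort callbacks
app.run_server(debug=True)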
+ @app.callback(Output('demo-test', "children"), + [Input('demo-dropdown','value')]) + def categorical_support(value): + is_cat = np.array([dt.kind == "O" for dt in x_test.dtypes]) + category_cols = x_test.columns.values[is_cat] + if value in category_cols: + return dcc.Dropdown(id="demo-values", + options=[{"label":i, "value":i} for i in x_test[value].unique()], + value = "", + clearable=False) + else: + return dcc.Input(id="demo-values", + type="text", + value="", + debounce=True) + + @app.callback( + [Output("cohort-metrics-div", "children"), + Output("cohort-details", "children"), + Output("cohort-graph", "children")], + [Input("add-cohort","n_clicks"), + Input("remove-cohort","n_clicks"), + Input("x-axis","value")], + [State("demo-dropdown","value"), + State("demo-operators", "value"), + State("demo-values", "value")], + ) + def cohort_metrics_details(add_cohort, remove_cohort, x_axis, var_name, operator, value): + changed_id = [p['prop_id'] for p in dash.callback_context.triggered][0] + if 'remove-cohort' in changed_id: + ca.remove_cohort() + fig = ca.cohort_graph(x_axis) + return ca.cohort_metrics_details(), ca.cohort_details(), dcc.Graph(figure=fig), + + else: + ca.add_cohort_metrics(x_test, var_name, operator,value) + cohort = ca.add_cohort(x_test, x_axis, var_name, operator, value) + fig = ca.cohort_graph(x_axis) + return ca.cohort_metrics_details(), ca.cohort_details(), dcc.Graph(figure=fig) + + return layout diff --git a/lib/modules/cohort_analysis/apps/cohort_class.py b/lib/modules/cohort_analysis/apps/cohort_class.py new file mode 100644 index 0000000..7e6fb89 --- /dev/null +++ b/lib/modules/cohort_analysis/apps/cohort_class.py @@ -0,0 +1,253 @@ +from sklearn import metrics +import dash_html_components as html +import plotly.graph_objects as go +from explainx.lib.utils import is_classification + +class cohortAnalysis(): + def __init__(self, model): + + self.cohorts = {} + self.cohort_metrics = [] + self.cohort_set = {} + self.model = model + + + def filtered_dataframe(self, df, filter_variable, var_name="", operator="", value=""): + """ + data = main_data + name = cohort_name + var_name = name of the variable to slice/dice + operator: >, <, =, >=, <= + value = value of the variable + + returns main_data: filtered dataset with just the probabilities + name: filtered dataset with the condition + """ + main_dataset = df[filter_variable] + if (var_name != "") or (operator != "") or (value != ""): + if len(df[filter_variable]) != 0: + if type(value) != 'str': + try: + name = df.query("{} {} '{}'".format(var_name, operator, value))[filter_variable] + except: + name = df.query("{} {} {}".format(var_name, operator, value))[filter_variable] + condition = str(var_name)+str(operator)+str(value) + return main_dataset, name, condition + else: + pass + else: + if len(df[filter_variable]) != 0: + condition = "All Data" + return main_dataset, condition + else: + pass + + def add_cohort(self, df, filter_variable, var_name="", operator="", value=""): + if (var_name != "") or (operator != "") or (value != ""): + main_dataset, name, condition = self.filtered_dataframe(df,filter_variable,var_name,operator,value) + self.cohorts[condition] = name + else: + main_dataset, condition = self.filtered_dataframe(df, filter_variable) + self.cohorts[condition] = main_dataset + + def remove_cohort(self): + if (len(self.cohorts) >1) and (len(self.cohort_set) > 1): + self.cohorts.popitem() + self.cohort_set.popitem() + else: + pass + + def add_cohort_metrics(self, df, var_name="", operator="", value=""): + """ 
+ data = main_data + name = cohort_name + var_name = name of the variable to slice/dice + operator: >, <, =, >=, <= + value = value of the variable + + """ + if value != "": + #Extract filtered predicted values + _, predicted, condition_predict = self.filtered_dataframe(df, "Model Decision",var_name,operator,value) + #Extract filtered true labels + _, true_values, condition_true = self.filtered_dataframe(df, "True Values", var_name, operator, value) + #calculate metrics + if is_classification(self.model) is True: + if len(true_values) != 0: + accuracy, precision, recall, fpr, fnr = self.classification_cohort_metrics(true_values, predicted) + self.cohort_set[condition_predict] = self.generate_classification_divs(accuracy, precision, recall, fpr, fnr) + else: + pass + else: + if len(true_values) != 0: + mae, mse, r2 = self.regression_cohort_metrics(true_values, predicted) + #save these metrics to an array + self.cohort_set[condition_predict] = self.generator_regression_divs(mae, mse, r2) + else: + pass + else: + main_dataset, condition = self.filtered_dataframe(df, "Model Decision") + true_data, _ = self.filtered_dataframe(df, "True Values") + if is_classification(self.model) is True: + if len(true_data) != 0: + accuracy, precision, recall, fpr, fnr = self.classification_cohort_metrics(true_data,main_dataset) + self.cohort_set[condition] = self.generate_classification_divs(accuracy, precision, recall, fpr, fnr) + else: + pass + else: + if len(true_data) != 0: + mae, mse, r2 = self.regression_cohort_metrics(true_data, main_dataset) + #save these metrics to an array + self.cohort_set[condition] = self.generator_regression_divs(mae, mse, r2) + else: + pass + + def generate_classification_divs(self, accuracy, precision, recall, fpr, fnr): + metrics_div = [html.Div("Accuracy: {}".format(accuracy)), + html.Div("Precision: {}".format(precision)), + html.Div("Recall: {}".format(recall)), + html.Div("fpr: {}".format(fpr)), + html.Div("fnr: {}".format(fnr)) + ] + return metrics_div + + def generator_regression_divs(self, mae, mse, r2): + metrics_div = [html.Div("MAE : {}".format(mae)), + html.Div("MSE : {}".format(mse)), + html.Div("R2: {}".format(r2))] + return metrics_div + + def cohort_details(self): + """ + Cohort Name + Length of Cohort + """ + length_dict = {key: len(value) for key, value in self.cohorts.items()} + divs = [] + for i in range(len(length_dict)): + if list(length_dict.values())[i] != 0: + first_html = html.Div(list(length_dict.keys())[i]) + second_html = html.Div(str(list(length_dict.values())[i])+" datapoints") + divs.append(html.Div([first_html,second_html], style={"padding-left":"20px","padding-right":"20px","padding-bottom":"0px","width":"200px"})) + else: + pass + return divs + + def cohort_metrics_details(self): + """ + Cohort Name + Metrics + """ + length_dict = {key: value for key, value in self.cohort_set.items()} + div_metrics = [] + for i in range(len(length_dict)): + div_metrics.append(html.Div(list(length_dict.values())[i], style={"padding-left":"20px","padding-right":"20px","padding-bottom":"0px","width":"200px"})) + return div_metrics + + + def cohort_graph(self, filter_variable): + """[This function generators the box plot for the cohorts. This is operated directly from the frontend.] + + Args: + filter_variable ([string]): [This variable is x-axis value of the graph. 
It can be either probabilities or model prediction values] + + Returns: + [figure]: [box plot graph] + """ + + X_Value = str(filter_variable) + Y_Value = 'Cohorts' + + fig = go.Figure() + + for k, v in self.cohorts.items(): + fig.add_trace(go.Box(x=v, name=k)) + + fig.update_layout( + yaxis_title = Y_Value, + xaxis_title = X_Value, + template = "plotly_white", + font=dict( + size=8, + ) + ) + fig.update_layout(legend=dict( + orientation="h", + yanchor="bottom", + y=1.02, + xanchor="right", + x=1 + )) + + fig.update_layout( + margin={'t': 0}, + ) + return fig + + def classification_cohort_metrics(self, y_true, predicted): + """[Calculates the metrics for classification problems] + + Args: + y_true ([type]): [True labels from the dataset] + predicted ([type]): [Predicted values from the model] + + Returns: + Accuracy metric of the model + Precision value of the model + Recall value of the model + False Positive Rate + False Negative Rate + """ + from sklearn import preprocessing + + y_true_copy, predicted_copy = y_true, predicted + y_true_copy.unique() + + encode = {} + for i in range(len(y_true_copy.unique())): + encode[y_true_copy.unique()[i]] = i + + + y_true_copy.replace(encode, inplace=True) + predicted_copy.replace(encode, inplace=True) + + if len(y_true_copy) != 0: + #Accuracy + accuracy = round(metrics.accuracy_score(y_true_copy, predicted_copy),2) #Accuracy classification score. + #Precision + precision = round(metrics.precision_score(y_true_copy, predicted_copy, zero_division=1),2) #Compute the precision + #Recall + recall = round(metrics.recall_score(y_true_copy, predicted_copy, zero_division=1),2) #Compute the recall + #False Positive Rate (FPR) + tn, fp, fn, tp = metrics.confusion_matrix(y_true_copy, predicted_copy).ravel() #Compute confusion matrix to evaluate the accuracy of a classification. 
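Reviewer aside: a small worked check of the two rates computed in the next lines, with a hand-verifiable confusion matrix:

from sklearn import metrics

y_true = [1, 0, 1, 1, 0, 0]
y_pred = [1, 0, 0, 1, 1, 0]
tn, fp, fn, tp = metrics.confusion_matrix(y_true, y_pred).ravel()  # tn=2, fp=1, fn=1, tp=2
fpr = fp / (fp + tn)  # 1/3 -> 0.33: share of actual negatives flagged as positive
fnr = fn / (fn + tp)  # 1/3 -> 0.33: share of actual positives that were missed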
+ #False Negative Rate (FNR) + fpr = round((fp/(fp+tn)),2) + fnr = round((fn/(tp+fn) if (tp+fn) else 0),2) + + return accuracy, precision, recall, fpr, fnr + else: + pass + + def regression_cohort_metrics(self, y_true, predicted): + """[Calculates the metrics for regression problems] + + Args: + y_true ([type]): [True labels from the dataset] + predicted ([type]): [Predicted values from the model] + + Returns: + Mean Absolute Error + Mean Squared Error + R-Squared Value + """ + if len(y_true) != 0: + #Mean Absolute Error + mae = round(metrics.mean_absolute_error(y_true, predicted),2) + #Mean Squared Error + mse = round(metrics.mean_squared_error(y_true, predicted),2) + #R2 + r2 = round(metrics.r2_score(y_true, predicted),2) + + return mae, mse, r2 + else: + pass diff --git a/lib/modules/cohort_analysis/assets/favicon.ico b/lib/modules/cohort_analysis/assets/favicon.ico new file mode 100644 index 0000000..1224c37 Binary files /dev/null and b/lib/modules/cohort_analysis/assets/favicon.ico differ diff --git a/lib/modules/cohort_analysis/assets/topography.css b/lib/modules/cohort_analysis/assets/topography.css new file mode 100644 index 0000000..0663147 --- /dev/null +++ b/lib/modules/cohort_analysis/assets/topography.css @@ -0,0 +1,623 @@ +/* Table of contents +–––––––––––––––––––––––––––––––––––––––––––––––––– +- Plotly.js +- Grid +- Base Styles +- Typography +- Links +- Buttons +- Forms +- Lists +- Code +- Tables +- Spacing +- Utilities +- Clearing +- Media Queries +*/ + +/* Grid +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.container { + position: relative; + width: 100%; + max-width: 960px; + margin: 0 auto; + padding: 0 20px; + box-sizing: border-box; } +.column, +.columns { + width: 100%; + float: left; + box-sizing: border-box; } + +/* For devices larger than 400px */ +@media (min-width: 400px) { + .container { + width: 85%; + padding: 0; } +} + +/* For devices larger than 550px */ +@media (min-width: 550px) { + .container { + width: 80%; + } + .column, + .columns { + margin-left: 4%; } + .column:first-child, + .columns:first-child { + margin-left: 0; } + + .one.column, + .one.columns { width: 4.66666666667%; } + .two.columns { width: 13.3333333333%; } + .three.columns { width: 22%; } + .four.columns { width: 30.6666666667%; } + .five.columns { width: 39.3333333333%; } + .six.columns { width: 48%; } + .seven.columns { width: 56.6666666667%; } + .eight.columns { width: 65.3333333333%; } + .nine.columns { width: 74.0%; } + .ten.columns { width: 82.6666666667%; } + .eleven.columns { width: 91.3333333333%; } + .twelve.columns { width: 98%; margin-left: 0; margin-right: 0;} + + .one-third.column { width: 30.6666666667%; } + .two-thirds.column { width: 65.3333333333%; } + + .one-half.column { width: 48%; } + + /* Offsets */ + .offset-by-one.column, + .offset-by-one.columns { margin-left: 8.66666666667%; } + .offset-by-two.column, + .offset-by-two.columns { margin-left: 17.3333333333%; } + .offset-by-three.column, + .offset-by-three.columns { margin-left: 26%; } + .offset-by-four.column, + .offset-by-four.columns { margin-left: 34.6666666667%; } + .offset-by-five.column, + .offset-by-five.columns { margin-left: 43.3333333333%; } + .offset-by-six.column, + .offset-by-six.columns { margin-left: 52%; } + .offset-by-seven.column, + .offset-by-seven.columns { margin-left: 60.6666666667%; } + .offset-by-eight.column, + .offset-by-eight.columns { margin-left: 69.3333333333%; } + .offset-by-nine.column, + .offset-by-nine.columns { margin-left: 78.0%; } + .offset-by-ten.column, + 
.offset-by-ten.columns { margin-left: 86.6666666667%; } + .offset-by-eleven.column, + .offset-by-eleven.columns { margin-left: 95.3333333333%; } + + .offset-by-one-third.column, + .offset-by-one-third.columns { margin-left: 34.6666666667%; } + .offset-by-two-thirds.column, + .offset-by-two-thirds.columns { margin-left: 69.3333333333%; } + + .offset-by-one-half.column, + .offset-by-one-half.columns { margin-left: 52%; } + +} + + +/* Base Styles +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +/* NOTE +html is set to 62.5% so that all the REM measurements throughout Skeleton +are based on 10px sizing. So basically 1.5rem = 15px :) */ +html { + font-size: 62.5%; } +body { + font-size: 1.2em; /* currently ems cause chrome bug misinterpreting rems on body element */ + line-height: 1.6; + font-weight: 400; +/* font-family: "Open Sans", "HelveticaNeue", "Helvetica Neue", Helvetica, Arial, sans-serif; */ + /* font-family: 'Montserrat'; */ + font-family: 'Montserrat', sans-serif; + color: #263238; /* Material blue-grey 900*/ + background-color: #fff; /* Material blue-grey 100*/ + margin: 0%; +} + + +/* Typography +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +h1, h2, h3, h4, h5, h6 { + margin-top: 0; + margin-bottom: 0; + font-weight: 300; } +h1 { font-size: 3.5rem; line-height: 1.2; letter-spacing: -.1rem; margin-bottom: 2rem; } +h2 { font-size: 2.6rem; line-height: 1.25; letter-spacing: -.1rem; margin-bottom: 1.8rem; margin-top: 1.8rem;} +h3 { font-size: 2.0rem; line-height: 1.3; letter-spacing: -.1rem; margin-bottom: 1.5rem; margin-top:4.5rem;} +h4 { font-size: 1.6rem; line-height: 1.35; letter-spacing: -.08rem; margin-bottom: 1.2rem; margin-top: 1.2rem;} +h5 { font-size: 1.2rem; line-height: 1.5; letter-spacing: -.05rem; margin-bottom: 0.6rem; margin-top: 0.6rem;} +h6 { font-size: 1.0rem; line-height: 1.6; letter-spacing: 0; margin-bottom: 0.75rem; margin-top: 0.75rem;} + +p { font-size: 1.1rem ; margin-top: 1rem; margin-bottom:1rem; overflow-wrap: break-word; margin-left:2px; margin-right:3px;} + + +/* Blockquotes +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +blockquote { + border-left: 4px lightgrey solid; + padding-left: 1rem; + margin-top: 2rem; + margin-bottom: 2rem; + margin-left: 0rem; +} + + +/* Links +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +a { + color: #1565c0; /* Material Blue 800 */ + text-decoration: underline; + cursor: pointer;} +a:hover { + color: #0d47a1; /* Material Blue 900 */ +} + + +/* Buttons +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.button, +button, +input[type="submit"], +input[type="reset"], +input[type="button"] { + display: inline-block; + height: 38px; + padding: 0 30px; + color: #455A64; /* Material blue-gray 700*/ + text-align: center; + font-size: 11px; + font-weight: 600; + line-height: 38px; + letter-spacing: .1rem; + text-transform: uppercase; + text-decoration: none; + white-space: nowrap; + background-color: transparent; + border-radius: 4px; + border: 1px solid #B0BEC5; /* Material blue-gray 200*/ + cursor: pointer; + box-sizing: border-box; } +.button:hover, +button:hover, +input[type="submit"]:hover, +input[type="reset"]:hover, +input[type="button"]:hover, +.button:focus, +button:focus, +input[type="submit"]:focus, +input[type="reset"]:focus, +input[type="button"]:focus { + color: #333; + border-color: #888; + outline: 0; } +.button.button-primary, +button.button-primary, +input[type="submit"].button-primary, +input[type="reset"].button-primary, +input[type="button"].button-primary { + color: #FFF; + 
background-color: #33C3F0; + border-color: #33C3F0; } +.button.button-primary:hover, +button.button-primary:hover, +input[type="submit"].button-primary:hover, +input[type="reset"].button-primary:hover, +input[type="button"].button-primary:hover, +.button.button-primary:focus, +button.button-primary:focus, +input[type="submit"].button-primary:focus, +input[type="reset"].button-primary:focus, +input[type="button"].button-primary:focus { + color: #FFF; + background-color: #1EAEDB; + border-color: #1EAEDB; } + + +/* Forms +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +input[type="email"], +input[type="number"], +input[type="search"], +input[type="text"], +input[type="tel"], +input[type="url"], +input[type="password"], +textarea, +select { + height: 38px; + padding: 6px 10px; /* The 6px vertically centers text on FF, ignored by Webkit */ + background-color: #fff; + border: 1px solid #D1D1D1; + border-radius: 4px; + box-shadow: none; + box-sizing: border-box; + font-family: inherit; + font-size: inherit; /*https://stackoverflow.com/questions/6080413/why-doesnt-input-inherit-the-font-from-body*/} +/* Removes awkward default styles on some inputs for iOS */ +input[type="email"], +input[type="number"], +input[type="search"], +input[type="text"], +input[type="tel"], +input[type="url"], +input[type="password"], +textarea { + -webkit-appearance: none; + -moz-appearance: none; + appearance: none; } +textarea { + min-height: 65px; + padding-top: 6px; + padding-bottom: 6px; } +input[type="email"]:focus, +input[type="number"]:focus, +input[type="search"]:focus, +input[type="text"]:focus, +input[type="tel"]:focus, +input[type="url"]:focus, +input[type="password"]:focus, +textarea:focus, +select:focus { + border: 1px solid #33C3F0; + outline: 0; } +label, +legend { + display: block; + margin-bottom: 0px; } +fieldset { + padding: 0; + border-width: 0; } +input[type="checkbox"], +input[type="radio"] { + display: inline; } +label > .label-body { + display: inline-block; + margin-left: .5rem; + font-weight: normal; } + + +/* Lists +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +ul { + list-style: circle inside; } +ol { + list-style: decimal inside; } +ol, ul { + padding-left: 0; + margin-top: 0; } +ul ul, +ul ol, +ol ol, +ol ul { + margin: 1.5rem 0 1.5rem 3rem; + font-size: 90%; } +li { + margin-bottom: 0; +} + +/* Tables +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +table { + border-collapse: collapse; +} +th:not(.CalendarDay), +td:not(.CalendarDay) { + padding: 12px 15px; + text-align: left; + border-bottom: 1px solid #E1E1E1; } +th:first-child:not(.CalendarDay), +td:first-child:not(.CalendarDay) { + padding-left: 0; } +th:last-child:not(.CalendarDay), +td:last-child:not(.CalendarDay) { + padding-right: 0; } + + +/* Spacing +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +button, +.button { + margin-bottom: 0rem; } +input, +textarea, +select, +fieldset { + margin-bottom: 0rem; } +pre, +dl, +figure, +table, +form { + margin-bottom: 0rem; } +p, +ul, +ol { + margin-bottom: 0.75rem; } + +/* Utilities +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.u-full-width { + width: 100%; + box-sizing: border-box; } +.u-max-full-width { + max-width: 100%; + box-sizing: border-box; } +.u-pull-right { + float: right; } +.u-pull-left { + float: left; } + + +/* Misc +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +hr { + margin-top: 3rem; + margin-bottom: 3.5rem; + border-width: 0; + border-top: 1px solid #E1E1E1; } + + +/* Clearing 
+–––––––––––––––––––––––––––––––––––––––––––––––––– */ + +/* Self Clearing Goodness */ +.container:after, +.row:after, +.u-cf { + content: ""; + display: table; + clear: both; } + + +/* Media Queries +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +/* +Note: The best way to structure the use of media queries is to create the queries +near the relevant code. For example, if you wanted to change the styles for buttons +on small devices, paste the mobile query code up in the buttons section and style it +there. +*/ + + +/* Larger than mobile */ +@media (min-width: 400px) {} + +/* Larger than phablet (also point when grid becomes active) */ +@media (min-width: 550px) {} + +/* Larger than tablet */ +@media (min-width: 750px) {} + +/* Larger than desktop */ +@media (min-width: 1000px) {} + +/* Larger than Desktop HD */ +@media (min-width: 1200px) {} + +/* Pretty container +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.pretty_container { + border-radius: 5px; + background-color: #f3f3f1; /* Mapbox light map land color */ + margin: 1%; + padding: 1%; + position: relative; + box-shadow: 1px 1px 1px slategrey; +} + +.container_title { + margin-top: 0; + margin-bottom: 0.2em; + font-size: 2.6rem; + line-height: 2.6rem; +} + +/* Customize Loading Spinner +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.dash-default-spinner { + margin-top: 0 !important; + margin-bottom: 0 !important; + padding: 50px; +} + +.dash-default-spinner > div { + background-color: #4e5964 !important; +} + +/* Special purpose buttons +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.reset-button { + width: 100%; + margin-top: 10px; + height: 30px; + line-height: 30px; +} + +.info-icon { + float: right; + cursor: pointer; + height: 2.2rem; + width: 2.2rem; + margin: 0.2rem; +} + + +/* Modal info layer +–––––––––––––––––––––––––––––––––––––––––––––––––– */ + + +.modal-content { + z-index: 1004; /* Sit on top, including modebar which has z=1001 */ + position: fixed; + left: 0; + width: 60%; + background-color: #fff; /* Material indigo 600 */ + color: black; + border-radius: 5px; + margin-left: 20%; + margin-bottom: 2%; + margin-top: 2%; +} + +.modal-content > div { + text-align: left; + margin: 15px; +} + +.modal-content.bottom { + bottom: 0; +} + +.modal-content.top { + top: 0; +} + +.place_form { + max-height: 740px; + max-width: 560px; + overflow: scroll; + overflow: scroll; + border-top: 0 0 10px 10px; + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19); + border-radius: 10px 10px 10px 10px; + padding: 10px 10px 10px 10px; + +} + +.form_feature { + font-size: 10px; + white-space: normal; + overflow-wrap: break-word; + width: 60%; + padding: 1px; + border-bottom: 1px solid #95a5a6; + padding-top: 20px; +} + + +.what_if_form_group { + display: flex; + width: 100%; + height: 40px; + +} + +.form_group_input { + font-size:10px; + width: 100%; + display: inline-block; + padding: 1px; + height: 40px; +} + + +.form_group_slider { + display: inline-block; + padding: 1px; + height: 40px; +} + +.form_group_dropdown { + font-size:10px; + width: 250%; + display: inline-block; + padding: 1px; + height: 40px; +} + + +.global_feature_importance_graph { + margin-left: 50; + margin-top: 0; + height: 100%; + font-size: 10px; +} + + +.modebar{ + display: none !important; +} + +.local_impact_heading { + padding-top: 2rem; + margin-left: 2rem; +} + +.local_impact_details { + margin: 0.5rem 1rem 3rem 2rem; +} + +.global_explanation_image { + max-width: 50%; +} + +.welcome_box { + display: 
flex; + justify-content: center; + align-items: center; + border-radius: 10px; + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19); + flex-direction: column; + padding: 20px; + margin: 20px; + width: 50%; + height: 50%; + +} + +.main_welcome_div { + display: flex; + width: 100%; + justify-content: center; + align-content: center; + align-items: center; +} + +.link { + margin-top: 20px; + max-width: 100%; + font-size: 15px; + +} + +.predicted_outcome { + padding-top: 0.1rem; + margin-left: 2rem; +} + +.model_confidence { + + margin-left: 2rem; +} + +.message_div { + display: flex; +} + +.main_div { + padding: 1% 1% 1% 1%; + +} + +.metrics_div { + display: flex; +} + +.insights_div_1 { + margin-left: 70px; +} + +.aggregate_imp { + padding-left: 20px; + padding-right: 20px; + +} \ No newline at end of file diff --git a/lib/modules/cohort_analysis/index.py b/lib/modules/cohort_analysis/index.py new file mode 100644 index 0000000..f21b292 --- /dev/null +++ b/lib/modules/cohort_analysis/index.py @@ -0,0 +1,42 @@ +import dash +import dash_core_components as dcc +import dash_html_components as html +from dash.dependencies import Input, Output +import dash +import dash_bootstrap_components as dbc +from jupyter_dash import JupyterDash +from .apps import cohort_app + + +class cohort(): + def __init__(self, x_test, model): + self.data = x_test + self.model = model + + def main_function(self, mode): + + external_stylesheets = ['https://raw.githubusercontent.com/rab657/explainx/master/explainx.css', + dbc.themes.BOOTSTRAP, + { + 'href': 'https://fonts.googleapis.com/css?family=Montserrat', + 'rel': 'stylesheet' + } + ] + cohort = JupyterDash(__name__, external_stylesheets=external_stylesheets, suppress_callback_exceptions=True) + + cohort.title = "explainX.ai - Model Performance Analysis" + + cohort.layout = cohort_app.test_func(self.data, self.model, cohort) + debug_value = False + if mode == None: + import random + port = random.randint(4000, 5000) + return cohort.run_server(port=port, debug=debug_value, dev_tools_ui=debug_value, + dev_tools_props_check=debug_value, dev_tools_silence_routes_logging=True, + dev_tools_hot_reload=True) + else: + import random + port = random.randint(4000, 5000) + return cohort.run_server(mode='inline', port=port, debug=debug_value, dev_tools_ui=debug_value, + dev_tools_props_check=debug_value, dev_tools_silence_routes_logging=True, + dev_tools_hot_reload=True) diff --git a/lib/modules/feature_interactions/__init__.py b/lib/modules/feature_interactions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/modules/feature_interactions/apps/feature_interaction.py b/lib/modules/feature_interactions/apps/feature_interaction.py new file mode 100644 index 0000000..d22f81f --- /dev/null +++ b/lib/modules/feature_interactions/apps/feature_interaction.py @@ -0,0 +1,113 @@ +from explainx.lib.imports import * +from explainx.lib.plotly_graphs import * +from explainx.lib.plotly_css import * +import dash_core_components as dcc +import dash_html_components as html + +def layout_interaction(x_test, df3, app): + + layout = html.Div([ + + html.Div([ + dcc.Loading( + id="feature_interaction_load", + type="circle", + children=html.Div([ + html.Div([ + html.H4('Partial Dependence Plot', + style=style12), + html.P( + 'The partial dependence plot (short PDP or PD plot) shows the marginal effect one or two features have on the predicted outcome of a machine learning model', + style=style13), + html.Div([ + html.P('Variable Name'), + dcc.Dropdown( + 
id='xaxis-column', + options = [{'label': i, 'value': i} for i in x_test.columns], + value = x_test.columns[1], + clearable=False + ), + + ],style=style22 + # style={'width': '20%', 'marginLeft': 70, 'float': 'left', + # 'display': 'inline-block'} + ), + + html.Div([ + html.P('Color Axis'), + dcc.Dropdown( + id='third-axis', + options=[{'label': i, 'value': i} for i in x_test.columns], + value=x_test.columns[-3], + clearable=False + ), + + ], style=style23), + + ]), + dcc.Loading( + id="loading-5", + type="circle", + children=dcc.Graph(id='indicator-graphic', style={'marginLeft': 50, 'marginTop':80}) + ), + ], + style=style16), + ), + dcc.Loading( + id="loading-2-pdp", + type='circle', + children=html.Div([ + + html.Div([ + html.H4('Summary Plot', + style=style17), + html.Div([ + dcc.Dropdown( + id='xaxis-column-test', + options=[{'label': i, 'value': i} for i in x_test.columns[1]], + value=x_test.columns[1], + clearable=False + ) + ], style={'display':'none'}), + html.P( + 'In the summary plot, we see first indications of the relationship between the value of a feature and the impact on the prediction', + style=style18) + , + + dcc.Graph(id='summary_plot', style={'marginLeft': 50, 'height': '600px'}) + + # ), + + ], style=style19), + ], style=style20) + ) + ]) + + ]) + + + # Partial Dependence Plot Graph + @app.callback( + Output('indicator-graphic', 'figure'), + [Input('xaxis-column', 'value'), + Input('third-axis', 'value')]) + def update_graph(xaxis_column_name, third_axis_name): + + g = plotly_graphs() + fig = g.pdp_plot(df3, df3[xaxis_column_name], df3[xaxis_column_name+"_impact"], df3[third_axis_name]) + return fig + + # Summary Plot + @app.callback( + Output('summary_plot', 'figure'), + [Input('xaxis-column', 'value')]) + def update_graph2(value): + + g = plotly_graphs() + graph_type = 'summary_plot' + #df3 = self.caching_data_manager(df3, sql_query, graph_type, g.summary_plot) + df4 = g.summary_plot(df3) + fig = g.summary_plot_graph(df4) + return fig + + return layout diff --git a/lib/modules/feature_interactions/assets/favicon.ico b/lib/modules/feature_interactions/assets/favicon.ico new file mode 100644 index 0000000..1224c37 Binary files /dev/null and b/lib/modules/feature_interactions/assets/favicon.ico differ diff --git a/lib/modules/feature_interactions/assets/topography.css b/lib/modules/feature_interactions/assets/topography.css new file mode 100644 index 0000000..0663147 --- /dev/null +++ b/lib/modules/feature_interactions/assets/topography.css @@ -0,0 +1,623 @@ +/* Table of contents +–––––––––––––––––––––––––––––––––––––––––––––––––– +- Plotly.js +- Grid +- Base Styles +- Typography +- Links +- Buttons +- Forms +- Lists +- Code +- Tables +- Spacing +- Utilities +- Clearing +- Media Queries +*/ + +/* Grid +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.container { + position: relative; + width: 100%; + max-width: 960px; + margin: 0 auto; + padding: 0 20px; + box-sizing: border-box; } +.column, +.columns { + width: 100%; + float: left; + box-sizing: border-box; } + +/* For devices larger than 400px */ +@media (min-width: 400px) { + .container { + width: 85%; + padding: 0; } +} + +/* For devices larger than 550px */ +@media (min-width: 550px) { + .container { + width: 80%; + } + .column, + .columns { + margin-left: 4%; } + .column:first-child, + .columns:first-child { + margin-left: 0; } + + .one.column, + .one.columns { width: 4.66666666667%; } + .two.columns { width: 13.3333333333%; } + .three.columns { width: 22%; } + .four.columns { width: 30.6666666667%; } + 
.five.columns { width: 39.3333333333%; } + .six.columns { width: 48%; } + .seven.columns { width: 56.6666666667%; } + .eight.columns { width: 65.3333333333%; } + .nine.columns { width: 74.0%; } + .ten.columns { width: 82.6666666667%; } + .eleven.columns { width: 91.3333333333%; } + .twelve.columns { width: 98%; margin-left: 0; margin-right: 0;} + + .one-third.column { width: 30.6666666667%; } + .two-thirds.column { width: 65.3333333333%; } + + .one-half.column { width: 48%; } + + /* Offsets */ + .offset-by-one.column, + .offset-by-one.columns { margin-left: 8.66666666667%; } + .offset-by-two.column, + .offset-by-two.columns { margin-left: 17.3333333333%; } + .offset-by-three.column, + .offset-by-three.columns { margin-left: 26%; } + .offset-by-four.column, + .offset-by-four.columns { margin-left: 34.6666666667%; } + .offset-by-five.column, + .offset-by-five.columns { margin-left: 43.3333333333%; } + .offset-by-six.column, + .offset-by-six.columns { margin-left: 52%; } + .offset-by-seven.column, + .offset-by-seven.columns { margin-left: 60.6666666667%; } + .offset-by-eight.column, + .offset-by-eight.columns { margin-left: 69.3333333333%; } + .offset-by-nine.column, + .offset-by-nine.columns { margin-left: 78.0%; } + .offset-by-ten.column, + .offset-by-ten.columns { margin-left: 86.6666666667%; } + .offset-by-eleven.column, + .offset-by-eleven.columns { margin-left: 95.3333333333%; } + + .offset-by-one-third.column, + .offset-by-one-third.columns { margin-left: 34.6666666667%; } + .offset-by-two-thirds.column, + .offset-by-two-thirds.columns { margin-left: 69.3333333333%; } + + .offset-by-one-half.column, + .offset-by-one-half.columns { margin-left: 52%; } + +} + + +/* Base Styles +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +/* NOTE +html is set to 62.5% so that all the REM measurements throughout Skeleton +are based on 10px sizing. 
So basically 1.5rem = 15px :) */ +html { + font-size: 62.5%; } +body { + font-size: 1.2em; /* currently ems cause chrome bug misinterpreting rems on body element */ + line-height: 1.6; + font-weight: 400; +/* font-family: "Open Sans", "HelveticaNeue", "Helvetica Neue", Helvetica, Arial, sans-serif; */ + /* font-family: 'Montserrat'; */ + font-family: 'Montserrat', sans-serif; + color: #263238; /* Material blue-grey 900*/ + background-color: #fff; /* Material blue-grey 100*/ + margin: 0%; +} + + +/* Typography +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +h1, h2, h3, h4, h5, h6 { + margin-top: 0; + margin-bottom: 0; + font-weight: 300; } +h1 { font-size: 3.5rem; line-height: 1.2; letter-spacing: -.1rem; margin-bottom: 2rem; } +h2 { font-size: 2.6rem; line-height: 1.25; letter-spacing: -.1rem; margin-bottom: 1.8rem; margin-top: 1.8rem;} +h3 { font-size: 2.0rem; line-height: 1.3; letter-spacing: -.1rem; margin-bottom: 1.5rem; margin-top:4.5rem;} +h4 { font-size: 1.6rem; line-height: 1.35; letter-spacing: -.08rem; margin-bottom: 1.2rem; margin-top: 1.2rem;} +h5 { font-size: 1.2rem; line-height: 1.5; letter-spacing: -.05rem; margin-bottom: 0.6rem; margin-top: 0.6rem;} +h6 { font-size: 1.0rem; line-height: 1.6; letter-spacing: 0; margin-bottom: 0.75rem; margin-top: 0.75rem;} + +p { font-size: 1.1rem ; margin-top: 1rem; margin-bottom:1rem; overflow-wrap: break-word; margin-left:2px; margin-right:3px;} + + +/* Blockquotes +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +blockquote { + border-left: 4px lightgrey solid; + padding-left: 1rem; + margin-top: 2rem; + margin-bottom: 2rem; + margin-left: 0rem; +} + + +/* Links +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +a { + color: #1565c0; /* Material Blue 800 */ + text-decoration: underline; + cursor: pointer;} +a:hover { + color: #0d47a1; /* Material Blue 900 */ +} + + +/* Buttons +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.button, +button, +input[type="submit"], +input[type="reset"], +input[type="button"] { + display: inline-block; + height: 38px; + padding: 0 30px; + color: #455A64; /* Material blue-gray 700*/ + text-align: center; + font-size: 11px; + font-weight: 600; + line-height: 38px; + letter-spacing: .1rem; + text-transform: uppercase; + text-decoration: none; + white-space: nowrap; + background-color: transparent; + border-radius: 4px; + border: 1px solid #B0BEC5; /* Material blue-gray 200*/ + cursor: pointer; + box-sizing: border-box; } +.button:hover, +button:hover, +input[type="submit"]:hover, +input[type="reset"]:hover, +input[type="button"]:hover, +.button:focus, +button:focus, +input[type="submit"]:focus, +input[type="reset"]:focus, +input[type="button"]:focus { + color: #333; + border-color: #888; + outline: 0; } +.button.button-primary, +button.button-primary, +input[type="submit"].button-primary, +input[type="reset"].button-primary, +input[type="button"].button-primary { + color: #FFF; + background-color: #33C3F0; + border-color: #33C3F0; } +.button.button-primary:hover, +button.button-primary:hover, +input[type="submit"].button-primary:hover, +input[type="reset"].button-primary:hover, +input[type="button"].button-primary:hover, +.button.button-primary:focus, +button.button-primary:focus, +input[type="submit"].button-primary:focus, +input[type="reset"].button-primary:focus, +input[type="button"].button-primary:focus { + color: #FFF; + background-color: #1EAEDB; + border-color: #1EAEDB; } + + +/* Forms +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +input[type="email"], 
+input[type="number"], +input[type="search"], +input[type="text"], +input[type="tel"], +input[type="url"], +input[type="password"], +textarea, +select { + height: 38px; + padding: 6px 10px; /* The 6px vertically centers text on FF, ignored by Webkit */ + background-color: #fff; + border: 1px solid #D1D1D1; + border-radius: 4px; + box-shadow: none; + box-sizing: border-box; + font-family: inherit; + font-size: inherit; /*https://stackoverflow.com/questions/6080413/why-doesnt-input-inherit-the-font-from-body*/} +/* Removes awkward default styles on some inputs for iOS */ +input[type="email"], +input[type="number"], +input[type="search"], +input[type="text"], +input[type="tel"], +input[type="url"], +input[type="password"], +textarea { + -webkit-appearance: none; + -moz-appearance: none; + appearance: none; } +textarea { + min-height: 65px; + padding-top: 6px; + padding-bottom: 6px; } +input[type="email"]:focus, +input[type="number"]:focus, +input[type="search"]:focus, +input[type="text"]:focus, +input[type="tel"]:focus, +input[type="url"]:focus, +input[type="password"]:focus, +textarea:focus, +select:focus { + border: 1px solid #33C3F0; + outline: 0; } +label, +legend { + display: block; + margin-bottom: 0px; } +fieldset { + padding: 0; + border-width: 0; } +input[type="checkbox"], +input[type="radio"] { + display: inline; } +label > .label-body { + display: inline-block; + margin-left: .5rem; + font-weight: normal; } + + +/* Lists +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +ul { + list-style: circle inside; } +ol { + list-style: decimal inside; } +ol, ul { + padding-left: 0; + margin-top: 0; } +ul ul, +ul ol, +ol ol, +ol ul { + margin: 1.5rem 0 1.5rem 3rem; + font-size: 90%; } +li { + margin-bottom: 0; +} + +/* Tables +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +table { + border-collapse: collapse; +} +th:not(.CalendarDay), +td:not(.CalendarDay) { + padding: 12px 15px; + text-align: left; + border-bottom: 1px solid #E1E1E1; } +th:first-child:not(.CalendarDay), +td:first-child:not(.CalendarDay) { + padding-left: 0; } +th:last-child:not(.CalendarDay), +td:last-child:not(.CalendarDay) { + padding-right: 0; } + + +/* Spacing +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +button, +.button { + margin-bottom: 0rem; } +input, +textarea, +select, +fieldset { + margin-bottom: 0rem; } +pre, +dl, +figure, +table, +form { + margin-bottom: 0rem; } +p, +ul, +ol { + margin-bottom: 0.75rem; } + +/* Utilities +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.u-full-width { + width: 100%; + box-sizing: border-box; } +.u-max-full-width { + max-width: 100%; + box-sizing: border-box; } +.u-pull-right { + float: right; } +.u-pull-left { + float: left; } + + +/* Misc +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +hr { + margin-top: 3rem; + margin-bottom: 3.5rem; + border-width: 0; + border-top: 1px solid #E1E1E1; } + + +/* Clearing +–––––––––––––––––––––––––––––––––––––––––––––––––– */ + +/* Self Clearing Goodness */ +.container:after, +.row:after, +.u-cf { + content: ""; + display: table; + clear: both; } + + +/* Media Queries +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +/* +Note: The best way to structure the use of media queries is to create the queries +near the relevant code. For example, if you wanted to change the styles for buttons +on small devices, paste the mobile query code up in the buttons section and style it +there. 
+*/ + + +/* Larger than mobile */ +@media (min-width: 400px) {} + +/* Larger than phablet (also point when grid becomes active) */ +@media (min-width: 550px) {} + +/* Larger than tablet */ +@media (min-width: 750px) {} + +/* Larger than desktop */ +@media (min-width: 1000px) {} + +/* Larger than Desktop HD */ +@media (min-width: 1200px) {} + +/* Pretty container +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.pretty_container { + border-radius: 5px; + background-color: #f3f3f1; /* Mapbox light map land color */ + margin: 1%; + padding: 1%; + position: relative; + box-shadow: 1px 1px 1px slategrey; +} + +.container_title { + margin-top: 0; + margin-bottom: 0.2em; + font-size: 2.6rem; + line-height: 2.6rem; +} + +/* Customize Loading Spinner +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.dash-default-spinner { + margin-top: 0 !important; + margin-bottom: 0 !important; + padding: 50px; +} + +.dash-default-spinner > div { + background-color: #4e5964 !important; +} + +/* Special purpose buttons +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.reset-button { + width: 100%; + margin-top: 10px; + height: 30px; + line-height: 30px; +} + +.info-icon { + float: right; + cursor: pointer; + height: 2.2rem; + width: 2.2rem; + margin: 0.2rem; +} + + +/* Modal info layer +–––––––––––––––––––––––––––––––––––––––––––––––––– */ + + +.modal-content { + z-index: 1004; /* Sit on top, including modebar which has z=1001 */ + position: fixed; + left: 0; + width: 60%; + background-color: #fff; /* Material indigo 600 */ + color: black; + border-radius: 5px; + margin-left: 20%; + margin-bottom: 2%; + margin-top: 2%; +} + +.modal-content > div { + text-align: left; + margin: 15px; +} + +.modal-content.bottom { + bottom: 0; +} + +.modal-content.top { + top: 0; +} + +.place_form { + max-height: 740px; + max-width: 560px; + overflow: scroll; + overflow: scroll; + border-top: 0 0 10px 10px; + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19); + border-radius: 10px 10px 10px 10px; + padding: 10px 10px 10px 10px; + +} + +.form_feature { + font-size: 10px; + white-space: normal; + overflow-wrap: break-word; + width: 60%; + padding: 1px; + border-bottom: 1px solid #95a5a6; + padding-top: 20px; +} + + +.what_if_form_group { + display: flex; + width: 100%; + height: 40px; + +} + +.form_group_input { + font-size:10px; + width: 100%; + display: inline-block; + padding: 1px; + height: 40px; +} + + +.form_group_slider { + display: inline-block; + padding: 1px; + height: 40px; +} + +.form_group_dropdown { + font-size:10px; + width: 250%; + display: inline-block; + padding: 1px; + height: 40px; +} + + +.global_feature_importance_graph { + margin-left: 50; + margin-top: 0; + height: 100%; + font-size: 10px; +} + + +.modebar{ + display: none !important; +} + +.local_impact_heading { + padding-top: 2rem; + margin-left: 2rem; +} + +.local_impact_details { + margin: 0.5rem 1rem 3rem 2rem; +} + +.global_explanation_image { + max-width: 50%; +} + +.welcome_box { + display: flex; + justify-content: center; + align-items: center; + border-radius: 10px; + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19); + flex-direction: column; + padding: 20px; + margin: 20px; + width: 50%; + height: 50%; + +} + +.main_welcome_div { + display: flex; + width: 100%; + justify-content: center; + align-content: center; + align-items: center; +} + +.link { + margin-top: 20px; + max-width: 100%; + font-size: 15px; + +} + +.predicted_outcome { + padding-top: 0.1rem; + margin-left: 
2rem; +} + +.model_confidence { + + margin-left: 2rem; +} + +.message_div { + display: flex; +} + +.main_div { + padding: 1% 1% 1% 1%; + +} + +.metrics_div { + display: flex; +} + +.insights_div_1 { + margin-left: 70px; +} + +.aggregate_imp { + padding-left: 20px; + padding-right: 20px; + +} \ No newline at end of file diff --git a/lib/modules/feature_interactions/index.py b/lib/modules/feature_interactions/index.py new file mode 100644 index 0000000..f43bf5c --- /dev/null +++ b/lib/modules/feature_interactions/index.py @@ -0,0 +1,43 @@ +from explainx.lib.modules.feature_interactions.apps.feature_interaction import layout_interaction +import dash +import dash_core_components as dcc +import dash_html_components as html +from dash.dependencies import Input, Output +import dash +import dash_bootstrap_components as dbc +from jupyter_dash import JupyterDash + +#from app import app +from .apps import feature_interaction + + +class featureInteraction(): + def __init__(self, x_test, df_with_shap=None): + self.data = x_test + self.df_with_shap = df_with_shap + + def main_function(self, mode): + + external_stylesheets = ['https://raw.githubusercontent.com/rab657/explainx/master/explainx.css', + dbc.themes.BOOTSTRAP, + { + 'href':'https://fonts.googleapis.com/css?family=Montserrat', + 'rel' :'stylesheet' + } + ] + + local = JupyterDash(__name__, external_stylesheets=external_stylesheets, suppress_callback_exceptions=True) + + local.title = "explainX.ai - Feature Interaction" + + local.layout = feature_interaction.layout_interaction(self.data, self.df_with_shap, local) + + if mode == None: + import random + port = random.randint(5000,6000) + return local.run_server(port=port) + else: + import random + port = random.randint(5000,6000) + return local.run_server(mode='inline', port=port) + \ No newline at end of file diff --git a/lib/modules/imports.py b/lib/modules/imports.py new file mode 100644 index 0000000..7faee4c --- /dev/null +++ b/lib/modules/imports.py @@ -0,0 +1,38 @@ +from jupyter_dash import JupyterDash +import dash_bootstrap_components as dbc +import dash +import dash_core_components as dcc +import dash_core_components +import dash_html_components as html +from dash.dependencies import Input, Output, State +import plotly.express as px +import plotly.graph_objects as go +from plotly.graph_objs.scatter.marker import Line +import plotly.figure_factory as ff +import dash_table +import pandas as pd +import numpy as np +import dash_bootstrap_components as dbc +import time +import shap +import dash_editor_components +import socket +from contextlib import closing +import subprocess +import os +import sys +import shap +import random +import string +from uuid import getnode as get_mac +import platform +import datetime +import pyrebase +from config_det import data_det +from collections import deque +from sklearn import metrics +from sklearn.base import is_classifier, is_regressor +import pytest + +firebase_app = pyrebase.initialize_app(data_det) +ref = firebase_app.database() \ No newline at end of file diff --git a/lib/modules/local_explanation/__init__.py b/lib/modules/local_explanation/__init__.py new file mode 100644 index 0000000..139597f --- /dev/null +++ b/lib/modules/local_explanation/__init__.py @@ -0,0 +1,2 @@ + + diff --git a/lib/modules/local_explanation/apps/insight_classification.py b/lib/modules/local_explanation/apps/insight_classification.py new file mode 100644 index 0000000..517ae3f --- /dev/null +++ b/lib/modules/local_explanation/apps/insight_classification.py @@ -0,0 +1,159 @@ 
+from imports import *
+
+"""
+This class generates the insights shown alongside each of the graphs for classification models.
+Do not change anything in this class.
+"""
+
+
+class insight_classification():
+    def __init__(self):
+        super(insight_classification, self).__init__()
+        self.param = None
+
+    def insight_1_feature_imp(self, df, classes=[0, 1]):
+        # df is expected to be sorted by importance (ascending), so the top features are the last rows.
+        top_important_variables = []
+        average_shap_values = []
+        df_numpy = df.to_numpy()
+
+        for i in range(3):
+            top_important_variables.append(df_numpy[-1 * (i+1)][0])
+            average_shap_values.append(df_numpy[-1 * (i+1)][1])
+
+        sentences = []
+        sentences.append("The top three features influencing the model's predicted outcome are: {}, {}, and {}".format(str(top_important_variables[0]), str(top_important_variables[1]), str(top_important_variables[2])))
+
+        return sentences
+
+    def insight_2_global_feature_impact(self, df, outcome, expected_values, classes=[0, 1]):
+
+        top_positive_variables = []
+        average_shap_values_positive = []
+
+        top_negative_variables = []
+        average_shap_values_negative = []
+
+        for i in range(1, 4):
+            top_positive_variables.append(df.iloc[-1 * (i+1)][0])
+            average_shap_values_positive.append(df.iloc[-1 * (i+1)][1])
+
+            top_negative_variables.append(df.iloc[i][0])
+            average_shap_values_negative.append(df.iloc[i][1])
+
+        sentences = []
+        sentences.append("Your model has {:.2%} confidence that the overall outcome will be {}.".format(expected_values[outcome], outcome))
+
+        sentences.append("\nOverall, on average, the top three variables with a positive impact on the model's predicted outcome are {} ({:.2%}), {} ({:.2%}), {} ({:.2%})".format(top_positive_variables[0], average_shap_values_positive[0], top_positive_variables[1], average_shap_values_positive[1], top_positive_variables[2], average_shap_values_positive[2]))
+
+        sentences.append("\nOverall, on average, the top three variables with a negative impact on the model's predicted outcome are {} ({:.2%}), {} ({:.2%}), {} ({:.2%})".format(top_negative_variables[0], average_shap_values_negative[0], top_negative_variables[1], average_shap_values_negative[1], top_negative_variables[2], average_shap_values_negative[2]))
+
+        sentences.append("")
+
+        return sentences
+
+    def insight_3(self, df):
+
+        top_positive_variables = []
+        average_shap_values_positive = []
+
+        df_numpy = df.to_numpy()
+
+        for i in range(3):
+            top_positive_variables.append(df_numpy[-1 * (i+1)][0])
+            average_shap_values_positive.append(df_numpy[-1 * (i+1)][1])
+
+        sentences = []
+
+        sentences.append("Please explore the PDP graph to identify how different values of " +
+                         top_positive_variables[0] + ", " +
+                         top_positive_variables[1] + ", and " +
+                         top_positive_variables[2] +
+                         " affect the model decision.")
+
+        return sentences
+
+    # NOTE: this second definition overrides the insight_1_feature_imp defined above;
+    # it iterates over range(1, 4) and therefore skips the very last row of df.
+    def insight_1_feature_imp(self, df, classes=[0, 1]):
+
+        top_important_variables = []
+        average_shap_values = []
+
+        df_numpy = df.to_numpy()
+
+        for i in range(1, 4):
+            top_important_variables.append(df_numpy[-1 * (i+1)][0])
+            average_shap_values.append(df_numpy[-1 * (i+1)][1])
+
+        sentences = []
+
+        sentences.append("The top three features influencing the model's predicted outcome are: {}, {}, and {}".format(str(top_important_variables[0]), top_important_variables[1], top_important_variables[2]))
+
+        return sentences
+
+    # Note: defined without self; it is called unbound on the class in local_explanation.py.
+    def insight_2_local_feature_impact(df, y_and_prob):
+
+        top_positive_variables = []
+        average_shap_values_positive = []
+
+        top_negative_variables = []
+        average_shap_values_negative = []
+
+        for i in range(1, 4):
+            top_positive_variables.append(df.iloc[-1 * (i)][0])
+            average_shap_values_positive.append(df.iloc[-1 * (i)][1])
+
+            top_negative_variables.append(df.iloc[i-1][0])
+            average_shap_values_negative.append(df.iloc[i-1][1])
+
+        sentences = []
+
+        sentences.append("Model Decision: {}".format(y_and_prob[0]))
+        sentences.append("Model Confidence Level: {:.0%}".format(y_and_prob[1]))
+
+        sentences.append("\nThe top three variables with a positive impact on this prediction are {} ({:.2%}), {} ({:.2%}), {} ({:.2%})".format(top_positive_variables[0], average_shap_values_positive[0], top_positive_variables[1], average_shap_values_positive[1], top_positive_variables[2], average_shap_values_positive[2]))
+
+        sentences.append("\nThe top three variables with a negative impact on this prediction are {} ({:.2%}), {} ({:.2%}), {} ({:.2%})".format(top_negative_variables[0], average_shap_values_negative[0], top_negative_variables[1], average_shap_values_negative[1], top_negative_variables[2], average_shap_values_negative[2]))
+
+        sentences.append("")
+
+        return sentences
+
+    def insight_4_pdp(self, df):
+        output = True
+        return output
+
+    def insight_5(self, x):
+        output = True
+        return output
+
+
diff --git a/lib/modules/local_explanation/apps/insight_regression.py b/lib/modules/local_explanation/apps/insight_regression.py new file mode 100644 index 0000000..354adad --- /dev/null +++ b/lib/modules/local_explanation/apps/insight_regression.py @@ -0,0 +1,166 @@
+from imports import *
+
+"""
+This class generates the insights shown alongside each of the graphs for regression models.
+"""
+
+
+class insight_regression():
+    def __init__(self):
+        super(insight_regression, self).__init__()
+        self.param = None
+
+    def insight_1_feature_imp(self, df):
+        # df is expected to be sorted by importance (ascending), so the top features are the last rows.
+        top_important_variables = []
+        average_shap_values = []
+
+        df_numpy = df.to_numpy()
+
+        for i in range(3):
+            top_important_variables.append(df_numpy[-1 * (i+1)][0])
+            average_shap_values.append(df_numpy[-1 * (i+1)][1])
+
+        sentences = []
+
+        sentences.append(
+            "This graph helps you identify which features in your dataset have the greatest effect on the outcomes of your machine learning model.")
+        sentences.append("In this graph, you can see: " + str(top_important_variables[0]) + ", " +
+                         str(top_important_variables[1]) + ", and " +
+                         str(top_important_variables[2]) +
+                         " are the top three most important variables according to your machine learning model.")
+
+        sentences.append(
+            "Remember, each variable might affect the outcome differently. Let’s scroll down and explore how each variable impacts the outcome.")
+
+        sentences.append("On average, the variable " + str(top_important_variables[0]) +
+                         " will change the model outcome by " +
+                         str(round(average_shap_values[0], 2)))
+
+        return sentences
+
+    def insight_2_global_feature_impact(self, df, expected_values):
+
+        top_positive_variables = []
+        average_shap_values_positive = []
+
+        top_negative_variables = []
+        average_shap_values_negative = []
+
+        df_numpy = df.to_numpy()
+
+        for i in range(3):
+            top_positive_variables.append(df_numpy[-1 * (i+1)][0])
+            average_shap_values_positive.append(df_numpy[-1 * (i+1)][1])
+
+            top_negative_variables.append(df_numpy[i][0])
+            average_shap_values_negative.append(df_numpy[i][1])
+
+        sentences = []
+
+        sentences.append("The average outcome predicted by the model is " + str(expected_values))
+
+        sentences.append("On average, the variables " +
+                         top_positive_variables[0] + ", " +
+                         top_positive_variables[1] + ", and " +
+                         top_positive_variables[2] +
+                         " will increase the average model outcome (" + str(expected_values) + ")" +
+                         " by " +
+                         str(round(average_shap_values_positive[0], 2)) + ", " +
+                         str(round(average_shap_values_positive[1], 2)) + ", and " +
+                         str(round(average_shap_values_positive[2], 2)) + " respectively.")
+
+        sentences.append("On average, the variables " +
+                         top_negative_variables[0] + ", " +
+                         top_negative_variables[1] + ", and " +
+                         top_negative_variables[2] +
+                         " will change the average model outcome (" + str(expected_values) + ")" +
+                         " by " +
+                         str(round(average_shap_values_negative[0], 2)) + ", " +
+                         str(round(average_shap_values_negative[1], 2)) + ", and " +
+                         str(round(average_shap_values_negative[2], 2)) + " respectively.")
+
+        sentences.append("")
+
+        return sentences
+
+    # Note: defined without self; it is called unbound on the class in local_explanation.py.
+    def insight_2_local_feature_impact(df, y_and_prob):
+
+        top_positive_variables = []
+        average_shap_values_positive = []
+
+        top_negative_variables = []
+        average_shap_values_negative = []
+
+        df_numpy = df.to_numpy()
+
+        for i in range(3):
+            top_positive_variables.append(df_numpy[-1 * (i+1)][0])
+            average_shap_values_positive.append(df_numpy[-1 * (i+1)][1])
+
+            top_negative_variables.append(df_numpy[i][0])
+            average_shap_values_negative.append(df_numpy[i][1])
+
+        sentences = []
+
+        sentences.append("Model Prediction: " + str(y_and_prob[0]))
+
+        sentences.append(" ")
+
+        sentences.append(f'Top 3 Positive Impact Variables: {top_positive_variables[0]} (+{average_shap_values_positive[0]:.2f}), {top_positive_variables[1]} (+{average_shap_values_positive[1]:.2f}), {top_positive_variables[2]} (+{average_shap_values_positive[2]:.2f})')
+
+        sentences.append(f'Top 3 Negative Impact Variables: {top_negative_variables[0]} ({average_shap_values_negative[0]:.2f}), {top_negative_variables[1]} ({average_shap_values_negative[1]:.2f}), {top_negative_variables[2]} ({average_shap_values_negative[2]:.2f})')
+
+        sentences.append("")
+
+        return sentences
+
+    def insight_3(self, df):
+
+        top_positive_variables = []
+        average_shap_values_positive = []
+
+        df_numpy = df.to_numpy()
+
+        for i in range(3):
+            top_positive_variables.append(df_numpy[-1 * (i+1)][0])
+            average_shap_values_positive.append(df_numpy[-1 * (i+1)][1])
+
+        sentences = []
+
+        sentences.append("Please explore the PDP graph to identify how different values of " +
+                         top_positive_variables[0] + ", " +
+                         top_positive_variables[1] + ", and " +
+                         top_positive_variables[2] +
+                         " affect the model decision.")
+
+        return sentences
+
+    def insight_4_pdp(self, df):
+        output = True
+        return output
+
+    def insight_5(self, x):
+        output = True
+        return output
+
+
diff --git a/lib/modules/local_explanation/apps/insights.py b/lib/modules/local_explanation/apps/insights.py new file mode 100644 index 0000000..50d5194 --- /dev/null +++ b/lib/modules/local_explanation/apps/insights.py @@ -0,0 +1,48 @@
+from explainx.lib.imports import *
+from .insight_classification import *
+from .insight_regression import *
+from explainx.lib.utils import is_classification
+
+"""
+This class routes each insight request to the classification or regression
+implementation, depending on the type of the supplied model.
+"""
+
+
+class insights():
+    def __init__(self, model):
+        super(insights, self).__init__()
+
+        self.model = model
+        self.regression = insight_regression()
+        self.classification = insight_classification()
+        # self.param (holding "expected_values" and "classes") is expected to be
+        # set by the caller before insight_2_global_feature_impact is used.
+
+    def insight_1_feature_imp(self, df):
+        if is_classification(self.model):
+            return self.classification.insight_1_feature_imp(df)
+        else:
+            return self.regression.insight_1_feature_imp(df)
+
+    def insight_2_global_feature_impact(self, df, outcome=0):
+        if is_classification(self.model):
+            return self.classification.insight_2_global_feature_impact(df, outcome, self.param["expected_values"], self.param["classes"])
+        else:
+            return self.regression.insight_2_global_feature_impact(df, self.param["expected_values"][0])
+
+    def insight_2_local_feature_impact(self, df, y_and_prob):
+        if is_classification(self.model):
+            return self.classification.insight_2_local_feature_impact(df, y_and_prob)
+        else:
+            return self.regression.insight_2_local_feature_impact(df, y_and_prob)
+
+    def insight_3(self, df):
+        if is_classification(self.model):
+            return self.classification.insight_3(df)
+        else:
+            return self.regression.insight_3(df)
+
+
diff --git a/lib/modules/local_explanation/apps/local_explanation.py b/lib/modules/local_explanation/apps/local_explanation.py new file mode 100644 index 0000000..1925a16 --- /dev/null +++ b/lib/modules/local_explanation/apps/local_explanation.py @@ -0,0 +1,164 @@
+from explainx.lib.imports import *
+from explainx.lib.plotly_css import *
+from explainx.lib.plotly_graphs import *
+from explainx.lib.protodash import *
+from explainx.lib.frameworks.shapley_values import ShapleyValues
+from explainx.lib.utils import is_classification
+from .insights import *
+from .insight_classification import *
+from .insight_regression import *
+from .what_if import *
+
+def layout_local(ShapleyValues, x_test, df_with_shap, app):
+
+    layout = html.Div([
+        # Input and Data
+        html.Div([
+            html.H3('What-If Analysis'),
+            html.Div([
+                html.P("Datapoint Index: "),
+                dcc.Input(
+                    id="row_number",
+                    type="number",
+                    value=1,
+                    placeholder="Enter a row number, e.g. 1, 4, 5",
+                    style={'text-align': 'center'}),
+                html.Button(
+                    id='submit-button-state',
+                    n_clicks=0,
+                    children='Predict',
+                    style={'margin-left':'10px'}),
+            ], style={"display":"flex"})
+        ],style={'margin-bottom':"10px"}),
+
+        # End of Input & Data Div
+        html.Div([
+            # What-If Div
+            html.Div([
+                html.Div([
+                    dbc.Table(html.Thead(html.Tr([html.Th("Feature"), html.Th("Value")])),
+                              bordered=True,
+                              dark=True,
+                              hover=True,
+                              responsive=True,
+                              striped=True,
+                              style={'width':'100%'}),
+
+                    html.Div(id="place_form_here", className="place_form")
+                ])
+            ], style={'width':'29%'}),
+
+            html.Div([], style={'width':"1%"}),
+
+            # Tabs Div
+            html.Div([dcc.Tabs([
+                dcc.Tab(label='Local Feature Explanation', children=[
+                    html.Div(id='datatable-interactivity-container2', children=[
+                        html.Div([
+                            html.Div([
+                                html.Div([
+                                    html.H4('Features Influencing This Prediction', className="local_impact_heading"),
+                                    html.P(
+                                        'This graph identifies which features (also known as columns or inputs) in your dataset had a positive or negative influence on the final outcome of your machine learning model.', className="local_impact_details"),
+                                ]),
+
+                                dcc.Loading(
+                                    id="local_feature_impact_1",
+                                    type="circle",
+                                    children=dbc.Row(
+                                        [
+                                            dbc.Col(html.Div(dcc.Graph(id='local_feature_impact',
+                                                                       style={'marginLeft': 10, 'height': '590px'})), width=8),
+                                            dbc.Col(
+                                                [
+                                                    html.Div([
+                                                        html.H4(id='local_message_1'),
+                                                        html.H4(id='local_message_2'),
+                                                        html.H5("How was this prediction determined?"),
+                                                        html.H5(id='local_message_3'),
+                                                        html.H5(id='local_message_4')
+                                                    ]),
+
+                                                ]
+                                                , width=4),
+
+                                        ]))
+
+                            ],
+                                style=style31,
+                            ),
+                        ],style=style32)], style={'height': '400'})]),
+
+            ])
+
+        ], style={"width":'69%'})
+
+    ], style={'display':'flex'}),
+
+    ])
+
+    @app.callback(
+        Output('place_form_here', 'children'),
+        [Input('row_number', 'value')])
+    def create_what_if_form(row_number):
+        x = what_if()
+        i = 0
+
+        if isinstance(row_number, int):
+            i = row_number
+
+        # Work on a copy so that dropping the *_impact columns does not modify df_with_shap.
+        array = df_with_shap[i:i+1].copy()
+
+        impact_variables = [col for col in array if '_impact' in col]
+        for col in impact_variables:
+            array.drop([col], axis=1, inplace=True)
+
+        form = x.what_if_form(array, x_test.columns)
+        return form
+
+    callback_input = [Input(f + '_slider', 'value') for f in x_test.columns]
+    callback_input.append(Input('submit-button-state', 'n_clicks'))
+    callback_input.append(Input('row_number', 'value'))
+
+    # Local Feature Impact Graph
+    @app.callback(
+        [Output('local_feature_impact', "figure"),
+         Output('local_message_1', "children"),
+         Output('local_message_2', "children"),
+         Output('local_message_3', "children"),
+         Output('local_message_4', 'children')],
+        callback_input, prevent_initial_call=True)
+    def update_impact_graph(*values):
+        changed_id = [p['prop_id'] for p in dash.callback_context.triggered][0]
+
+        df = pd.DataFrame([values[:-2]])
+        df.columns = x_test.columns
+
+        array = ShapleyValues.add_shap_row(df, values[-1])
+        g = plotly_graphs()
+        figure, dat = g.local_feature_impact_graph(array)
+
+        if is_classification(ShapleyValues.model):
+            y_and_prob = []
+            y_and_prob.append(int(array["Model Decision"]))
+            y_and_prob.append(round(float(array["Probability: " + str(int(array["Model Decision"]))]), 2))
+
+            message = insight_classification.insight_2_local_feature_impact(dat, y_and_prob)
+        else:
+            y_and_prob = []
+            y_and_prob.append(int(array["Model Decision"]))
+            message = insight_regression.insight_2_local_feature_impact(dat, y_and_prob)
+
+        return figure, message[0], message[1], message[2],
message[3] + + + return layout + + + + diff --git a/lib/modules/local_explanation/apps/what_if.py b/lib/modules/local_explanation/apps/what_if.py new file mode 100644 index 0000000..1497532 --- /dev/null +++ b/lib/modules/local_explanation/apps/what_if.py @@ -0,0 +1,153 @@ +from explainx.lib.imports import * +from explainx.lib.feature_impact import * +from explainx.lib.feature_importance import * +import dash_daq as daq + + +class what_if(): + def __init__(self): + super(what_if, self).__init__() + #self.data = input_data + + # save all important variables here. + def col_values_pairs(self, df, features): + df_features= df[features] + df_describe= df_features.describe() + + """ + which features are categorical? + find their feature value pairs + """ + + numnerical_col= df_describe.columns + categorical_col= list(set(features)- set(numnerical_col)) + + col_type_dict={} + col_value_dict={} + for f in numnerical_col: + col_type_dict[f]="slider" + col_value_dict[f]= self.get_min_max_featurs(df_describe, f) + + + for f in categorical_col: + categ,col_type= self.all_categories(df, f) + + col_type_dict[f]= col_type + col_value_dict[f]= categ + + return col_value_dict, col_type_dict + + + def all_categories(self, df, feature_name): + + """ + find all categories in categorical features. + """ + all_categories=list(set(df[feature_name])) + + return all_categories, 'dropdown' + + + def get_min_max_featurs(self, df_describe, feature_name): + + x_des_var_list=list(df_describe[feature_name]) + q00= x_des_var_list[3]-25/100*x_des_var_list[3] + q0= x_des_var_list[3] + q1= x_des_var_list[4] + q2= x_des_var_list[5] + q3= x_des_var_list[6] + q4= x_des_var_list[7] + q5= x_des_var_list[7]+ 25/100*x_des_var_list[7] + ans= [q00,q0,q1,q2,q3,q4,q5] + + # round off to 2 dp + for i in range(len(ans)): + ans[i]= round(ans[i],2) + + return ans + + def what_if_form(self, df, features): + col_value_dict, col_type_dict = self.col_values_pairs(df, features) + + form_group_array=[] + for f in features: + """ + if feature type== slider + then following is correct. 
+ """ + if col_type_dict[f]=='slider': + #form_group_array.append(self.form_group_slider(f, col_value_dict[f])) + + + form_group_array.append(self.form_group_input(f, col_value_dict[f])) + + elif col_type_dict[f]=='radio': + + form_group_array.append(self.form_group_radio(f, col_value_dict[f])) + else: + + form_group_array.append(self.form_group_dropdown(f, col_value_dict[f])) + + + return html.Form(form_group_array) + + def form_group_input(self, feature, values, value=None): + marks= {} + for v in values: + marks[v]=str(v) + + if value==None: + value=values[3] + + fg= dbc.FormGroup([ + html.Div(feature, className="form_feature"), + html.Div(dcc.Input( + className = "form_group_input", + id=feature+'_slider', + type="number", + value = value, + debounce=True, + placeholder="Enter the value"))], + className="what_if_form_group") + + return fg + + + def form_group_slider(self, feature, values, value=None): + marks= {} + for v in values: + marks[v]=str(v) + + if value==None: + value=values[3] + + fg= dbc.FormGroup([ + html.Div(feature, className="form_feature"), + html.Div(daq.Slider( + className = "form_group_slider", + id=feature+'-slider', + min=values[0], + max=values[-1], + handleLabel={"showCurrentValue": True,"label": 'Value'}))], className="what_if_form_group") + + return fg + + + def form_group_dropdown(self, feature, values, value=None): + options= [] + for v in values: + options.append({'label': v, 'value': v}) + + if value==None: + value=values[0] + + fg = dbc.FormGroup([ + html.Div(feature, className="form_feature"), + html.Div(dcc.Dropdown( + className = "form_group_dropdown", + id= feature+'-slider', + options=options, + value=value))],className="what_if_form_group") + + return fg + diff --git a/lib/modules/local_explanation/assets/favicon.ico b/lib/modules/local_explanation/assets/favicon.ico new file mode 100644 index 0000000..1224c37 Binary files /dev/null and b/lib/modules/local_explanation/assets/favicon.ico differ diff --git a/lib/modules/local_explanation/assets/topography.css b/lib/modules/local_explanation/assets/topography.css new file mode 100644 index 0000000..0663147 --- /dev/null +++ b/lib/modules/local_explanation/assets/topography.css @@ -0,0 +1,623 @@ +/* Table of contents +–––––––––––––––––––––––––––––––––––––––––––––––––– +- Plotly.js +- Grid +- Base Styles +- Typography +- Links +- Buttons +- Forms +- Lists +- Code +- Tables +- Spacing +- Utilities +- Clearing +- Media Queries +*/ + +/* Grid +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.container { + position: relative; + width: 100%; + max-width: 960px; + margin: 0 auto; + padding: 0 20px; + box-sizing: border-box; } +.column, +.columns { + width: 100%; + float: left; + box-sizing: border-box; } + +/* For devices larger than 400px */ +@media (min-width: 400px) { + .container { + width: 85%; + padding: 0; } +} + +/* For devices larger than 550px */ +@media (min-width: 550px) { + .container { + width: 80%; + } + .column, + .columns { + margin-left: 4%; } + .column:first-child, + .columns:first-child { + margin-left: 0; } + + .one.column, + .one.columns { width: 4.66666666667%; } + .two.columns { width: 13.3333333333%; } + .three.columns { width: 22%; } + .four.columns { width: 30.6666666667%; } + .five.columns { width: 39.3333333333%; } + .six.columns { width: 48%; } + .seven.columns { width: 56.6666666667%; } + .eight.columns { width: 65.3333333333%; } + .nine.columns { width: 74.0%; } + .ten.columns { width: 82.6666666667%; } + .eleven.columns { width: 91.3333333333%; } + .twelve.columns { 
width: 98%; margin-left: 0; margin-right: 0;} + + .one-third.column { width: 30.6666666667%; } + .two-thirds.column { width: 65.3333333333%; } + + .one-half.column { width: 48%; } + + /* Offsets */ + .offset-by-one.column, + .offset-by-one.columns { margin-left: 8.66666666667%; } + .offset-by-two.column, + .offset-by-two.columns { margin-left: 17.3333333333%; } + .offset-by-three.column, + .offset-by-three.columns { margin-left: 26%; } + .offset-by-four.column, + .offset-by-four.columns { margin-left: 34.6666666667%; } + .offset-by-five.column, + .offset-by-five.columns { margin-left: 43.3333333333%; } + .offset-by-six.column, + .offset-by-six.columns { margin-left: 52%; } + .offset-by-seven.column, + .offset-by-seven.columns { margin-left: 60.6666666667%; } + .offset-by-eight.column, + .offset-by-eight.columns { margin-left: 69.3333333333%; } + .offset-by-nine.column, + .offset-by-nine.columns { margin-left: 78.0%; } + .offset-by-ten.column, + .offset-by-ten.columns { margin-left: 86.6666666667%; } + .offset-by-eleven.column, + .offset-by-eleven.columns { margin-left: 95.3333333333%; } + + .offset-by-one-third.column, + .offset-by-one-third.columns { margin-left: 34.6666666667%; } + .offset-by-two-thirds.column, + .offset-by-two-thirds.columns { margin-left: 69.3333333333%; } + + .offset-by-one-half.column, + .offset-by-one-half.columns { margin-left: 52%; } + +} + + +/* Base Styles +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +/* NOTE +html is set to 62.5% so that all the REM measurements throughout Skeleton +are based on 10px sizing. So basically 1.5rem = 15px :) */ +html { + font-size: 62.5%; } +body { + font-size: 1.2em; /* currently ems cause chrome bug misinterpreting rems on body element */ + line-height: 1.6; + font-weight: 400; +/* font-family: "Open Sans", "HelveticaNeue", "Helvetica Neue", Helvetica, Arial, sans-serif; */ + /* font-family: 'Montserrat'; */ + font-family: 'Montserrat', sans-serif; + color: #263238; /* Material blue-grey 900*/ + background-color: #fff; /* Material blue-grey 100*/ + margin: 0%; +} + + +/* Typography +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +h1, h2, h3, h4, h5, h6 { + margin-top: 0; + margin-bottom: 0; + font-weight: 300; } +h1 { font-size: 3.5rem; line-height: 1.2; letter-spacing: -.1rem; margin-bottom: 2rem; } +h2 { font-size: 2.6rem; line-height: 1.25; letter-spacing: -.1rem; margin-bottom: 1.8rem; margin-top: 1.8rem;} +h3 { font-size: 2.0rem; line-height: 1.3; letter-spacing: -.1rem; margin-bottom: 1.5rem; margin-top:4.5rem;} +h4 { font-size: 1.6rem; line-height: 1.35; letter-spacing: -.08rem; margin-bottom: 1.2rem; margin-top: 1.2rem;} +h5 { font-size: 1.2rem; line-height: 1.5; letter-spacing: -.05rem; margin-bottom: 0.6rem; margin-top: 0.6rem;} +h6 { font-size: 1.0rem; line-height: 1.6; letter-spacing: 0; margin-bottom: 0.75rem; margin-top: 0.75rem;} + +p { font-size: 1.1rem ; margin-top: 1rem; margin-bottom:1rem; overflow-wrap: break-word; margin-left:2px; margin-right:3px;} + + +/* Blockquotes +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +blockquote { + border-left: 4px lightgrey solid; + padding-left: 1rem; + margin-top: 2rem; + margin-bottom: 2rem; + margin-left: 0rem; +} + + +/* Links +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +a { + color: #1565c0; /* Material Blue 800 */ + text-decoration: underline; + cursor: pointer;} +a:hover { + color: #0d47a1; /* Material Blue 900 */ +} + + +/* Buttons +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.button, +button, +input[type="submit"], 
+input[type="reset"], +input[type="button"] { + display: inline-block; + height: 38px; + padding: 0 30px; + color: #455A64; /* Material blue-gray 700*/ + text-align: center; + font-size: 11px; + font-weight: 600; + line-height: 38px; + letter-spacing: .1rem; + text-transform: uppercase; + text-decoration: none; + white-space: nowrap; + background-color: transparent; + border-radius: 4px; + border: 1px solid #B0BEC5; /* Material blue-gray 200*/ + cursor: pointer; + box-sizing: border-box; } +.button:hover, +button:hover, +input[type="submit"]:hover, +input[type="reset"]:hover, +input[type="button"]:hover, +.button:focus, +button:focus, +input[type="submit"]:focus, +input[type="reset"]:focus, +input[type="button"]:focus { + color: #333; + border-color: #888; + outline: 0; } +.button.button-primary, +button.button-primary, +input[type="submit"].button-primary, +input[type="reset"].button-primary, +input[type="button"].button-primary { + color: #FFF; + background-color: #33C3F0; + border-color: #33C3F0; } +.button.button-primary:hover, +button.button-primary:hover, +input[type="submit"].button-primary:hover, +input[type="reset"].button-primary:hover, +input[type="button"].button-primary:hover, +.button.button-primary:focus, +button.button-primary:focus, +input[type="submit"].button-primary:focus, +input[type="reset"].button-primary:focus, +input[type="button"].button-primary:focus { + color: #FFF; + background-color: #1EAEDB; + border-color: #1EAEDB; } + + +/* Forms +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +input[type="email"], +input[type="number"], +input[type="search"], +input[type="text"], +input[type="tel"], +input[type="url"], +input[type="password"], +textarea, +select { + height: 38px; + padding: 6px 10px; /* The 6px vertically centers text on FF, ignored by Webkit */ + background-color: #fff; + border: 1px solid #D1D1D1; + border-radius: 4px; + box-shadow: none; + box-sizing: border-box; + font-family: inherit; + font-size: inherit; /*https://stackoverflow.com/questions/6080413/why-doesnt-input-inherit-the-font-from-body*/} +/* Removes awkward default styles on some inputs for iOS */ +input[type="email"], +input[type="number"], +input[type="search"], +input[type="text"], +input[type="tel"], +input[type="url"], +input[type="password"], +textarea { + -webkit-appearance: none; + -moz-appearance: none; + appearance: none; } +textarea { + min-height: 65px; + padding-top: 6px; + padding-bottom: 6px; } +input[type="email"]:focus, +input[type="number"]:focus, +input[type="search"]:focus, +input[type="text"]:focus, +input[type="tel"]:focus, +input[type="url"]:focus, +input[type="password"]:focus, +textarea:focus, +select:focus { + border: 1px solid #33C3F0; + outline: 0; } +label, +legend { + display: block; + margin-bottom: 0px; } +fieldset { + padding: 0; + border-width: 0; } +input[type="checkbox"], +input[type="radio"] { + display: inline; } +label > .label-body { + display: inline-block; + margin-left: .5rem; + font-weight: normal; } + + +/* Lists +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +ul { + list-style: circle inside; } +ol { + list-style: decimal inside; } +ol, ul { + padding-left: 0; + margin-top: 0; } +ul ul, +ul ol, +ol ol, +ol ul { + margin: 1.5rem 0 1.5rem 3rem; + font-size: 90%; } +li { + margin-bottom: 0; +} + +/* Tables +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +table { + border-collapse: collapse; +} +th:not(.CalendarDay), +td:not(.CalendarDay) { + padding: 12px 15px; + text-align: left; + border-bottom: 1px solid #E1E1E1; } 
+th:first-child:not(.CalendarDay), +td:first-child:not(.CalendarDay) { + padding-left: 0; } +th:last-child:not(.CalendarDay), +td:last-child:not(.CalendarDay) { + padding-right: 0; } + + +/* Spacing +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +button, +.button { + margin-bottom: 0rem; } +input, +textarea, +select, +fieldset { + margin-bottom: 0rem; } +pre, +dl, +figure, +table, +form { + margin-bottom: 0rem; } +p, +ul, +ol { + margin-bottom: 0.75rem; } + +/* Utilities +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.u-full-width { + width: 100%; + box-sizing: border-box; } +.u-max-full-width { + max-width: 100%; + box-sizing: border-box; } +.u-pull-right { + float: right; } +.u-pull-left { + float: left; } + + +/* Misc +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +hr { + margin-top: 3rem; + margin-bottom: 3.5rem; + border-width: 0; + border-top: 1px solid #E1E1E1; } + + +/* Clearing +–––––––––––––––––––––––––––––––––––––––––––––––––– */ + +/* Self Clearing Goodness */ +.container:after, +.row:after, +.u-cf { + content: ""; + display: table; + clear: both; } + + +/* Media Queries +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +/* +Note: The best way to structure the use of media queries is to create the queries +near the relevant code. For example, if you wanted to change the styles for buttons +on small devices, paste the mobile query code up in the buttons section and style it +there. +*/ + + +/* Larger than mobile */ +@media (min-width: 400px) {} + +/* Larger than phablet (also point when grid becomes active) */ +@media (min-width: 550px) {} + +/* Larger than tablet */ +@media (min-width: 750px) {} + +/* Larger than desktop */ +@media (min-width: 1000px) {} + +/* Larger than Desktop HD */ +@media (min-width: 1200px) {} + +/* Pretty container +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.pretty_container { + border-radius: 5px; + background-color: #f3f3f1; /* Mapbox light map land color */ + margin: 1%; + padding: 1%; + position: relative; + box-shadow: 1px 1px 1px slategrey; +} + +.container_title { + margin-top: 0; + margin-bottom: 0.2em; + font-size: 2.6rem; + line-height: 2.6rem; +} + +/* Customize Loading Spinner +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.dash-default-spinner { + margin-top: 0 !important; + margin-bottom: 0 !important; + padding: 50px; +} + +.dash-default-spinner > div { + background-color: #4e5964 !important; +} + +/* Special purpose buttons +–––––––––––––––––––––––––––––––––––––––––––––––––– */ +.reset-button { + width: 100%; + margin-top: 10px; + height: 30px; + line-height: 30px; +} + +.info-icon { + float: right; + cursor: pointer; + height: 2.2rem; + width: 2.2rem; + margin: 0.2rem; +} + + +/* Modal info layer +–––––––––––––––––––––––––––––––––––––––––––––––––– */ + + +.modal-content { + z-index: 1004; /* Sit on top, including modebar which has z=1001 */ + position: fixed; + left: 0; + width: 60%; + background-color: #fff; /* Material indigo 600 */ + color: black; + border-radius: 5px; + margin-left: 20%; + margin-bottom: 2%; + margin-top: 2%; +} + +.modal-content > div { + text-align: left; + margin: 15px; +} + +.modal-content.bottom { + bottom: 0; +} + +.modal-content.top { + top: 0; +} + +.place_form { + max-height: 740px; + max-width: 560px; + overflow: scroll; + overflow: scroll; + border-top: 0 0 10px 10px; + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19); + border-radius: 10px 10px 10px 10px; + padding: 10px 10px 10px 10px; + +} + +.form_feature { + font-size: 
10px; + white-space: normal; + overflow-wrap: break-word; + width: 60%; + padding: 1px; + border-bottom: 1px solid #95a5a6; + padding-top: 20px; +} + + +.what_if_form_group { + display: flex; + width: 100%; + height: 40px; + +} + +.form_group_input { + font-size:10px; + width: 100%; + display: inline-block; + padding: 1px; + height: 40px; +} + + +.form_group_slider { + display: inline-block; + padding: 1px; + height: 40px; +} + +.form_group_dropdown { + font-size:10px; + width: 250%; + display: inline-block; + padding: 1px; + height: 40px; +} + + +.global_feature_importance_graph { + margin-left: 50; + margin-top: 0; + height: 100%; + font-size: 10px; +} + + +.modebar{ + display: none !important; +} + +.local_impact_heading { + padding-top: 2rem; + margin-left: 2rem; +} + +.local_impact_details { + margin: 0.5rem 1rem 3rem 2rem; +} + +.global_explanation_image { + max-width: 50%; +} + +.welcome_box { + display: flex; + justify-content: center; + align-items: center; + border-radius: 10px; + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19); + flex-direction: column; + padding: 20px; + margin: 20px; + width: 50%; + height: 50%; + +} + +.main_welcome_div { + display: flex; + width: 100%; + justify-content: center; + align-content: center; + align-items: center; +} + +.link { + margin-top: 20px; + max-width: 100%; + font-size: 15px; + +} + +.predicted_outcome { + padding-top: 0.1rem; + margin-left: 2rem; +} + +.model_confidence { + + margin-left: 2rem; +} + +.message_div { + display: flex; +} + +.main_div { + padding: 1% 1% 1% 1%; + +} + +.metrics_div { + display: flex; +} + +.insights_div_1 { + margin-left: 70px; +} + +.aggregate_imp { + padding-left: 20px; + padding-right: 20px; + +} \ No newline at end of file diff --git a/lib/modules/local_explanation/index.py b/lib/modules/local_explanation/index.py new file mode 100644 index 0000000..494ce37 --- /dev/null +++ b/lib/modules/local_explanation/index.py @@ -0,0 +1,45 @@ +import dash +import dash_core_components as dcc +import dash_html_components as html +from dash.dependencies import Input, Output +import dash +import dash_bootstrap_components as dbc +from jupyter_dash import JupyterDash + +# from app import app +from .apps import local_explanation + + +class localExplanation(): + def __init__(self, x_test, ShapleyValues=None, df_with_shap=None): + self.data = x_test + self.df_with_shap = df_with_shap + self.shapley_values = ShapleyValues + + def main_function(self, mode): + + external_stylesheets = ['https://raw.githubusercontent.com/rab657/explainx/master/explainx.css', + dbc.themes.BOOTSTRAP, + { + 'href': 'https://fonts.googleapis.com/css?family=Montserrat', + 'rel': 'stylesheet' + } + ] + local = JupyterDash(__name__, external_stylesheets=external_stylesheets, suppress_callback_exceptions=True) + + local.title = "explainX.ai - Local Level Explanation" + + local.layout = local_explanation.layout_local(self.shapley_values, self.data, self.df_with_shap, local) + debug_value = False + if mode is None: + import random + port = random.randint(6000, 7000) + return local.run_server(port=port, debug=debug_value, dev_tools_ui=debug_value, + dev_tools_props_check=debug_value, dev_tools_silence_routes_logging=True, + dev_tools_hot_reload=True) + else: + import random + port = random.randint(6000, 7000) + return local.run_server(mode='inline', port=port, debug=debug_value, dev_tools_ui=debug_value, + dev_tools_props_check=debug_value, dev_tools_silence_routes_logging=True, + dev_tools_hot_reload=True) diff --git 
a/lib/plotly_css.py b/lib/plotly_css.py index 3099d31..db3af1e 100644 --- a/lib/plotly_css.py +++ b/lib/plotly_css.py @@ -114,7 +114,7 @@ 'border-radius': '15px 15px 15px 15px', 'box-shadow': '0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.19)', 'border-right': '1px solid #2c3e50', 'border-bottom': '1px solid #2c3e50', - 'marginTop': 50, 'width': '95%'} + 'marginTop': 50, 'width': '100%'} style17 = {'backgroundColor': '#fff', 'color': 'black', @@ -147,8 +147,8 @@ style20 = {'marginBottom': 50, 'marginTop': 50, - 'marginLeft': "1%", - 'width': '95%', } + #'marginLeft': "1%", + 'width': '100%', } style21 = {'backgroundColor': '#fff', 'color': 'black', diff --git a/lib/plotly_graphs.py b/lib/plotly_graphs.py index 5ee8a46..7535aff 100644 --- a/lib/plotly_graphs.py +++ b/lib/plotly_graphs.py @@ -56,7 +56,7 @@ def summary_plot(self, df,classification=False): return df2 - def summary_plot_graph(self, df,classification=False): + def summary_plot_graph(self, df): summary_plot = px.scatter(df, x="Feature Impact on Outcome", y="Feature Name", color="Rescaled Feature Value", hover_data=["Original Feature Value"], color_continuous_scale="Bluered_r", template="plotly_white") return summary_plot @@ -70,7 +70,7 @@ def partial_dependence_plot(self, df, v1=None, v2=None, v3=None): def pdp_plot(self, df, v1, v2, v3): g = px.scatter(df, x=v1, y=v2, color=v3, color_continuous_scale="Bluered_r", - color_discrete_sequence=px.colors.sequential.Plasma_r, template="plotly_white") + color_discrete_sequence= px.colors.sequential.Plasma_r, template="plotly_white") return g diff --git a/lib/rescale_numeric_feature.py b/lib/rescale_numeric_feature.py index ec4cc4a..730cbf3 100644 --- a/lib/rescale_numeric_feature.py +++ b/lib/rescale_numeric_feature.py @@ -1,13 +1,5 @@ from imports import * -""" -This class calculates feature importance - -Input: - - -""" - class get_cols(): def __init__(self): diff --git a/lib/shap_pdp.py b/lib/shap_pdp.py index 6f6c550..e8abe04 100644 --- a/lib/shap_pdp.py +++ b/lib/shap_pdp.py @@ -1,13 +1,5 @@ from imports import * -""" -This class calculates feature importance - -Input: - - -""" - class shap_pdp(): def __init__(self): diff --git a/lib/summary_plot.py b/lib/summary_plot.py index b5e92ce..965b0e9 100644 --- a/lib/summary_plot.py +++ b/lib/summary_plot.py @@ -1,12 +1,5 @@ from imports import * from rescale_numeric_feature import * -""" -This class helps to plot summary plot - -Input: - - -""" class summary_plot(): @@ -17,15 +10,11 @@ def __init__(self): def find(self, df): - column = get_cols() self.original_columns = column.get_all_cols(df) - re= rescale_numeric_features() df_with_rescaled_features= re.rescale(df) - - final_dataframe= self.rearrange_dataframe(df_with_rescaled_features ) - + final_dataframe= self.rearrange_dataframe( df_with_rescaled_features ) return final_dataframe def rearrange_dataframe(self, df_re ): diff --git a/lib/utils.py b/lib/utils.py new file mode 100644 index 0000000..4bf1f80 --- /dev/null +++ b/lib/utils.py @@ -0,0 +1,15 @@ +def is_classification(model): + """ [Returns if the problem type is classification or regression] + + Args: + model function + + Returns: + True if classification + False if regression + """ + from sklearn.base import is_classifier, is_regressor + if is_classifier(model) & ~is_regressor(model): + return True + else: + return False \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..657f697 --- /dev/null +++ b/main.py @@ -0,0 +1,73 @@ +from explainx import * +from 
explainx.lib.imports import *
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+import json
+from explainx.lib.models.transformer_default import defaultTransformer
+from explainx.lib.models.modelprocessor import ModelProcessor
+from explainx.lib.modules.local_explanation.index import localExplanation
+from explainx.lib.modules.cohort_analysis.index import cohort
+from explainx.lib.modules.feature_interactions.index import featureInteraction
+from explainx.lib.frameworks.shapley_values import ShapleyValues
+
+
+class ExplainxModules:
+    def __init__(self):
+        self.model = None
+        self.input_data = None
+        self.target_data = None
+        self.ct = None
+        self.ModelProcessor = None
+        self.ShapleyValues = None
+
+    def ai(self, model, x_test, y_test, ct=None):
+        self.model = model
+        self.input_data = x_test
+        self.target_data = y_test
+        self.ct = ct
+
+        self.ModelProcessor = ModelProcessor(self.model, self.input_data, self.target_data, self.ct)
+        self.ShapleyValues = ShapleyValues(self.model, self.input_data, self.target_data, self.ct)
+
+    def default_transformer(self):
+        # pass the stored input features (assumed to be what the column transformer expects)
+        return self.ModelProcessor.columnTransformer(self.input_data)
+
+    def predicted_columns(self):
+        '''
+        return the prediction columns in a dict format
+        '''
+        self.ModelProcessor.create_prediction_columns()
+        return self.ModelProcessor.predicted_columns
+
+    def dataframe_graphing(self):
+        '''
+        return the dataframe used for graphing, with the prediction columns appended
+        '''
+        main_dataset = self.input_data.copy()
+        predictions = self.predicted_columns()
+        for col, values in predictions.items():
+            main_dataset[col] = values
+        return main_dataset
+
+    def metrics(self):
+        '''
+        return model metrics as a dictionary
+        '''
+        return self.ModelProcessor.log_metrics()
+
+    def shap_df(self):
+        '''
+        return the dataframe of SHAP values used for global-level explanation
+        '''
+        _, _, df_with_shap = self.ShapleyValues.global_shap_plotting()
+        return df_with_shap
+
+    def what_if_analysis(self, mode=None):
+        return localExplanation(self.input_data, self.ShapleyValues, self.shap_df()).main_function(mode)
+
+    def feature_interactions(self, mode=None):
+        return featureInteraction(self.dataframe_graphing(), self.shap_df()).main_function(mode)
+
+    def cohort_analysis(self, mode=None):
+        return cohort(self.dataframe_graphing(), self.model).main_function(mode)
+
+
+explainx_modules = ExplainxModules()
diff --git a/main_page_banner.png b/main_page_banner.png
new file mode 100644
index 0000000..c0ab0b2
Binary files /dev/null and b/main_page_banner.png differ
diff --git a/started_example.png b/started_example.png
new file mode 100644
index 0000000..9f937fa
Binary files /dev/null and b/started_example.png differ
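
For reference, a minimal usage sketch of the module-level API that main.py introduces. Only explainx_modules.ai() and the what_if_analysis / feature_interactions / cohort_analysis entry points come from this diff; the RandomForestClassifier, the dataset, and the import path are illustrative assumptions.

    # illustrative sketch: train a small classifier and hand it to the new API
    from sklearn.datasets import load_breast_cancer
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from explainx.main import explainx_modules  # assumed import path

    data = load_breast_cancer(as_frame=True)
    x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=0)
    model = RandomForestClassifier(n_estimators=100, random_state=0).fit(x_train, y_train)

    # register the model and test data, then launch one of the dashboards
    explainx_modules.ai(model, x_test, y_test)
    explainx_modules.what_if_analysis()          # local / what-if explanation app
    # explainx_modules.feature_interactions()    # feature-interaction app
    # explainx_modules.cohort_analysis()         # cohort-analysis app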
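
The localExplanation wrapper added under lib/modules/local_explanation/index.py can also be driven directly; a sketch assuming the Shapley objects have already been computed elsewhere (x_test, sv, and df_with_shap are placeholders, not names from this diff):

    from explainx.lib.modules.local_explanation.index import localExplanation

    # x_test: feature DataFrame; sv / df_with_shap: precomputed Shapley objects (placeholders)
    app = localExplanation(x_test, ShapleyValues=sv, df_with_shap=df_with_shap)
    app.main_function(mode=None)          # standalone server on a random port in the 6000-7000 range
    # app.main_function(mode="inline")    # any non-None mode renders inline via JupyterDash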
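
The new lib/utils.py helper is self-contained and can be smoke-tested in isolation; a small sketch (the two estimators are arbitrary examples, and the import path assumes the package layout used by main.py):

    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
    from explainx.lib.utils import is_classification  # assumed import path

    assert is_classification(RandomForestClassifier()) is True
    assert is_classification(RandomForestRegressor()) is False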