In [1]:
import pandas as pd
import numpy as np
from imblearn.under_sampling import ClusterCentroids
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTETomek
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split,KFold,cross_val_score,StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, chi2, f_regression


In [2]:
df=pd.read_csv('survival.csv',header=0)
# Dropping irrelevant columns
df.drop(['encounter_id','patient_id','hospital_id','icu_id','Unnamed: 83'],inplace=True,axis=1)
# Dealing with negative probabilities
df.drop(df[(df['apache_4a_hospital_death_prob'] <0)].index, inplace=True)
df.drop(df[(df['apache_4a_icu_death_prob'] <0)].index, inplace=True)
df=df.fillna(df.median()) #Mean imputation for numeric features
df = df.fillna(df.mode().iloc[0]) # Mode imputation for categorical features
# Dropping more columns
df.drop(['aids','leukemia','lymphoma'],inplace=True,axis=1)


  df=df.fillna(df.median()) #Mean imputation for numeric features


In [3]:
# Outlier treatment
for col in df.columns:
    if df[col].dtype=='int64' or df[col].dtype=='float64':
        uq=np.percentile(df[col],[99])[0] #Upper Quartile
        df[col][(df[col] > 3*uq)] = 3*uq
        lq=np.percentile(df[col],[1])[0] #Lower quartile
        df[col][(df[col] < 0.3*lq)] = 0.3*lq

#Generating dummy variables
df=pd.get_dummies(df, columns=['ethnicity','gender','icu_admit_source','icu_stay_type','icu_type','apache_3j_bodysystem','apache_2_bodysystem'],drop_first=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col][(df[col] > 3*uq)] = 3*uq
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col][(df[col] < 0.3*lq)] = 0.3*lq


In [13]:
df.columns

Index(['age', 'bmi', 'elective_surgery', 'height', 'pre_icu_los_days',
       'weight', 'apache_2_diagnosis', 'apache_3j_diagnosis',
       'apache_post_operative', 'arf_apache',
       ...
       'apache_3j_bodysystem_Trauma', 'apache_2_bodysystem_Gastrointestinal',
       'apache_2_bodysystem_Haematologic', 'apache_2_bodysystem_Metabolic',
       'apache_2_bodysystem_Neurologic',
       'apache_2_bodysystem_Renal/Genitourinary',
       'apache_2_bodysystem_Respiratory', 'apache_2_bodysystem_Trauma',
       'apache_2_bodysystem_Undefined Diagnoses',
       'apache_2_bodysystem_Undefined diagnoses'],
      dtype='object', length=108)

In [4]:
#Basic model
X=df.loc[:,df.columns!='hospital_death']
Y=df['hospital_death']

X.shape

(89021, 107)

In [5]:

scaler=StandardScaler()
scaler.fit(X)
X=scaler.transform(X)
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.3,random_state=42)


In [6]:
def cross_val(model,X,Y):
    cfv = StratifiedKFold(n_splits=10, random_state=5, shuffle=True)
    scores = cross_val_score(model, X, Y, scoring='f1', cv=cfv, n_jobs=-1)
    scores1 = cross_val_score(model, X, Y, scoring='precision', cv=cfv, n_jobs=-1)
    scores2 = cross_val_score(model, X, Y, scoring='recall', cv=cfv, n_jobs=-1)
    print("Precison Score: ",np.mean(scores1))
    print("Recall Score: ",np.mean(scores2))
    print("F1 Score: ",np.mean(scores),'\n')

In [10]:
#Logistic Regression
lrm=LogisticRegression(random_state=42)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm,X,Y)
lrm.fit(X_train,Y_train)
print(classification_report(Y_test,lrm.predict(X_test)))


Stratified 10 fold cross validation scores:
Precison Score:  0.6562712698379758
Recall Score:  0.2848587797563325
F1 Score:  0.39716064511894594 

              precision    recall  f1-score   support

           0       0.94      0.99      0.96     24443
           1       0.64      0.28      0.39      2264

    accuracy                           0.93     26707
   macro avg       0.79      0.63      0.67     26707
weighted avg       0.91      0.93      0.91     26707



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [11]:
#Undersampling

cc=ClusterCentroids(sampling_strategy='majority',random_state=52)


In [12]:
X_under,Y_under=cc.fit_resample(X,Y)

In [13]:
lrm1=LogisticRegression(random_state=42)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm1,X_under,Y_under)
X_train,X_test,Y_train,Y_test=train_test_split(X_under,Y_under,test_size=0.3,random_state=42)
lrm1.fit(X_train,Y_train)
print(classification_report(Y_test,lrm1.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.7685631082321673
Recall Score:  0.7542709890012629
F1 Score:  0.7613182873773129 

              precision    recall  f1-score   support

           0       0.75      0.78      0.76      2282
           1       0.77      0.75      0.76      2319

    accuracy                           0.76      4601
   macro avg       0.76      0.76      0.76      4601
weighted avg       0.76      0.76      0.76      4601



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
#Oversampling

X_new=X.astype(np.uint8)
Y_new=Y.astype(np.uint8)


In [None]:
smt=SMOTE(sampling_strategy='minority',k_neighbors=5,random_state=42)
X_over,Y_over=smt.fit_resample(X_new,Y_new)

In [None]:
Y_new=Y.astype(np.uint8)
smt=SMOTE(sampling_strategy='minority',k_neighbors=5,random_state=42)
X_over,Y_over=smt.fit_resample(X_new,Y_new)

In [15]:
lrm2=LogisticRegression(random_state=42,max_iter=150)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm2,X_over,Y_over)
X_train,X_test,Y_train,Y_test=train_test_split(X_over,Y_over,test_size=0.3,random_state=42)
lrm2.fit(X_train,Y_train)
print(classification_report(Y_test,lrm2.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.9248638821213564
Recall Score:  0.8776947863577934
F1 Score:  0.9006453857958698 

              precision    recall  f1-score   support

           0       0.89      0.93      0.91     24265
           1       0.92      0.89      0.90     24548

    accuracy                           0.91     48813
   macro avg       0.91      0.91      0.91     48813
weighted avg       0.91      0.91      0.91     48813



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [8]:
#Combined resampling

stmk=SMOTETomek(random_state=42)
X_comb,Y_comb=stmk.fit_resample(X_new,Y_new)

In [17]:
lrm3=LogisticRegression(random_state=42,max_iter=150)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm3,X_comb,Y_comb)
X_train,X_test,Y_train,Y_test=train_test_split(X_comb,Y_comb,test_size=0.3,random_state=42)
lrm3.fit(X_train,Y_train)
print(classification_report(Y_test,lrm3.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.9267990335947193
Recall Score:  0.8858116994195072
F1 Score:  0.9058202590452424 

              precision    recall  f1-score   support

           0       0.90      0.93      0.91     24070
           1       0.92      0.89      0.91     24198

    accuracy                           0.91     48268
   macro avg       0.91      0.91      0.91     48268
weighted avg       0.91      0.91      0.91     48268



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [18]:
#Feature engineering -2 

sfm_selector = SelectFromModel(estimator=LogisticRegression()).fit(X,Y)
X=sfm_selector.transform(X)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [19]:
lrm4=LogisticRegression(random_state=42)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm4,X,Y)
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.3,random_state=42)
lrm4.fit(X_train,Y_train)
print(classification_report(Y_test,lrm4.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.6490027907462363
Recall Score:  0.27325002297786294
F1 Score:  0.3844379094220817 

              precision    recall  f1-score   support

           0       0.94      0.99      0.96     24443
           1       0.63      0.27      0.38      2264

    accuracy                           0.92     26707
   macro avg       0.78      0.63      0.67     26707
weighted avg       0.91      0.92      0.91     26707



In [20]:
#UnderSampling
X_under,Y_under=cc.fit_resample(X,Y)
lrm5=LogisticRegression(random_state=42)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm5,X_under,Y_under)
X_train,X_test,Y_train,Y_test=train_test_split(X_under,Y_under,test_size=0.3,random_state=42)
lrm5.fit(X_train,Y_train)
print(classification_report(Y_test,lrm5.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.7337181865740918
Recall Score:  0.7347078066863879
F1 Score:  0.7340687244268163 

              precision    recall  f1-score   support

           0       0.73      0.74      0.73      2282
           1       0.74      0.73      0.73      2319

    accuracy                           0.73      4601
   macro avg       0.73      0.73      0.73      4601
weighted avg       0.73      0.73      0.73      4601



In [21]:
#oversampling with feature engineering 

X_new=X.astype(np.uint8)
Y_new=Y.astype(np.uint8)
smt=SMOTE(sampling_strategy='minority',k_neighbors=5,random_state=42)
X_over,Y_over=smt.fit_resample(X_new,Y_new)

lrm2=LogisticRegression(random_state=42,max_iter=150)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm2,X_over,Y_over)
X_train,X_test,Y_train,Y_test=train_test_split(X_over,Y_over,test_size=0.3,random_state=42)
lrm2.fit(X_train,Y_train)
print(classification_report(Y_test,lrm2.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.865247873002321
Recall Score:  0.6795116682651832
F1 Score:  0.7611451966740439 

              precision    recall  f1-score   support

           0       0.73      0.90      0.81     24265
           1       0.87      0.67      0.76     24548

    accuracy                           0.78     48813
   macro avg       0.80      0.78      0.78     48813
weighted avg       0.80      0.78      0.78     48813



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [22]:
#Combined resampling

stmk=SMOTETomek(random_state=42)
X_comb,Y_comb=stmk.fit_resample(X_new,Y_new)

lrm3=LogisticRegression(random_state=42,max_iter=150)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm3,X_comb,Y_comb)
X_train,X_test,Y_train,Y_test=train_test_split(X_comb,Y_comb,test_size=0.3,random_state=42)
lrm3.fit(X_train,Y_train)
print(classification_report(Y_test,lrm3.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.8726618847243403
Recall Score:  0.6822731715350215
F1 Score:  0.7657796476912063 

              precision    recall  f1-score   support

           0       0.74      0.89      0.81     24111
           1       0.86      0.69      0.77     24074

    accuracy                           0.79     48185
   macro avg       0.80      0.79      0.79     48185
weighted avg       0.80      0.79      0.79     48185



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [54]:
# feature engineering technique 2 : selectK Best 

print(X.shape)
len(X)
X_clf = SelectKBest(score_func=f_regression,k=50).fit_transform(X,Y)
X_train,X_test,Y_train,Y_test=train_test_split(X_clf,Y,test_size=0.3,random_state=42)

X_clf.shape

(89021, 107)


(89021, 50)

In [56]:
#normal lrm 
lrm=LogisticRegression(random_state=42,max_iter=100)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm,X_clf,Y)
lrm.fit(X_train,Y_train)
print(classification_report(Y_test,lrm.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.6518871113366381
Recall Score:  0.2754690037139035
F1 Score:  0.38711266527491806 

              precision    recall  f1-score   support

           0       0.94      0.99      0.96     24443
           1       0.63      0.27      0.38      2264

    accuracy                           0.92     26707
   macro avg       0.78      0.63      0.67     26707
weighted avg       0.91      0.92      0.91     26707



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [57]:
#UnderSampling
X_under,Y_under=cc.fit_resample(X_clf,Y)
lrm5=LogisticRegression(random_state=42)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm5,X_under,Y_under)
X_train,X_test,Y_train,Y_test=train_test_split(X_under,Y_under,test_size=0.3,random_state=42)
lrm5.fit(X_train,Y_train)
print(classification_report(Y_test,lrm5.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.7269017041798078
Recall Score:  0.7270105630087044
F1 Score:  0.7268751011421173 

              precision    recall  f1-score   support

           0       0.72      0.73      0.73      2282
           1       0.73      0.71      0.72      2319

    accuracy                           0.72      4601
   macro avg       0.72      0.72      0.72      4601
weighted avg       0.72      0.72      0.72      4601



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [58]:
#oversampling with feature engineering 

X_new=X_clf.astype(np.uint8)
Y_new=Y.astype(np.uint8)
smt=SMOTE(sampling_strategy='minority',k_neighbors=5,random_state=42)
X_over,Y_over=smt.fit_resample(X_new,Y_new)

lrm2=LogisticRegression(random_state=42,max_iter=150)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm2,X_over,Y_over)
X_train,X_test,Y_train,Y_test=train_test_split(X_over,Y_over,test_size=0.3,random_state=42)
lrm2.fit(X_train,Y_train)
print(classification_report(Y_test,lrm2.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.901590813544012
Recall Score:  0.7905449038140184
F1 Score:  0.8423126620940742 

              precision    recall  f1-score   support

           0       0.81      0.93      0.87     24265
           1       0.91      0.79      0.85     24548

    accuracy                           0.86     48813
   macro avg       0.86      0.86      0.86     48813
weighted avg       0.86      0.86      0.86     48813



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [59]:
# mixed sampling 

stmk=SMOTETomek(random_state=42)
X_comb,Y_comb=stmk.fit_resample(X_clf,Y)

lrm3=LogisticRegression(random_state=42,max_iter=150)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm3,X_comb,Y_comb)
X_train,X_test,Y_train,Y_test=train_test_split(X_comb,Y_comb,test_size=0.3,random_state=42)
lrm3.fit(X_train,Y_train)
print(classification_report(Y_test,lrm3.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.8016775170943065
Recall Score:  0.7656111652008057
F1 Score:  0.7832197924492446 

              precision    recall  f1-score   support

           0       0.77      0.81      0.79     24223
           1       0.80      0.76      0.78     24589

    accuracy                           0.79     48812
   macro avg       0.79      0.79      0.79     48812
weighted avg       0.79      0.79      0.79     48812



In [33]:
# feature engineering technique 2 : selectK Best 

print(X.shape)
len(X)
X_clf = SelectKBest(score_func=f_regression,k='all').fit_transform(X,Y)
X_train,X_test,Y_train,Y_test=train_test_split(X_clf,Y,test_size=0.3,random_state=42)

X_clf.shape

(89021, 29)


(89021, 29)

In [34]:
#normal lrm 
lrm=LogisticRegression(random_state=42)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm,X_clf,Y)
lrm.fit(X_train,Y_train)
print(classification_report(Y_test,lrm.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.6490027907462363
Recall Score:  0.27325002297786294
F1 Score:  0.3844379094220817 

              precision    recall  f1-score   support

           0       0.94      0.99      0.96     24443
           1       0.63      0.27      0.38      2264

    accuracy                           0.92     26707
   macro avg       0.78      0.63      0.67     26707
weighted avg       0.91      0.92      0.91     26707



In [35]:
#UnderSampling
X_under,Y_under=cc.fit_resample(X_clf,Y)
lrm5=LogisticRegression(random_state=42)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm5,X_under,Y_under)
X_train,X_test,Y_train,Y_test=train_test_split(X_under,Y_under,test_size=0.3,random_state=42)
lrm5.fit(X_train,Y_train)
print(classification_report(Y_test,lrm5.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.7337181865740918
Recall Score:  0.7347078066863879
F1 Score:  0.7340687244268163 

              precision    recall  f1-score   support

           0       0.73      0.74      0.73      2282
           1       0.74      0.73      0.73      2319

    accuracy                           0.73      4601
   macro avg       0.73      0.73      0.73      4601
weighted avg       0.73      0.73      0.73      4601



In [37]:
#oversampling with feature engineering 

X_new=X.astype(np.uint8)
Y_new=Y.astype(np.uint8)
smt=SMOTE(sampling_strategy='minority',k_neighbors=5,random_state=42)
X_over,Y_over=smt.fit_resample(X_new,Y_new)

lrm2=LogisticRegression(random_state=42,max_iter=150)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm2,X_over,Y_over)
X_train,X_test,Y_train,Y_test=train_test_split(X_over,Y_over,test_size=0.3,random_state=42)
lrm2.fit(X_train,Y_train)
print(classification_report(Y_test,lrm2.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.865247873002321
Recall Score:  0.6795116682651832
F1 Score:  0.7611451966740439 

              precision    recall  f1-score   support

           0       0.73      0.90      0.81     24265
           1       0.87      0.67      0.76     24548

    accuracy                           0.78     48813
   macro avg       0.80      0.78      0.78     48813
weighted avg       0.80      0.78      0.78     48813



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [38]:
# mixed sampling 

stmk=SMOTETomek(random_state=42)
X_comb,Y_comb=stmk.fit_resample(X_clf,Y)

lrm3=LogisticRegression(random_state=42,max_iter=150)
print('Stratified 10 fold cross validation scores:')
cross_val(lrm3,X_comb,Y_comb)
X_train,X_test,Y_train,Y_test=train_test_split(X_comb,Y_comb,test_size=0.3,random_state=42)
lrm3.fit(X_train,Y_train)
print(classification_report(Y_test,lrm3.predict(X_test)))

Stratified 10 fold cross validation scores:
Precison Score:  0.8021235958911372
Recall Score:  0.7558445243326462
F1 Score:  0.7782910066696015 

              precision    recall  f1-score   support

           0       0.77      0.82      0.79     24300
           1       0.81      0.76      0.78     24491

    accuracy                           0.79     48791
   macro avg       0.79      0.79      0.79     48791
weighted avg       0.79      0.79      0.79     48791



### Interpretation (SMOTETomek without resampling)

In [9]:
from interpret.glassbox import ExplainableBoostingClassifier

ebm = ExplainableBoostingClassifier()
ebm.fit(X_comb, Y_comb)

In [10]:
from interpret import show

ebm_global = ebm.explain_global()
show(ebm_global)

The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html
The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_table package is deprecated. Please replace
`import dash_table` with `from dash import dash_table`

Also, if you're using any of the table format helpers (e.g. Group), replace 
`from dash_table.Format import Group` with 
`from dash.dash_table.Format import Group`
  import dash_table as dt


In [11]:
X_train,X_test,Y_train,Y_test=train_test_split(X_comb,Y_comb,test_size=0.3,random_state=42)
ebm_local = ebm.explain_local(X_test, Y_test)
show(ebm_local)