## Set up the dataset

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the Adult data. header=None makes pandas treat the first row as data,
# so the column names are supplied explicitly via `names`.
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    header=None,
    names=[
        "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
        "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
        "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income"
    ],
)
# df = df.sample(frac=0.1, random_state=1)

# The last column is the target; every column before it is a feature.
label = df.columns[-1]
train_cols = df.columns[:-1]
X = df[train_cols]

# Binary response: 0 when the value equals " <=50K", 1 for anything else.
# The literal keeps its leading space (presumably matching the raw CSV values).
y = df[label].ne(" <=50K").astype("int64")

# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
seed = 1
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=seed
)

## Explore the dataset

In [None]:
from interpret import show
from interpret.data import ClassHistogram

# Build a per-feature class histogram explanation of the training split
# and render it with interpret's `show`.
histogram_explorer = ClassHistogram()
hist = histogram_explorer.explain_data(X_train, y_train, name='Train Data')
show(hist)

## Training multiple EBM models

In [None]:
from interpret.glassbox import ExplainableBoostingClassifier

# Fit three EBM models, each on a different train/test split and with a
# matching random seed (1, 11, 21). One loop replaces three copy-pasted
# stanzas, so changing the number of models or the seeds is a one-line edit.
ebms = []
for seed in (1, 11, 21):
    # NOTE: X_train / X_test / y_train / y_test are rebound on every pass.
    # After the loop they (and `seed`) hold the values of the last split
    # (seed 21), so X_test overlaps the training data of the earlier models.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)

    ebm = ExplainableBoostingClassifier(random_state=seed, n_jobs=-1)
    ebm.fit(X_train, y_train)
    ebms.append(ebm)

# Keep the individual names used elsewhere in the notebook.
ebm1, ebm2, ebm3 = ebms

## Merging multiple trained EBM models

In [None]:
# Merge the separately trained EBM models into one model and display its
# global explanation.

# Import only the name that is used rather than `import *`, so it is clear
# where merge_ebms comes from and nothing else leaks into the notebook namespace.
from interpret.glassbox.ebm.utils import merge_ebms
from interpret import show

models = [ebm1, ebm2, ebm3]
merged_ebm = merge_ebms(models=models)

# Global explanation of the merged model, labelled 'EBM' in the visualisation.
ebm_global = merged_ebm.explain_global(name='EBM')
show(ebm_global)