In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [8]:
df = pd.read_csv("sample_data/process_outlier_data.csv")
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,0,0,140,289,0,0,172,0,0.0,0,0
1,49,1,1,160,180,0,0,156,0,1.0,1,1
2,37,0,0,130,283,0,1,98,0,0.0,0,0
3,48,1,2,138,214,0,0,108,1,1.5,1,1
4,54,0,1,150,195,0,0,122,0,0.0,0,0


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
from sklearn.model_selection import KFold,cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold


def train_model_with_tunning(model,param, X_std, Y):
    
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    
    X_train, X_test, y_train, y_test = train_test_split(X_std, Y, test_size=0.30, random_state=0)
    reg_random_cv = RandomizedSearchCV(estimator = model,param_distributions = param,
               n_iter = 100, cv = cv, verbose=2, random_state=0, n_jobs = -1,scoring='f1_weighted')
    reg_random_cv.fit(X_std, Y)
    
    print('Random grid: ', reg_random_cv, '\n')
    print('Best Parameters: ', reg_random_cv.best_params_, ' \n')
    print('Best Score: ', reg_random_cv.best_score_, ' \n')
    print('Best Estimatore : ', reg_random_cv.best_estimator_ ,'\n')
    
    reg_model = reg_random_cv.best_estimator_ 

    reg_model.fit(X_train,y_train)

    y_train_pred = reg_model.predict(X_train)
    y_test_pred = reg_model.predict(X_test)
    
    acc_train=metrics.accuracy_score(y_train,y_train_pred)
    print("Train Accuracy : ",acc_train*100)
    acc_test=metrics.accuracy_score(y_test,y_test_pred)
    print("Test Accuracy : ",acc_test*100)


    #validate model performance using K-fold Cross Validation using roc scoring
    kf=KFold(n_splits=5,shuffle=True,random_state=0)
    scores=cross_val_score(reg_model,X_std,Y,cv=cv,scoring='f1_weighted')
    print("Bias Error : ",np.mean(scores))

    print("Varience : ",np.std(scores, ddof= 1))

In [10]:
X=df[['Age','Sex','ChestPainType','Cholesterol','MaxHR','RestingBP','FastingBS','RestingECG','ExerciseAngina', 'Oldpeak', 'ST_Slope']]
Y=df['HeartDisease']

In [11]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_std=sc.fit_transform(X)

In [12]:
import xgboost as xgb

In [13]:
xgbc =  xgb.XGBClassifier(objective='binary:logistic',
                          booster='gbtree',
                          eval_metric='auc',
                          tree_method='hist',
                          grow_policy='lossguide',
                          use_label_encoder=False)

#setting grid of selected parameters for iteration
param_grid = {'gamma': [0,0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2,102.4, 200],
              'learning_rate': [0.01, 0.03, 0.06, 0.1, 0.15, 0.2, 0.25, 0.300000012, 0.4, 0.5, 0.6, 0.7],
              'max_depth': [5,6,7,8,9,10,11,12,13,14],
              'n_estimators': [50,65,80,100,115,130,150],
              'reg_alpha': [0,0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2,102.4,200],
              'reg_lambda': [0,0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2,102.4,200]}


train_model_with_tunning(model=xgbc,param=param_grid, X_std=X_std, Y=Y)

Fitting 30 folds for each of 100 candidates, totalling 3000 fits
Random grid:  RandomizedSearchCV(cv=RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1),
                   estimator=XGBClassifier(eval_metric='auc',
                                           grow_policy='lossguide',
                                           tree_method='hist',
                                           use_label_encoder=False),
                   n_iter=100, n_jobs=-1,
                   param_distributions={'gamma': [0, 0.1, 0.2, 0.4, 0.8, 1.6,
                                                  3.2, 6.4, 12.8, 25.6, 51.2,
                                                  102.4, 200],
                                        'learning_rate': [0.01, 0.03, 0.06, 0.1,
                                                          0.15, 0.2, 0.25,
                                                          0.300000012, 0.4, 0.5,
                                                          0.6, 0.7],
   

In [15]:
!pip install catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.0.6-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 1.2 MB/s 
Installing collected packages: catboost
Successfully installed catboost-1.0.6


In [17]:
from catboost import CatBoostClassifier

In [18]:
CBC = CatBoostClassifier()

parameters = {'depth'         : [4,5,6,7,8,9, 10],
              'learning_rate' : [0.01,0.02,0.03,0.04],
              'iterations'    : [10, 20,30,40,50,60,70,80,90, 100]
              }

train_model_with_tunning(model=CBC,param=parameters, X_std=X_std, Y=Y)

Fitting 30 folds for each of 100 candidates, totalling 3000 fits
0:	learn: 0.6754095	total: 48.9ms	remaining: 4.84s
1:	learn: 0.6576889	total: 50.1ms	remaining: 2.46s
2:	learn: 0.6481607	total: 57.6ms	remaining: 1.86s
3:	learn: 0.6346660	total: 63.8ms	remaining: 1.53s
4:	learn: 0.6200561	total: 64.3ms	remaining: 1.22s
5:	learn: 0.6091839	total: 70.3ms	remaining: 1.1s
6:	learn: 0.6015428	total: 76.3ms	remaining: 1.01s
7:	learn: 0.5898615	total: 77.5ms	remaining: 892ms
8:	learn: 0.5811315	total: 83.4ms	remaining: 843ms
9:	learn: 0.5730162	total: 89.3ms	remaining: 804ms
10:	learn: 0.5666499	total: 95.1ms	remaining: 769ms
11:	learn: 0.5600360	total: 101ms	remaining: 740ms
12:	learn: 0.5502056	total: 103ms	remaining: 689ms
13:	learn: 0.5426242	total: 113ms	remaining: 696ms
14:	learn: 0.5358505	total: 119ms	remaining: 677ms
15:	learn: 0.5287709	total: 125ms	remaining: 658ms
16:	learn: 0.5219322	total: 131ms	remaining: 640ms
17:	learn: 0.5152282	total: 137ms	remaining: 624ms
18:	learn: 0.5074

In [20]:
!pip install lightgbm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [22]:
import lightgbm as lgb

SEARCH_PARAMS = {'learning_rate': 0.4,
                'max_depth': 15,
                'num_leaves': 32,
                'feature_fraction': 0.8,
                'subsample': 0.2}

FIXED_PARAMS={'objective': 'binary',
             'metric': 'auc',
             'is_unbalance':True,
             'bagging_freq':5,
             'boosting':'dart',
             'num_boost_round':300,
             'early_stopping_rounds':30}

X_train, X_test, y_train, y_test = train_test_split(X_std, Y, test_size=0.30, random_state=0)

train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

params = {'metric':FIXED_PARAMS['metric'],
          'objective':FIXED_PARAMS['objective'],
          **SEARCH_PARAMS}

model = lgb.train(params, train_data,                     
                  valid_sets=[valid_data],
                  num_boost_round=FIXED_PARAMS['num_boost_round'],
                  early_stopping_rounds=FIXED_PARAMS['early_stopping_rounds'],
                  valid_names=['valid'])
score = model.best_score['valid']['auc']

[1]	valid's auc: 0.882099
Training until validation scores don't improve for 30 rounds.
[2]	valid's auc: 0.920381
[3]	valid's auc: 0.916068
[4]	valid's auc: 0.910137
[5]	valid's auc: 0.920336
[6]	valid's auc: 0.922223
[7]	valid's auc: 0.926761
[8]	valid's auc: 0.926222
[9]	valid's auc: 0.928918
[10]	valid's auc: 0.928828
[11]	valid's auc: 0.929727
[12]	valid's auc: 0.935748
[13]	valid's auc: 0.931254
[14]	valid's auc: 0.930985
[15]	valid's auc: 0.931794
[16]	valid's auc: 0.929907
[17]	valid's auc: 0.930536
[18]	valid's auc: 0.930895
[19]	valid's auc: 0.930985
[20]	valid's auc: 0.928828
[21]	valid's auc: 0.931075
[22]	valid's auc: 0.929008
[23]	valid's auc: 0.929727
[24]	valid's auc: 0.929817
[25]	valid's auc: 0.929008
[26]	valid's auc: 0.92793
[27]	valid's auc: 0.929367
[28]	valid's auc: 0.929457
[29]	valid's auc: 0.931524
[30]	valid's auc: 0.933321
[31]	valid's auc: 0.932782
[32]	valid's auc: 0.931075
[33]	valid's auc: 0.930625
[34]	valid's auc: 0.930895
[35]	valid's auc: 0.931434
[36

In [27]:
score

0.9357476635514018

In [25]:
model.best_iteration

12

In [26]:
model.params

{'feature_fraction': 0.8,
 'learning_rate': 0.4,
 'max_depth': 15,
 'metric': 'auc',
 'num_leaves': 32,
 'objective': 'binary',
 'subsample': 0.2}