In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv(r"heart_disease.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
df.shape

(303, 14)

In [4]:
df.target.value_counts()

1    165
0    138
Name: target, dtype: int64

# Logistic Regression

In [6]:
# example of grid searching key hyperparametres for logistic regression
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression

# # define dataset
X = df.drop(['target'],axis=1)
y = df['target']

# define models and parameters

model = LogisticRegression()
solvers = ['newton-cg', 'lbfgs', 'liblinear']
penalty = ['l2']
c_values = [100, 10, 1.0, 0.1, 0.01]

# define grid search
grid = dict(solver=solvers,penalty=penalty,C=c_values)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.839570 using {'C': 0.1, 'penalty': 'l2', 'solver': 'lbfgs'}
0.819821 (0.080300) with: {'C': 100, 'penalty': 'l2', 'solver': 'newton-cg'}
0.827491 (0.081219) with: {'C': 100, 'penalty': 'l2', 'solver': 'lbfgs'}
0.822007 (0.082031) with: {'C': 100, 'penalty': 'l2', 'solver': 'liblinear'}
0.820932 (0.079320) with: {'C': 10, 'penalty': 'l2', 'solver': 'newton-cg'}
0.824194 (0.081057) with: {'C': 10, 'penalty': 'l2', 'solver': 'lbfgs'}
0.825376 (0.079335) with: {'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}
0.825341 (0.078541) with: {'C': 1.0, 'penalty': 'l2', 'solver': 'newton-cg'}
0.834086 (0.076563) with: {'C': 1.0, 'penalty': 'l2', 'solver': 'lbfgs'}
0.830753 (0.080386) with: {'C': 1.0, 'penalty': 'l2', 'solver': 'liblinear'}
0.836272 (0.068601) with: {'C': 0.1, 'penalty': 'l2', 'solver': 'newton-cg'}
0.839570 (0.071944) with: {'C': 0.1, 'penalty': 'l2', 'solver': 'lbfgs'}
0.837384 (0.073499) with: {'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}
0.759176 (0.074999) with: {

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Ridge Classifier

In [7]:
# example of grid searching key hyperparametres for ridge classifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import RidgeClassifier

# define models and parameters
model = RidgeClassifier()
alpha = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# define grid search
grid = dict(alpha=alpha)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.830860 using {'alpha': 0.9}
0.829785 (0.074741) with: {'alpha': 0.1}
0.829785 (0.074741) with: {'alpha': 0.2}
0.829785 (0.074741) with: {'alpha': 0.3}
0.829785 (0.074741) with: {'alpha': 0.4}
0.829785 (0.074741) with: {'alpha': 0.5}
0.829785 (0.074741) with: {'alpha': 0.6}
0.829785 (0.074741) with: {'alpha': 0.7}
0.829785 (0.074741) with: {'alpha': 0.8}
0.830860 (0.073694) with: {'alpha': 0.9}
0.830860 (0.073694) with: {'alpha': 1.0}


# KNN

In [8]:
# example of grid searching key hyperparametres for KNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier

# define models and parameters
model = KNeighborsClassifier()
n_neighbors = range(1, 21, 2)
weights = ['uniform', 'distance']
metric = ['euclidean', 'manhattan', 'minkowski']

# define grid search
grid = dict(n_neighbors=n_neighbors,weights=weights,metric=metric)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.701864 using {'metric': 'manhattan', 'n_neighbors': 19, 'weights': 'uniform'}
0.585520 (0.064357) with: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}
0.585520 (0.064357) with: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'distance'}
0.624839 (0.062070) with: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
0.628136 (0.061723) with: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'distance'}
0.649032 (0.063486) with: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'}
0.642473 (0.061191) with: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}
0.657993 (0.078702) with: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
0.642688 (0.086641) with: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'distance'}
0.642652 (0.091571) with: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'uniform'}
0.632760 (0.088583) with: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'distance'}
0.646953 

# SVM

In [10]:
# example of grid searching key hyperparametres for SVC
from sklearn.svm import SVC

# define model and parameters
model = SVC()
kernel = ['poly', 'rbf', 'sigmoid']
C = [50, 10, 1.0, 0.1, 0.01]
gamma = ['scale']

# define grid search
grid = dict(kernel=kernel,C=C,gamma=gamma)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.754731 using {'C': 50, 'gamma': 'scale', 'kernel': 'poly'}
0.754731 (0.069967) with: {'C': 50, 'gamma': 'scale', 'kernel': 'poly'}
0.708351 (0.069602) with: {'C': 50, 'gamma': 'scale', 'kernel': 'rbf'}
0.516882 (0.092207) with: {'C': 50, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.705161 (0.071357) with: {'C': 10, 'gamma': 'scale', 'kernel': 'poly'}
0.684122 (0.070781) with: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
0.514624 (0.093165) with: {'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.665484 (0.071242) with: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
0.648136 (0.063882) with: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
0.545663 (0.016990) with: {'C': 1.0, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.556631 (0.023264) with: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
0.544516 (0.012851) with: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
0.544516 (0.012851) with: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.544516 (0.012851) with: {'C': 0.01, 'gamma': 'scale',

# Bagged Decision Trees 

In [11]:
# example of grid searching key hyperparameters for BaggingClassifier
from sklearn.ensemble import BaggingClassifier

# define models and parameters
model = BaggingClassifier()
n_estimators = [10, 100, 1000]

# define grid search
grid = dict(n_estimators=n_estimators)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.792366 using {'n_estimators': 1000}
0.775878 (0.065296) with: {'n_estimators': 10}
0.791254 (0.076151) with: {'n_estimators': 100}
0.792366 (0.066100) with: {'n_estimators': 1000}


# RandomForest

In [12]:
from sklearn.ensemble import RandomForestClassifier

# define models and parameters
model = RandomForestClassifier()
n_estimators = [10, 100, 1000]
max_features = ['sqrt', 'log2']

# define grid search
grid = dict(n_estimators=n_estimators,max_features=max_features)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.825341 using {'max_features': 'log2', 'n_estimators': 1000}
0.791326 (0.067903) with: {'max_features': 'sqrt', 'n_estimators': 10}
0.824229 (0.068500) with: {'max_features': 'sqrt', 'n_estimators': 100}
0.820824 (0.057606) with: {'max_features': 'sqrt', 'n_estimators': 1000}
0.794803 (0.073765) with: {'max_features': 'log2', 'n_estimators': 10}
0.810932 (0.061879) with: {'max_features': 'log2', 'n_estimators': 100}
0.825341 (0.062680) with: {'max_features': 'log2', 'n_estimators': 1000}


# Stochastic Gradient Boosting

In [14]:
from sklearn.ensemble import GradientBoostingClassifier

# define models and parameters
model = GradientBoostingClassifier()
n_estimators = [10, 100, 1000]
learning_rate = [0.001, 0.01, 0.1]
subsample = [0.5, 0.7, 1.0]
max_depth = [3, 7, 9]

# define grid search
grid = dict(learning_rate=learning_rate, n_estimators=n_estimators, subsample=subsample, max_depth=max_depth)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(X, y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.825376 using {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 1000, 'subsample': 0.5}
0.544516 (0.012851) with: {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 10, 'subsample': 0.5}
0.544516 (0.012851) with: {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 10, 'subsample': 0.7}
0.544516 (0.012851) with: {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 10, 'subsample': 1.0}
0.653620 (0.064332) with: {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.5}
0.683405 (0.060372) with: {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.7}
0.711147 (0.072615) with: {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1.0}
0.825376 (0.067934) with: {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 1000, 'subsample': 0.5}
0.812258 (0.075842) with: {'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 1000, 'subsample': 0.7}
0.794588 (0.077964) with: {'learning_rate': 0.001, 