In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the combined UCI heart-disease CSV from the Kaggle input mount into `df`.
df=pd.read_csv("/kaggle/input/heart-disease-data-compiled-from-uci/UCI_Heart_Disease_Dataset_Combined.csv")

In [None]:
# Preview the first rows to sanity-check that the file parsed as expected.
df.head()

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Column names available for modelling.
df.columns

In [None]:
# Same preview as the earlier cell, repeated here for reference.
df.head()

In [None]:
# `duplicated()` marks every row that repeats an earlier, fully identical row.
is_repeat = df.duplicated()
duplicate_rows_df = df[is_repeat]
print("\nDuplicate Rows except the first occurrence based on all columns are:", duplicate_rows_df, sep="\n")

In [None]:
# Keep only the first occurrence of each fully identical row, then show the result.
df = df.drop_duplicates(keep='first')
print("\nDataFrame after removing duplicates:", df, sep="\n")

In [None]:
import matplotlib.pyplot as plt 
# Distribution of the target column; labelled so the figure is readable on its own.
plt.hist(df["HeartDisease"])
plt.title("HeartDisease class distribution")
plt.xlabel("HeartDisease")
plt.ylabel("Number of rows")
plt.show()  # render the figure without echoing the (counts, bins, patches) tuple

In [None]:
# One-hot encode ChestPainType into indicator columns (one per distinct value).
# NOTE(review): `df` is reassigned, so re-running this cell without reloading the data
# will fail because the ChestPainType column no longer exists.
df = pd.get_dummies(df, columns=['ChestPainType'])
df.head()

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier,ExtraTreesClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# Separate the target column from the feature matrix.
y = df['HeartDisease']
X = df.drop(columns='HeartDisease')

# Two-stage split: hold out 10% for testing, then take 11.11% of the remainder
# (roughly another 10% of all rows) as the validation split.
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.1111, random_state=42)

# Baseline models with library-default hyperparameters (SVC is deliberately left out).
classifiers = dict([
    ('Logistic Regression', LogisticRegression()),
    ('Random Forest', RandomForestClassifier()),
    ('XGBoost', XGBClassifier()),
    ('LightGBM', LGBMClassifier()),
    ('CatBoost', CatBoostClassifier(silent=True)),
    ('GradientBoost', GradientBoostingClassifier()),
    ('ExtraTrees', ExtraTreesClassifier()),
])

# Fit each baseline on the training split and report its test-split metrics.
# Validation-split metrics are not reported in this cell.
test_scorers = (accuracy_score, precision_score, recall_score, f1_score)
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    test_predictions = clf.predict(X_test)
    test_accuracy, test_precision, test_recall, test_f1 = [
        scorer(y_test, test_predictions) for scorer in test_scorers
    ]
    print(f'{name} test Accuracy: {test_accuracy}, Precision: {test_precision}, Recall: {test_recall}, F1 Score: {test_f1}')

In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split


# Cast the shared train/test frames to float32, as the original cell did, so later
# cells see the same variables.
X_train = X_train.astype('float32')
y_train = y_train.astype('float32')
X_test = X_test.astype('float32')
y_test = y_test.astype('float32')
# float32 copies of the validation split, used only to monitor training below.
X_val_nn = X_val.astype('float32')
y_val_nn = y_val.astype('float32')

# Fully connected network: five ReLU layers (512 -> 32 units) and a single sigmoid
# output unit for the binary HeartDisease target.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Binary cross-entropy matches the sigmoid output.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# With save_best_only=True the checkpoint keeps the epoch with the best monitored value.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("my_model.keras", save_best_only=True)

# Monitor the validation split, not the test split: picking the best checkpoint on the
# test data would leak it into model selection and inflate the test accuracy below.
history = model.fit(X_train, y_train, validation_data=(X_val_nn, y_val_nn), epochs=50, callbacks=[checkpoint_cb])

# NOTE(review): the evaluations below use the weights from the final epoch, not the
# checkpointed best epoch saved in "my_model.keras".
train_loss, train_acc = model.evaluate(X_train, y_train, verbose=2)
print('\nTrain accuracy:', train_acc)

# Evaluate the model on the untouched test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('\nTest accuracy:', test_acc)

In [None]:
import optuna
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, f1_score
# Cast every feature column of the three splits to integer dtype.
# NOTE(review): this truncates any fractional feature values — confirm all columns are integral.
X_train=X_train.astype('int')
X_test=X_test.astype('int')
X_val=X_val.astype('int')
def objective(trial):
    """Optuna objective for CatBoost: weighted F1 on the validation split.

    Samples one hyperparameter configuration from ``trial``, fits a
    CatBoostClassifier on the training split and returns the score to maximise.
    The validation split is used for scoring, as in the other tuning cells, so the
    test split stays untouched until the final evaluation.
    """
    param = {
        'iterations' : trial.suggest_int('iterations', 50, 300),
        'depth' : trial.suggest_int('depth', 4, 10),
        'learning_rate' : trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'random_strength' : trial.suggest_int('random_strength', 0, 100),
        'bagging_temperature' : trial.suggest_float('bagging_temperature', 0.01, 100.00, log=True),
        'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter']),
        'od_wait' : trial.suggest_int('od_wait', 10, 50)
    }
    model = CatBoostClassifier(**param)
    # NOTE(review): early_stopping_rounds also configures CatBoost's overfitting detector;
    # check whether it overrides the sampled od_type/od_wait.
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)], early_stopping_rounds=100, verbose=False)
    # Score on validation data; tuning against X_test would leak the test set.
    preds = model.predict(X_val)
    return f1_score(y_val, preds, average="weighted")

# Search for the configuration that maximises the objective.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)
best_params = study.best_params

# Refit a fresh CatBoost model with the winning configuration.
best_model = CatBoostClassifier(**best_params)
best_model.fit(X_train, y_train, eval_set=[(X_val, y_val)], early_stopping_rounds=100, verbose=False)

# Test-split metrics of the refitted model.
preds = best_model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, preds):f}")
print(f"Model F1 Score: {f1_score(y_test, preds, average='weighted'):f}")

# Validation-split metrics of the same model.
val_preds = best_model.predict(X_val)
print(f"Validation Accuracy: {accuracy_score(y_val, val_preds):f}")
print(f"Validation F1 Score: {f1_score(y_val, val_preds, average='weighted'):f}")

In [None]:
# Persist the tuned CatBoost model to the file "CB" in CatBoost's binary (cbm) format.
best_model.save_model("CB",format="cbm")

In [None]:
import optuna
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

def objective(trial):
    """Optuna objective for LogisticRegression: weighted F1 on the validation split.

    Samples one configuration from ``trial``, fits on the training split and returns
    the score to maximise. Scoring uses the validation split, as in the other tuning
    cells, so the test split is not used for model selection.
    """
    param = {
        'C': trial.suggest_float('C', 1e-5, 100,log=True),
        'fit_intercept': trial.suggest_categorical('fit_intercept', [True, False]),
        'solver': trial.suggest_categorical('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
        'max_iter': trial.suggest_int('max_iter', 50, 200),
        'tol': trial.suggest_float('tol', 1e-5, 1e-1),
        'class_weight': trial.suggest_categorical('class_weight', [None, 'balanced']),
        'warm_start': trial.suggest_categorical('warm_start', [True, False]),
        # 'l1_ratio' is not sampled: scikit-learn only uses it with penalty='elasticnet',
        # which this search never selects, so it was a dead search dimension.
    }
    model = LogisticRegression(**param)
    model.fit(X_train, y_train)
    # Score on validation data; tuning against X_test would leak the test set.
    preds = model.predict(X_val)
    return f1_score(y_val, preds, average='weighted')

# Search for the configuration that maximises the objective.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)
best_params = study.best_params

# Refit a fresh logistic regression with the winning configuration.
best_model = LogisticRegression(**best_params)
best_model.fit(X_train, y_train)

# Test-split metrics of the refitted model.
preds = best_model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, preds):f}")
print(f"Model F1 Score: {f1_score(y_test, preds, average='weighted'):f}")

# Validation-split metrics of the same model.
val_preds = best_model.predict(X_val)
print(f"Validation Accuracy: {accuracy_score(y_val, val_preds):f}")
print(f"Validation F1 Score: {f1_score(y_val, val_preds, average='weighted'):f}")

In [None]:
import pickle

# Persist the tuned logistic regression to "LR"; the context manager guarantees the
# file is flushed and closed even if pickling fails.
with open("LR", 'wb') as model_file:
    pickle.dump(best_model, model_file)

In [None]:
import optuna
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score

def objective(trial):
    """Optuna objective for GradientBoostingClassifier: weighted F1 on the validation split.

    Samples one configuration from ``trial``, fits on the training split and returns
    the score to maximise. Scoring uses the validation split, as in the other tuning
    cells, so the test split is not used for model selection.
    """
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1,log=True),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 5),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 4),
    }
    model = GradientBoostingClassifier(**param)
    model.fit(X_train, y_train)
    # Score on validation data; tuning against X_test would leak the test set.
    preds = model.predict(X_val)
    return f1_score(y_val, preds, average="weighted")

# Search for the configuration that maximises the objective.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)
best_params = study.best_params

# Refit a fresh gradient-boosting model with the winning configuration.
best_model = GradientBoostingClassifier(**best_params)
best_model.fit(X_train, y_train)

# Test-split metrics of the refitted model.
preds = best_model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, preds):f}")
print(f"Model F1 Score: {f1_score(y_test, preds, average='weighted'):f}")

# Validation-split metrics of the same model.
val_preds = best_model.predict(X_val)
print(f"Validation Accuracy: {accuracy_score(y_val, val_preds):f}")
print(f"Validation F1 Score: {f1_score(y_val, val_preds, average='weighted'):f}")

In [None]:
# Persist the tuned gradient-boosting model to "GB"; the context manager guarantees
# the file is flushed and closed even if pickling fails.
with open("GB", 'wb') as model_file:
    pickle.dump(best_model, model_file)

In [None]:
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

def objective(trial):
    """Optuna objective for RandomForestClassifier: weighted F1 on the validation split.

    Samples one configuration from ``trial``, fits on the training split and returns
    the score to maximise.
    """
    # A split cannot consider more features than the data has, so the integer
    # max_features range is capped at the number of columns (previously 1..1500).
    n_features = X_train.shape[1]
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1500),
        'max_depth': trial.suggest_int('max_depth', 2, 32),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 5),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 4),
        'max_features': trial.suggest_int('max_features', 1, n_features),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
        'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy'])
    }
    model = RandomForestClassifier(**param)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    return f1_score(y_val, preds,average="weighted")

# Search for the configuration that maximises the objective.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)
best_params = study.best_params

# Refit a fresh random forest with the winning configuration.
best_model = RandomForestClassifier(**best_params)
best_model.fit(X_train, y_train)

# Test-split metrics of the refitted model.
preds = best_model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, preds):f}")
print(f"Model F1 Score: {f1_score(y_test, preds, average='weighted'):f}")

In [None]:
# Persist the tuned random forest to "RF"; the context manager guarantees the file
# is flushed and closed even if pickling fails.
with open("RF", 'wb') as model_file:
    pickle.dump(best_model, model_file)

In [None]:
from tabnanny import verbose
import optuna
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, f1_score

def objective(trial):
    """Optuna objective for LGBMClassifier: weighted F1 on the validation split.

    Samples one configuration from ``trial``, fits on the training split with
    LightGBM logging silenced, and returns the score to maximise.
    """
    param = {
        'num_leaves': trial.suggest_int('num_leaves', 2, 500),
        'max_depth': trial.suggest_int('max_depth', 2, 128),
        'learning_rate': trial.suggest_float('learning_rate', 1e-8, 1.0,log=True),
        'n_estimators': trial.suggest_int('n_estimators', 10, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'min_child_weight': trial.suggest_float('min_child_weight', 1e-5, 1,log=True),
        # suggest_float(step=...) replaces the deprecated suggest_discrete_uniform and
        # samples the same grid (0.5, 0.6, ..., 1.0); the XGBoost cell uses the same form.
        'subsample': trial.suggest_float('subsample', 0.5, 1, step=0.1),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1, step=0.1),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 10,log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 10,log=True)
    }
    model = LGBMClassifier(**param,verbose=-1)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    return f1_score(y_val, preds, average="weighted")

# Search for the configuration that maximises the objective.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)
best_params = study.best_params

# Refit a fresh LightGBM model with the winning configuration.
best_model = LGBMClassifier(**best_params)
best_model.fit(X_train, y_train)

# Test-split metrics of the refitted model.
preds = best_model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, preds):f}")
print(f"Model F1 Score: {f1_score(y_test, preds, average='weighted'):f}")

In [None]:
# Persist the tuned LightGBM model to "LGBM"; the context manager guarantees the file
# is flushed and closed even if pickling fails.
with open("LGBM", 'wb') as model_file:
    pickle.dump(best_model, model_file)

In [None]:
import optuna
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, f1_score

def objective(trial):
    """Optuna objective for ExtraTreesClassifier.

    Draws one hyperparameter configuration from ``trial``, fits the model on the
    training split and returns its weighted F1 on the validation split.
    """
    search_space = dict(
        n_estimators=trial.suggest_int('n_estimators', 100, 1500),
        max_depth=trial.suggest_int('max_depth', 2, 32),
        min_samples_split=trial.suggest_int('min_samples_split', 2, 5),
        min_samples_leaf=trial.suggest_int('min_samples_leaf', 1, 4),
        max_features=trial.suggest_categorical('max_features', ['sqrt', 'log2']),
        bootstrap=trial.suggest_categorical('bootstrap', [True, False]),
        criterion=trial.suggest_categorical('criterion', ['gini', 'entropy']),
    )
    candidate = ExtraTreesClassifier(**search_space)
    candidate.fit(X_train, y_train)
    val_predictions = candidate.predict(X_val)
    return f1_score(y_val, val_predictions, average="weighted")

# Search for the configuration that maximises the objective.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=500)
best_params = study.best_params

# Refit a fresh extra-trees model with the winning configuration.
best_model = ExtraTreesClassifier(**best_params)
best_model.fit(X_train, y_train)

# Test-split metrics of the refitted model.
preds = best_model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, preds):f}")
print(f"Model F1 Score: {f1_score(y_test, preds, average='weighted'):f}")

In [None]:
# Persist the tuned extra-trees model to "ET"; the context manager guarantees the
# file is flushed and closed even if pickling fails.
with open("ET", 'wb') as model_file:
    pickle.dump(best_model, model_file)

In [None]:
import optuna
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score

def objective(trial):
    """Optuna objective for XGBClassifier.

    Draws one hyperparameter configuration from ``trial``, fits the model on the
    training split and returns its weighted F1 on the validation split.
    """
    search_space = dict(
        n_estimators=trial.suggest_int('n_estimators', 10, 1000),
        max_depth=trial.suggest_int('max_depth', 2, 32),
        learning_rate=trial.suggest_float('learning_rate', 1e-8, 1.0, log=True),
        gamma=trial.suggest_float('gamma', 0.1, 1),
        min_child_weight=trial.suggest_int('min_child_weight', 1, 10),
        subsample=trial.suggest_float('subsample', 0.5, 1, step=0.1),
        colsample_bytree=trial.suggest_float('colsample_bytree', 0.5, 1, step=0.1),
        reg_alpha=trial.suggest_float('reg_alpha', 1e-5, 10, log=True),
        reg_lambda=trial.suggest_float('reg_lambda', 1e-5, 10, log=True),
    )
    candidate = XGBClassifier(**search_space)
    candidate.fit(X_train, y_train)
    val_predictions = candidate.predict(X_val)
    return f1_score(y_val, val_predictions, average="weighted")

# Search for the configuration that maximises the objective.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)
best_params = study.best_params

# Refit a fresh XGBoost model with the winning configuration.
best_model = XGBClassifier(**best_params)
best_model.fit(X_train, y_train)

# Test-split metrics of the refitted model.
preds = best_model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, preds):f}")
print(f"Model F1 Score: {f1_score(y_test, preds, average='weighted'):f}")

In [None]:
# Persist the tuned XGBoost model to "XGB"; the context manager guarantees the file
# is flushed and closed even if pickling fails.
with open("XGB", 'wb') as model_file:
    pickle.dump(best_model, model_file)

In [None]:
import pickle

In [None]:

def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load files this notebook wrote.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Reload the tuned models saved by the cells above.
lr = _load_pickle('LR')
gb = _load_pickle("GB")
rf = _load_pickle("RF")
lgbm = _load_pickle("LGBM")
et = _load_pickle("ET")
xgb = _load_pickle("XGB")

In [None]:
# Reload the tuned CatBoost model from the "CB" file written by save_model earlier.
cb=CatBoostClassifier()
cb.load_model("CB")

In [None]:
# Weighted F1 of every tuned model, first on the validation split and then on the
# test split, in the order cb, lr, gb, rf, lgbm, et, xgb.
# f1_score expects (y_true, y_pred). With average="weighted" the per-class scores are
# weighted by the support of the true labels, so the order matters; the original
# calls passed the predictions first.
for fitted_model in (cb, lr, gb, rf, lgbm, et, xgb):
    p1 = fitted_model.predict(X_val)
    print(f1_score(y_val, p1, average="weighted"))
    print("\n")
    p2 = fitted_model.predict(X_test)
    print(f1_score(y_test, p2, average="weighted"))
    print("\n")

In [None]:
from scipy.optimize import minimize
from sklearn.metrics import f1_score
import numpy as np

# Validation-split predictions from each tuned model.
preds_cb, preds_lr, preds_gb, preds_rf, preds_lgbm, preds_et, preds_xgb = (
    fitted.predict(X_val) for fitted in (cb, lr, gb, rf, lgbm, et, xgb)
)

# One row per model, one column per validation sample.
preds = np.vstack((preds_cb, preds_lr, preds_gb, preds_rf, preds_lgbm, preds_et, preds_xgb))

def loss_func(weights):
    """Return 1 minus the weighted F1 of the blended validation predictions.

    ``weights`` holds one blending weight per row of the module-level ``preds``
    stack; the weighted average is thresholded at 0.5 to obtain class labels.
    """
    blended = np.average(preds, axis=0, weights=weights)
    hard_labels = [int(score > 0.5) for score in blended]
    return 1 - f1_score(y_val, hard_labels, average='weighted')

# Start the search from a uniform blend of the seven models.
starting_values = [1/7] * 7

# Each weight is constrained to the interval [0, 1].
bounds = [(0, 1) for _ in range(7)]

# Equality constraint: the weights must add up to 1.
cons = ({'type': 'eq', 'fun': lambda w: 1 - sum(w)})

# SLSQP (Sequential Least Squares Programming) supports both bounds and constraints.
# NOTE(review): loss_func thresholds the blend, so it is piecewise constant in the
# weights; a gradient-based solver may not move away from the start — verify.
res = minimize(loss_func, starting_values, method='SLSQP', bounds=bounds, constraints=cons)

print(f"Ensemble Weights: {res['x']}")

from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

# Get the predictions from each model on the test set.
# Every model must predict on X_test: the original mixed in validation predictions
# for rf/lgbm/et/xgb, which do not correspond to the y_test rows scored below.
preds_cb = cb.predict(X_test)
preds_lr = lr.predict(X_test)
preds_gb = gb.predict(X_test)
preds_rf = rf.predict(X_test)
preds_lgbm = lgbm.predict(X_test)
preds_et = et.predict(X_test)
preds_xgb = xgb.predict(X_test)

# Stack the predictions together: one row per model, one column per test sample
preds = np.vstack([preds_cb, preds_lr, preds_gb, preds_rf, preds_lgbm,preds_et,preds_xgb])

# Blend the models with the weights fitted on the validation split
final_preds = np.average(preds, axis=0, weights=res['x'])

# Threshold the blended score at 0.5 to obtain class labels
final_preds = [1 if prob > 0.5 else 0 for prob in final_preds]

# Print the classification report
print(classification_report(y_test, final_preds))

# Print Precision, Recall and F1 Score
print("Precision: %f" % precision_score(y_test, final_preds))
print("Recall: %f" % recall_score(y_test, final_preds))
print("F1 Score: %f" % f1_score(y_test, final_preds))

In [None]:
# Validation-split predictions from each tuned model.
preds_cb, preds_lr, preds_gb, preds_rf, preds_lgbm, preds_et, preds_xgb = (
    fitted.predict(X_val) for fitted in (cb, lr, gb, rf, lgbm, et, xgb)
)
# One row per model, one column per validation sample.
preds = np.vstack((preds_cb, preds_lr, preds_gb, preds_rf, preds_lgbm, preds_et, preds_xgb))

# Blend with the fitted weights, then threshold at 0.5 to obtain class labels.
final_preds = np.average(preds, axis=0, weights=res['x'])
final_preds = [int(score > 0.5) for score in final_preds]

# Full per-class report followed by the headline metrics.
print(classification_report(y_val, final_preds))
print(f"Precision: {precision_score(y_val, final_preds):f}")
print(f"Recall: {recall_score(y_val, final_preds):f}")
print(f"F1 Score: {f1_score(y_val, final_preds):f}")

In [None]:
from scipy.optimize import minimize
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

# Validation-split predictions from each tuned model.
preds_cb, preds_lr, preds_gb, preds_rf, preds_lgbm, preds_et, preds_xgb = (
    fitted.predict(X_val) for fitted in (cb, lr, gb, rf, lgbm, et, xgb)
)
# One row per model, one column per validation sample.
preds = np.vstack((preds_cb, preds_lr, preds_gb, preds_rf, preds_lgbm, preds_et, preds_xgb))

def loss_func(weights):
    """Return 1 minus the weighted F1 of the blended validation predictions.

    ``weights`` holds one blending weight per row of the module-level ``preds``
    stack; the weighted average is thresholded at 0.5 to obtain class labels.
    """
    blended = np.average(preds, axis=0, weights=weights)
    hard_labels = [int(score > 0.5) for score in blended]
    return 1 - f1_score(y_val, hard_labels, average='weighted')

# Each weight is constrained to [0, 1] and the weights must add up to 1.
bounds = [(0, 1) for _ in range(7)]
cons = ({'type': 'eq', 'fun': lambda w: 1 - sum(w)})

# Number of random restarts for the SLSQP search.
num_starts = 10

# Best (lowest) loss seen so far and the weights that produced it.
best_score, best_weights = np.inf, None

for _ in range(num_starts):
    # Draw a random starting point and normalise it so the weights sum to 1.
    values = np.random.rand(7)
    starting_values = values / values.sum()

    res = minimize(loss_func, starting_values, method='SLSQP', bounds=bounds, constraints=cons)

    # Keep this restart only if it improves on the best loss so far.
    if res.fun < best_score:
        best_score, best_weights = res.fun, res.x

# Score the best blend on the test split. The prediction stack is rebuilt from X_test
# here: the `preds` stack above holds validation predictions, which do not correspond
# to the y_test rows scored below.
test_preds = np.vstack([fitted.predict(X_test) for fitted in (cb, lr, gb, rf, lgbm, et, xgb)])

# Calculate the weighted average of predictions
final_preds = np.average(test_preds, axis=0, weights=best_weights)

# Threshold the blended score at 0.5 to obtain class labels
final_preds = [1 if prob > 0.5 else 0 for prob in final_preds]

print('Best Ensemble Weights: {weights}'.format(weights=best_weights))

print(classification_report(y_test, final_preds))

# Print Precision, Recall and F1 Score
print("Precision: %f" % precision_score(y_test, final_preds))
print("Recall: %f" % recall_score(y_test, final_preds))
print("F1 Score: %f" % f1_score(y_test, final_preds))

In [None]:
# Validation-split predictions from each tuned model.
preds_cb, preds_lr, preds_gb, preds_rf, preds_lgbm, preds_et, preds_xgb = (
    fitted.predict(X_val) for fitted in (cb, lr, gb, rf, lgbm, et, xgb)
)
# One row per model, one column per validation sample.
preds = np.vstack((preds_cb, preds_lr, preds_gb, preds_rf, preds_lgbm, preds_et, preds_xgb))

# Blend with the best weights found, then threshold at 0.5 to obtain class labels.
final_preds = np.average(preds, axis=0, weights=best_weights)
final_preds = [int(score > 0.5) for score in final_preds]

# Full per-class report followed by the headline metrics.
print(classification_report(y_val, final_preds))
print(f"Precision: {precision_score(y_val, final_preds):f}")
print(f"Recall: {recall_score(y_val, final_preds):f}")
print(f"F1 Score: {f1_score(y_val, final_preds):f}")

In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np

models = [cb, lr, gb, rf, lgbm, et, xgb]
model_names = ['cb', 'lr', 'gb', 'rf', 'lgbm', 'et', 'xgb']

# Validation-split predictions, one entry per model, plus each model's error mask.
preds = [model.predict(X_val) for model in models]
errors = [model_preds != y_val for model_preds in preds]

# Report both diversity measures for every unordered pair of models.
for i, name_i in enumerate(model_names):
    for j in range(i + 1, len(model_names)):
        name_j = model_names[j]

        # Fraction of validation samples on which the two models disagree.
        disagree = np.mean(preds[i] != preds[j])
        print(f'Disagreement Measure between {name_i} and {name_j}: {disagree}')

        # Pearson correlation between the two models' error indicators.
        correlation = np.corrcoef(errors[i], errors[j])[0, 1]
        print(f'Correlation of Errors between {name_i} and {name_j}: {correlation}')

# WEBAPP
### Store the markup below in an HTML file (the Flask app further down serves it as the static file `index.html`)

In [None]:
<!DOCTYPE html>
<!-- Single-page form that posts patient measurements to /predict as JSON and shows the result. -->
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Heart Disease Prediction</title>
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
</head>
<body>
    <h1>Heart Disease Prediction</h1>
    <!-- Numeric fields are required: an empty field would parse to NaN and be sent as null. -->
    <form id="predict-form">
        <label for="age">Age:</label><br>
        <input type="number" id="age" name="age" required><br>
        <label for="sex">Sex:</label><br>
        <select id="sex" name="sex">
            <option value="Male">Male</option>
            <option value="Female">Female</option>
        </select><br>
        <label for="chestPainType">Chest Pain Type:</label><br>
        <select id="chestPainType" name="chestPainType">
            <option value="Typical angina">Typical angina</option>
            <option value="Atypical angina">Atypical angina</option>
            <option value="Non-anginal pain">Non-anginal pain</option>
            <option value="Asymptomatic">Asymptomatic</option>
        </select><br>
        <label for="restingBP">Resting Blood Pressure:</label><br>
        <input type="number" id="restingBP" name="restingBP" required><br>
        <label for="cholesterol">Cholesterol:</label><br>
        <input type="number" id="cholesterol" name="cholesterol" required><br>
        <label for="fastingBS">Fasting Blood Sugar:</label><br>
        <input type="number" id="fastingBS" name="fastingBS" required><br>
        <label for="maxHR">Maximum Heart Rate:</label><br>
        <input type="number" id="maxHR" name="maxHR" required><br>
        <label for="exerciseAngina">Exercise Induced Angina:</label><br>
        <select id="exerciseAngina" name="exerciseAngina">
            <option value="Yes">Yes</option>
            <option value="No">No</option>
        </select><br>
        <input type="submit" value="Predict">
    </form>
    <p id="prediction"></p>

    <script>
        // Intercept the form submit, send the fields as a JSON body to /predict and
        // render either the prediction or the server's error message.
        $("#predict-form").submit(function(event) {
            event.preventDefault();
            var data = {
                // Explicit radix 10 so the values are always parsed as decimal integers.
                'Age': parseInt($("#age").val(), 10),
                'Sex': $("#sex").val(),
                'ChestPainType': $("#chestPainType").val(),
                'RestingBP': parseInt($("#restingBP").val(), 10),
                'Cholesterol': parseInt($("#cholesterol").val(), 10),
                'FastingBS': parseInt($("#fastingBS").val(), 10),
                'MaxHR': parseInt($("#maxHR").val(), 10),
                'ExerciseAngina': $("#exerciseAngina").val()
            };
            $.ajax({
                url: '/predict',
                method: 'POST',
                contentType: 'application/json',
                data: JSON.stringify(data),
                success: function(response) {
                    $("#prediction").text('Prediction: ' + response.prediction);
                },
                error: function(response) {
                    // responseJSON is undefined when the server answers with a non-JSON
                    // body (e.g. an HTML 500 page); fall back to the HTTP status text.
                    var message = (response.responseJSON && response.responseJSON.error)
                        ? response.responseJSON.error
                        : (response.statusText || 'Request failed');
                    $("#prediction").text('Error: ' + message);
                }
            });
        });
        
    </script>
</body>
</html>

In [None]:
from flask import Flask, request, jsonify
import pickle
import numpy as np

def _load_model(path):
    """Unpickle and return the model stored at ``path``, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load model files you trust.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Load the models
cb1 = _load_model('cbheart1.pkl')
cb2 = _load_model('cbheart2.pkl')
lr = _load_model('LRHD_0.8F1_0.75Acc.pkl')
gb = _load_model('GradBoostHeartDisease_0.82_0.82.pkl')

# Ensemble weights, one per model in the order cb1, cb2, lr, gb (as used in predict)
weights = np.array([0.25014892, 0.24990093, 0.24985107, 0.25009908])

app = Flask(__name__)
def validate_input(data):
    """Validate a prediction request and convert it into the model feature vector.

    Args:
        data: Decoded JSON body. Expected keys: Age, Sex, ChestPainType, RestingBP,
            Cholesterol, FastingBS, MaxHR, ExerciseAngina. The mapping is not modified.

    Returns:
        A tuple ``(is_valid, message, features)``. ``features`` is the list
        ``[Age, Sex, RestingBP, Cholesterol, MaxHR, ExerciseAngina]`` followed by four
        boolean chest-pain indicators, or ``None`` when the input is rejected.
        Malformed input (wrong container, missing keys, wrong field types) is reported
        through this tuple instead of raising.
    """
    if not isinstance(data, dict):
        return False, "Request body must be a JSON object.", None

    required_keys = ['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol',
                     'FastingBS', 'MaxHR', 'ExerciseAngina']
    missing = [key for key in required_keys if key not in data]
    if missing:
        return False, "Missing required field(s): {}.".format(", ".join(missing)), None

    # The categorical fields are lower-cased below, so they must be strings.
    for key in ('Sex', 'ChestPainType', 'ExerciseAngina'):
        if not isinstance(data[key], str):
            return False, "Field {} must be a string.".format(key), None

    # NOTE(review): FastingBS is required and type-checked but is not part of the
    # feature vector, matching the original behaviour — confirm the deployed models
    # were trained without it.
    if not isinstance(data['FastingBS'], (int, float)):
        return False, "Field FastingBS must be a number.", None

    # Encode the categorical fields into local values; the caller's dict is left untouched.
    sex = 1 if data['Sex'].lower() == 'male' else 0
    exercise_angina = 1 if data['ExerciseAngina'].lower() == 'yes' else 0
    chest_pain_types = ['typical angina', 'atypical angina', 'non-anginal pain', 'asymptomatic']
    chest_pain = data['ChestPainType'].lower()
    chest_pain_flags = [chest_pain == chest_pain_type for chest_pain_type in chest_pain_types]

    # Flat feature list in the order expected by the checks below.
    features = [data['Age'], sex, data['RestingBP'], data['Cholesterol'], data['MaxHR'], exercise_angina] + chest_pain_flags

    # Exact expected type and inclusive (low, high) range of each feature, by position.
    expected_types = [int, int, int, int, int, int, bool, bool, bool, bool]
    constraints = [(20,80), (0,1), (0,200), (0,603), (60,202), (0,1), (False,True), (False,True), (False,True), (False,True)]

    for i, (value, expected_type, (low, high)) in enumerate(zip(features, expected_types, constraints)):
        # Exact type match on purpose: a bool must not pass as an int, nor a float as an int.
        if type(value) is not expected_type:
            return False, "Incorrect data type for feature {}. Expected {} but got {}.".format(i, expected_type.__name__, type(value).__name__), None
        if value < low or value > high:
            return False, "Feature {} out of bounds. Expected between {} and {} but got {}.".format(i, low, high, value), None
    # If all checks pass, return True and the features
    return True, "Input is valid.", features

        
@app.route('/',methods=["GET"])
def home():
    """Serve the prediction form, i.e. the app's static file ``index.html``."""
    return app.send_static_file('index.html')


@app.route('/predict', methods=['POST'])
def predict():
    """Return the ensemble class prediction for the JSON body of a POST request.

    Responds with ``{"prediction": <int>}`` on success, or ``{"error": <message>}``
    with HTTP status 400 when the body is not a JSON object or fails validation.
    """
    # Get the data from the POST request
    data = request.get_json(force=True)
    if not isinstance(data, dict):
        # A JSON array or scalar cannot carry the named fields validate_input reads.
        message = "Request body must be a JSON object."
        print(message)
        return jsonify({'error': message}), 400

    is_valid, message, features = validate_input(data)
    if not is_valid:
        print(message)
        return jsonify({'error': message}),400

    # Build the single-row model input once and reuse it for every model.
    model_input = [np.array(features)]
    probabilities = np.array([
        fitted.predict_proba(model_input) for fitted in (cb1, cb2, lr, gb)
    ])

    # Weighted average of the class probabilities across models, then pick the
    # most probable class.
    ensemble_prediction = np.argmax(np.average(probabilities, axis=0, weights=weights))

    # Send back to the client
    output = {'prediction': int(ensemble_prediction)}
    return jsonify(output)

if __name__ == '__main__':
    import os

    # Flask's debug mode enables an interactive debugger that can run arbitrary code,
    # so it is opt-in: set FLASK_DEBUG=1 (or true/yes) for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(port=5000, debug=debug_enabled)