In [1]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

from imblearn.ensemble import RUSBoostClassifier, EasyEnsembleClassifier, BalancedBaggingClassifier
from imblearn.ensemble import BalancedRandomForestClassifier

In [3]:
clfs1 = {
    'dt' : DecisionTreeClassifier(class_weight = 'balanced'),
    'log': LogisticRegression(solver = 'liblinear', class_weight= 'balanced'),
    'et' : ExtraTreesClassifier(class_weight = 'balanced', n_jobs = -1),
    'rf' : RandomForestClassifier(class_weight = 'balanced', n_jobs = -1),
    'gb' : GradientBoostingClassifier(),
    'hgb': HistGradientBoostingClassifier(class_weight = 'balanced'),
    'ab' : AdaBoostClassifier(),
    'svc': SVC(class_weight = 'balanced'),
    'knn': KNeighborsClassifier(n_jobs = -1)
}

In [4]:
clfs2 = {
    "gnb": GaussianNB(),
    "lg" : LGBMClassifier(class_weight = 'balanced'),
    "xg" : XGBClassifier(n_jobs = -1),
    "cat": CatBoostClassifier(),
    'rus': RUSBoostClassifier(),
    'eec': EasyEnsembleClassifier(n_jobs = -1),
    'bbc': BalancedBaggingClassifier(n_jobs = -1),
    'brf': BalancedRandomForestClassifier(n_jobs = -1)
}

In [5]:
train = pd.read_csv("data//maintrain.csv")
test = pd.read_csv("data//maintest.csv")

In [6]:
X_test = test.drop(['HeartDiseaseorAttack'], axis = 1)
y_test = test['HeartDiseaseorAttack']

In [7]:
X = train.drop(["HeartDiseaseorAttack"], axis = 1)
y = train['HeartDiseaseorAttack']

In [8]:
undersampler = RandomUnderSampler(sampling_strategy = 0.3)
X_under, y_under = undersampler.fit_resample(X, y)

In [9]:
X_train, X_valid, y_train, y_valid = train_test_split(X_under, y_under, test_size = 0.3, random_state = 42,
                                                      stratify = y_under)

In [10]:
scores = pd.DataFrame()

In [None]:
%%time
for name, clf in clfs1.items():
    clf.fit(X_valid, y_valid)
    pred = clf.predict(X_test)
    f1 = f1_score(y_test, pred)
    recall = recall_score(y_test, pred)
    precision = precision_score(y_test, pred)
    temp = pd.Series(data = [f1, recall, precision], index = ['f1', 'recall', 'precision'], name = name)
    scores = pd.concat([scores, temp], axis = 1)

In [None]:
%%time
for name, clf in clfs2.items():
    clf.fit(X_valid, y_valid)
    pred = clf.predict(X_test)
    f1 = f1_score(y_test, pred)
    recall = recall_score(y_test, pred)
    precision = precision_score(y_test, pred)
    temp = pd.Series(data = [f1, recall, precision], index = ['f1', 'recall', 'precision'], name = name)
    scores = pd.concat([scores, temp], axis = 1)

In [62]:
scores.T

Unnamed: 0,f1,recall,precision
dt,0.320098,0.512364,0.232755
log,0.404479,0.775705,0.273562
et,0.373857,0.532321,0.288096
rf,0.391606,0.526247,0.311825
gb,0.424884,0.55705,0.343407
hgb,0.404447,0.789154,0.271898
ab,0.418126,0.528416,0.345924
svc,0.388814,0.817354,0.255077
knn,0.337239,0.493275,0.256196
gnb,0.383608,0.686334,0.266196


In [12]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.utils.class_weight import compute_class_weight

# Assuming X_train, y_train are your training data and labels

# Calculate class weights
class_weights = compute_class_weight('balanced', classes=[0, 1], y=y_train)
class_weight_dict = {0: class_weights[0], 1: class_weights[1]}

# Define a simple neural network architecture
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model with class weights
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model with class weights
model.fit(X_train, y_train, epochs=50, class_weight=class_weight_dict, validation_data=(X_valid, y_valid))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2d096f696a0>

In [13]:
pred = model.predict(X_test)



In [14]:
# Let's say you choose a threshold of 0.5 (you can adjust this based on your analysis)
threshold = 0.5

# Convert raw scores to binary predictions based on the chosen threshold
binary_predictions = (pred >= threshold).astype(int)

In [15]:
f1 = f1_score(y_test, binary_predictions)
recall = recall_score(y_test, binary_predictions)
precision = precision_score(y_test, binary_predictions)
temp = pd.Series(data = [f1, recall, precision], index = ['f1', 'recall', 'precision'], name = "hello")

In [16]:
temp

f1           0.408060
recall       0.773102
precision    0.277182
Name: hello, dtype: float64