In [1]:
!sudo apt-get install pciutils lshw
!sudo apt-get install -y nvidia-driver-535

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libpci3 pci.ids usb.ids
The following NEW packages will be installed:
  libpci3 lshw pci.ids pciutils usb.ids
0 upgraded, 5 newly installed, 0 to remove and 49 not upgraded.
Need to get 883 kB of archives.
After this operation, 3,256 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 pci.ids all 0.0~2022.01.22-1 [251 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libpci3 amd64 1:3.7.0-6 [28.9 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 lshw amd64 02.19.git.2021.06.19.996aaad9c7-2build1 [321 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/main amd64 pciutils amd64 1:3.7.0-6 [63.6 kB]
Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 usb.ids all 2022.04.02-1 [219 kB]
Fetched 883 kB in 2s (420 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No

KeyboardInterrupt: 

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout

def convert_age_to_numeric(age_range):
    if '80 or older' in age_range:
        return 80
    else:
        start, end = map(int, age_range.split('-'))
        return (start + end) / 2

In [6]:
# Load dataset
data = pd.read_csv('Heart_Disease.csv')
data.drop_duplicates(inplace=True)

for col in data.columns:
    unico = data[col].unique()
    if set(unico) == {'Yes', 'No'}:
        data[col] = data[col].map({'Yes': 1, 'No': 0})
data['Sex'] = data['Sex'].map({'Female':0, 'Male':1})
data['AgeCategory'] = data['AgeCategory'].apply(convert_age_to_numeric).astype(int)
data['GenHealth'] = data['GenHealth'].map({'Poor':0, 'Fair':1, 'Good':2, 'Very good':3, 'Excellent':4})
data['Diabetic'] = data['Diabetic'].map({'Yes':2, 'No':-1, 'No, borderline diabetes':0, 'Yes (during pregnancy)':1})

# Convert categorical data to numerical data
labelencoder = LabelEncoder()
data['Race'] = labelencoder.fit_transform(data['Race'])


In [7]:
# Separate features and target variable
X = data.drop('HeartDisease', axis=1)
y = data['HeartDisease']

In [8]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# XGBoost
from xgboost import XGBClassifier
model = XGBClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('XGBoost')
print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('F1 Score:', f1_score(y_test, y_pred))

XGBoost
Accuracy: 0.9096679040169694
Precision: 0.49952874646559847
Recall: 0.09724770642201835
F1 Score: 0.1628014129933958


In [10]:
# Apply SMOTE to the training data to handle class imbalance
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [11]:
# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)

Model Training and Evaluation

In [13]:
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as e:
        print(e)

In [14]:
# Create the model
model = Sequential()

# input layer
model.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'))

# hidden layers
model.add(Dense(32, activation='relu'))

# hidden layers
model.add(Dropout(0.5))

# output layer
model.add(Dense(1, activation='sigmoid'))  # Sigmoide for binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_scaled, y_train_resampled, epochs=20, batch_size=128, validation_data=(X_test_scaled, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Accuracy: {accuracy*100:.2f}%")

# Plot the training history
y_pred = (model.predict(X_test_scaled) > 0.5).astype(int)

# Accuracy
print("Classification Report:")
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m3431/3431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 5ms/step - accuracy: 0.7697 - loss: 0.4872 - val_accuracy: 0.7310 - val_loss: 0.5041
Epoch 2/20
[1m3431/3431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2ms/step - accuracy: 0.7923 - loss: 0.4554 - val_accuracy: 0.7473 - val_loss: 0.4814
Epoch 3/20
[1m3431/3431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.7937 - loss: 0.4506 - val_accuracy: 0.7519 - val_loss: 0.4647
Epoch 4/20
[1m3431/3431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.7953 - loss: 0.4477 - val_accuracy: 0.7349 - val_loss: 0.4908
Epoch 5/20
[1m3431/3431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.7956 - loss: 0.4467 - val_accuracy: 0.7490 - val_loss: 0.4708
Epoch 6/20
[1m3431/3431[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.7958 - loss: 0.4462 - val_accuracy: 0.7503 - val_loss: 0.4597
Epoch 7/20
[

In [15]:
from sklearn.metrics import confusion_matrix

# Calcular la matriz de confusión
cm = confusion_matrix(y_test, y_pred)

# Extraer los valores de la matriz
TN, FP, FN, TP = cm.ravel()

# Imprimir la matriz de confusión con los nombres
print("Confusion Matrix:")
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")
print(f"True Positives (TP): {TP}")

# También puedes imprimir la matriz completa para referencia
print("\nMatriz de Confusión Completa:")
print(cm)

Confusion Matrix:
True Negatives (TN): 40928
False Positives (FP): 13966
False Negatives (FN): 1934
True Positives (TP): 3516

Matriz de Confusión Completa:
[[40928 13966]
 [ 1934  3516]]


In [16]:
# Precision
precision = precision_score(y_test, y_pred)
print(f"Precision: {precision:.2f}")

# Recall
recall = recall_score(y_test, y_pred)
print(f"Recall: {recall:.2f}")

# F1-Score
f1 = f1_score(y_test, y_pred)
print(f"F1-Score: {f1:.2f}")

# AUC-ROC
auc_roc = roc_auc_score(y_test, y_pred)
print(f"AUC-ROC: {auc_roc:.2f}")

# Accuracy (again for explicitness)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Precision: 0.20
Recall: 0.65
F1-Score: 0.31
AUC-ROC: 0.70
Accuracy: 0.74
