In [1]:
# %pip install numpy
# %pip install codecarbon
# %pip install pandas
# %pip install matplotlib
# %pip install seaborn
# %pip install scikit-learn
# %pip install tensorflow


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from codecarbon import EmissionsTracker
# Apply seaborn's default theme to the matplotlib figures drawn below.
sns.set()

In [None]:
# Load the dataset; the relative path means the CSV must sit in the working directory.
data = pd.read_csv("Cardiovascular_Disease_Dataset.csv")

In [None]:
# Preview the first few rows only; asking for 1000 rows is not needed to inspect the schema.
data.head()

In [None]:
# Column names, dtypes and non-null counts.
data.info()

In [45]:
# Start monitoring energy consumption.
# NOTE(review): the recorded output of this cell shows codecarbon refusing to start
# because another instance was already running — presumably the cell was re-run
# without stopping the previous tracker; confirm on a fresh kernel.
tracker = EmissionsTracker()
tracker.start()

# Stop monitoring energy consumption (left disabled here; stop() is called in the final cell).
# tracker.stop()

[codecarbon ERROR @ 21:14:32] Error: Another instance of codecarbon is already running. Turn off the other instance to be able to run this one. Exiting.


In [None]:
# Number of rows per value of the `target` column.
target_counts = data['target'].value_counts()
target_counts

In [None]:
# Share of each target class. Slice colours follow the order of `target_counts`
# (value_counts sorts by frequency), not the numeric class value.
plt.pie(
    target_counts,
    labels=target_counts.index,
    autopct='%1.1f%%',
    colors=['red', 'lightgreen'],
)
plt.title('Target Class Distribution')
plt.show()  # render the figure instead of echoing the tuple returned by plt.pie

In [None]:
# Histogram of the `age` column, 30 bins.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(data['age'], bins=30, edgecolor='black', alpha=0.7)
ax.set_xlabel('Age')
ax.set_ylabel('Frequency')
ax.set_title('Age Distribution')
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Reorder the columns so that `target` comes last.
features = data.columns.tolist()
features.remove('target')
data1 = data[[*features, 'target']]

In [None]:
# Display the reordered frame.
data1

In [None]:
# Pairwise correlations between all columns of data1, drawn as an annotated heatmap.
correlation_matrix = data1.corr()

fig, ax = plt.subplots(figsize=(15, 15))
ax = sns.heatmap(correlation_matrix, annot=True, cmap="YlGn", ax=ax)
ax.set_xlabel("Features")
ax.set_ylabel("Features")
ax.set_title("Correlation Matrix")
plt.show()

In [None]:
# Feature matrix: every column except the identifier and the label.
X = data1.drop(columns=['patientid', 'target'])
X.columns

In [None]:
# Label vector and the patient identifiers (the ids are excluded from the features).
y, patientid = data1['target'], data1['patientid']

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 hold-out split with a fixed seed; the ids are passed as a third array
# so they are partitioned in step with X and y.
(X_train, X_test,
 y_train, y_test,
 id_train, id_test) = train_test_split(
    X, y, patientid,
    test_size=0.2,
    random_state=42,
)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics on the training split only, then apply that same
# fitted transformation to both splits (the test split is never used for fitting).
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Keyword arguments unpacked into the LogisticRegression constructors in this notebook.
hyperparameters = dict(
    penalty='l2',
    solver='lbfgs',   # solver algorithm
    max_iter=1000,    # iteration limit for the solver
)

In [None]:
# Build and fit the scikit-learn classifier. `fit` returns the estimator itself,
# so `model_log` and `result_log` refer to the same fitted object.
result_log = model_log = LogisticRegression(**hyperparameters).fit(X_train, y_train)

In [None]:
# Class predictions of the scikit-learn model on the test split.
y_log_pred = model_log.predict(X_test)
y_log_pred

In [None]:
# Accuracy of the scikit-learn model on the test split, printed to two decimals.
accuracy = accuracy_score(y_test, y_log_pred)
print(f"Accuracy: {accuracy:.2f}")

In [None]:
# Text report from sklearn's classification_report for the same test predictions.
print("Classification Report:")
print(classification_report(y_test, y_log_pred))

In [None]:
from sklearn import metrics

# Confusion matrix of the scikit-learn model on the test split. It is stored under
# its own name: binding it to `confusion_matrix` would shadow sklearn's function
# of the same name, which a later cell imports and calls.
cm_log = metrics.confusion_matrix(y_test, y_log_pred)
cm_log

In [None]:
%pip install statsmodels
import statsmodels.api as sm
logit_model = sm.Logit(y_train, X_train)
result_smlog = logit_model.fit()

print(result_smlog.summary())

In [None]:
smlog_pred=result_smlog.predict(X_test)
smlog_pred_binary = (smlog_pred >= 0.5).astype(int)

accuracy_logit = accuracy_score(y_test, smlog_pred_binary)
print("Accuracy:", accuracy_logit)

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
auc_score = roc_auc_score(y_test, smlog_pred_binary)
auc_score

In [None]:
fpr, tpr, thresholds = roc_curve(y_test, smlog_pred_binary)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc_score:.2f})')
plt.plot([0, 1], [0, 1], linestyle='--', color='r')
plt.xlabel('False Positive Rate (1-Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')
plt.title('ROC Curve for Logistic Regression')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of the thresholded statsmodels predictions on the test split.
cmlg = confusion_matrix(y_test, smlog_pred_binary)

In [None]:
# Draw the confusion matrix as an annotated heatmap without a colour bar.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cmlg, annot=True, cmap='Blues', fmt='g', cbar=False, ax=ax)
ax.set(
    xlabel='Predicted Labels',
    ylabel='True Labels',
    title='Confusion Matrix for test data',
)
plt.show()

In [None]:
from sklearn.decomposition import PCA

# Reduce the feature space to two principal components so the classifier
# can be drawn in a plane.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)

# Train a logistic regression model on the reduced feature space
model_log_pca = LogisticRegression(**hyperparameters)
model_log_pca.fit(X_train_pca, y_train)

# Grid over the projected data with a one-unit margin, sampled every 0.1
x_min, x_max = X_train_pca[:, 0].min() - 1, X_train_pca[:, 0].max() + 1
y_min, y_max = X_train_pca[:, 1].min() - 1, X_train_pca[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# Predicted class label (predict returns labels, not probabilities) for each grid point
Z = model_log_pca.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the decision regions along with the training points in the reduced space
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
plt.scatter(X_train_pca[:, 0], X_train_pca[:, 1], c=y_train, cmap=plt.cm.coolwarm, edgecolors='k')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Logistic Regression Decision Boundary')
plt.colorbar()
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every feature to [0, 1], fitting on the training split only.
# Note that `scaler` is rebound here and no longer refers to the StandardScaler.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# %pip install tensorflow
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

# Feed-forward binary classifier: three 100-unit ReLU layers and a sigmoid output.
# The input width is read from the training matrix instead of being hard-coded,
# so the cell keeps working if the set of feature columns changes.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),  # Define input shape using Input layer
    layers.Dense(100, activation='relu'),
    layers.Dropout(0.5),  # dropout rate raised to 0.5 to reduce overfitting
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

In [None]:
# Train and evaluate the model
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train, y_train,
    validation_split=0.2,  # last 20% of the training rows are held out for validation
    batch_size=200,
    epochs=100,
)
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

In [None]:
# NOTE(review): `model` was already fitted on X_train in the previous cell, so this
# call keeps training the same network on the min-max-scaled features rather than
# starting from fresh weights — confirm this is intended.
model.fit(X_train_scaled, y_train, epochs=100, batch_size=200, validation_split=0.2)

In [None]:
# Network outputs on the min-max-scaled test split, turned into hard labels:
# 1 where the output is strictly above 0.5, otherwise 0.
y_pred_prob = model.predict(X_test_scaled)
y_pred = np.where(y_pred_prob > 0.5, 1, 0)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

In [None]:
# Replace `model` with a new network: one 100-unit ReLU layer followed by batch
# normalisation and dropout, a second 100-unit ReLU layer, and a sigmoid output.
model = keras.Sequential([
    keras.Input(shape=(X_train_scaled.shape[1],)),  # input width taken from the data
    layers.Dense(100, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(100, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Compile the network defined in the previous cell: Adam optimiser, binary cross-entropy loss, accuracy metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 100 epochs on the min-max-scaled features. No batch_size is passed,
# so Keras uses its default batch size here.
model.fit(X_train_scaled, y_train, epochs=100, validation_split=0.2)

In [None]:
# Loss and accuracy of the current `model` on the min-max-scaled test split.
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

In [None]:
# Rebuild the same architecture from scratch so the run with batch_size=200 below
# starts from newly initialised weights.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Compile the rebuilt network: Adam optimiser, binary cross-entropy loss, accuracy metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train with batch_size=200 and keep the object returned by fit() in `deepmodel`.
deepmodel= model.fit(X_train_scaled, y_train, epochs=100, batch_size=200, validation_split=0.2)

In [None]:
# Loss and accuracy of the final network on the min-max-scaled test split.
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

# Stop energy-consumption monitoring and print the report.
# stop() returns the measured emissions; the value was previously discarded, so
# nothing was reported. It can be None when the tracker did not run, hence the guard.
emissions = tracker.stop()
if emissions is None:
    print("codecarbon reported no emissions (the tracker was not running).")
else:
    print(f"Estimated emissions: {emissions:.6f} kg CO2eq")