In [None]:
import sys
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# Make the project-local `pipelines` package importable from this notebook.
# The path is resolved relative to the kernel's working directory, so the
# notebook must be started from its own folder for this to point at the
# right place.
current_dir = os.getcwd()
src_dir = os.path.abspath(os.path.join(current_dir, '../'))  # one level up from the working directory
# Guard the append so re-running this cell does not pile up duplicate entries.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from pipelines.heart_disease_pipeline import build_pipeline

In [None]:
# Load the raw table; the relative path is resolved against the kernel's working directory.
df = pd.read_csv('../data/heart.csv')

# Separate the label column (HeartDisease) from the predictor columns.
y = df['HeartDisease']
X = df.drop(columns=['HeartDisease'])

# Two-stage hold-out split with a fixed seed:
#   stage 1 keeps 60% of the rows for training and sets 40% aside;
#   stage 2 divides that 40% into 40% validation / 60% test,
#   i.e. roughly 16% validation and 24% test of the full table.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.6, random_state=42
)

# Fit the preprocessing pipeline on the training rows only, then reuse the
# fitted pipeline unchanged on the validation and test rows (in that order).
pipeline = build_pipeline()
X_train_processed = pipeline.fit_transform(X_train, y_train)
X_val_processed, X_test_processed = (
    pipeline.transform(split) for split in (X_val, X_test)
)




In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np

def _feature_tensor(table):
    """Return the values of a processed feature table as a float32 tensor."""
    return torch.tensor(table.values, dtype=torch.float32)


def _label_tensor(labels):
    """Return the label values as a float32 tensor with a trailing axis of size 1."""
    return torch.tensor(labels.values, dtype=torch.float32).unsqueeze(1)


# Convert every split to tensors; labels get shape (n, 1) to line up with the
# single-unit output of the network.
X_train_tensor, y_train_tensor = _feature_tensor(X_train_processed), _label_tensor(y_train)
X_val_tensor, y_val_tensor = _feature_tensor(X_val_processed), _label_tensor(y_val)
X_test_tensor, y_test_tensor = _feature_tensor(X_test_processed), _label_tensor(y_test)

# Mini-batches of 10 training rows, reshuffled on every pass over the data.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=10)

# Define the MLP model in PyTorch
class MLP(nn.Module):
    """Feed-forward binary classifier that outputs a probability per row.

    Each hidden stage is ``Linear -> ReLU -> Dropout``; the head is a single
    ``Linear`` unit followed by ``Sigmoid``, so the output lies in [0, 1] and
    can be fed directly to ``nn.BCELoss``.

    Args:
        input_dim: Number of input features per row.
        hidden_dims: Width of each hidden layer, in order. The default
            ``(32, 16)`` reproduces the original fixed architecture.
        dropout: Dropout probability applied after every hidden activation.
    """

    def __init__(self, input_dim, hidden_dims=(32, 16), dropout=0.2):
        super().__init__()
        modules = []
        in_features = input_dim
        for width in hidden_dims:
            modules.extend([
                nn.Linear(in_features, width),
                nn.ReLU(),
                nn.Dropout(dropout),
            ])
            in_features = width
        # Output head: one unit squashed to a probability.
        modules.extend([nn.Linear(in_features, 1), nn.Sigmoid()])
        # Kept as a single Sequential named `layers` so parameter names
        # (layers.0.weight, ...) are the same as before for the default sizes.
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` float tensor to ``(batch, 1)`` probabilities."""
        return self.layers(x)

# Seed PyTorch's global RNG before anything random happens in this cell.
# Weight initialisation, the DataLoader's shuffling and dropout masks all draw
# from it, so without a seed every run gives different losses and metrics.
torch.manual_seed(42)

# Initialize the model, optimizer, and loss function.
# BCELoss expects probabilities, which the model's final Sigmoid provides.
model = MLP(input_dim=X_train_processed.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Train the model
num_epochs = 150
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        # `loss` is the mean over this batch; weight it by the batch size so a
        # smaller final batch does not skew the reported epoch average.
        epoch_loss += loss.item() * X_batch.size(0)

    if (epoch+1) % 10 == 0:
        # Score the validation split with dropout off and no gradient tracking,
        # then switch back to training mode for the next epoch.
        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val_tensor), y_val_tensor).item()
        model.train()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_loader.dataset):.4f}, Val Loss: {val_loss:.4f}')

# Score the test split: eval mode turns dropout off, no_grad skips autograd bookkeeping.
model.eval()
with torch.no_grad():
    test_scores = model(X_test_tensor)
    y_pred_prob = test_scores.numpy().flatten()

# Threshold the predicted probabilities at 0.5 to obtain hard 0/1 labels.
y_pred = (y_pred_prob >= 0.5).astype(int)

# Accuracy uses the hard labels; ROC AUC uses the raw probabilities.
test_accuracy = accuracy_score(y_test, y_pred)
test_auc = roc_auc_score(y_test, y_pred_prob)

print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test ROC AUC: {test_auc:.4f}')


Epoch 10/150, Loss: 0.6298
Epoch 20/150, Loss: 0.6241
Epoch 30/150, Loss: 0.5914
Epoch 40/150, Loss: 0.5818
Epoch 50/150, Loss: 0.5573
Epoch 60/150, Loss: 0.5275
Epoch 70/150, Loss: 0.5582
Epoch 80/150, Loss: 0.5626
Epoch 90/150, Loss: 0.5603
Epoch 100/150, Loss: 0.5587
Epoch 110/150, Loss: 0.5129
Epoch 120/150, Loss: 0.5631
Epoch 130/150, Loss: 0.5283
Epoch 140/150, Loss: 0.5538
Epoch 150/150, Loss: 0.5475
Test Accuracy: 0.8326
Test ROC AUC: 0.9070
