In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

# 데이터 로드
data = pd.read_csv("heart.csv")

# 데이터 확인
print("데이터 형태:", data.shape)
print("\n컬럼 정보:\n", data.columns)
print("\n클래스 분포:\n", data["target"].value_counts())

# 특성(X)과 레이블(y) 분리
X = data.drop("target", axis=1)
y = data["target"]

# 데이터 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 훈련/테스트 세트 분할
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


데이터 형태: (303, 14)

컬럼 정보:
 Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

클래스 분포:
 target
1    165
0    138
Name: count, dtype: int64


In [4]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

models = {
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "KNN": KNeighborsClassifier()
}

# 모델별 성능 평가
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    
    print(f"\n=== {name} ===")
    print(f"정확도: {accuracy:.4f}")
    print("Confusion Matrix:")
    print(cm)



=== SVM ===
정확도: 0.8689
Confusion Matrix:
[[26  3]
 [ 5 27]]

=== Decision Tree ===
정확도: 0.8033
Confusion Matrix:
[[26  3]
 [ 9 23]]

=== Random Forest ===
정확도: 0.8525
Confusion Matrix:
[[24  5]
 [ 4 28]]

=== Logistic Regression ===
정확도: 0.8525
Confusion Matrix:
[[25  4]
 [ 5 27]]

=== KNN ===
정확도: 0.9180
Confusion Matrix:
[[27  2]
 [ 3 29]]


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# 데이터 텐서 변환
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# 검증 세트 분리 (20% of training data)
X_train_sub, X_val, y_train_sub, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# DataLoader 생성
train_dataset = TensorDataset(
    torch.tensor(X_train_sub, dtype=torch.float32),
    torch.tensor(y_train_sub.values, dtype=torch.float32).view(-1, 1)
)
val_dataset = TensorDataset(
    torch.tensor(X_val, dtype=torch.float32),
    torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# 신경망 정의
class HeartDiseaseClassifier(nn.Module):
    def __init__(self, input_dim):
        super(HeartDiseaseClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)  # 입력 차원 동적 설정
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)         # 출력 뉴런 1개
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))         # ReLU 활성화
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))      # 시그모이드 추가
        return x
# 모델 설정
model = HeartDiseaseClassifier(input_dim=X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 학습 루프
num_epochs = 50
for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    
    # 에포크당 결과 출력 (선택 사항)
    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Train Loss: {train_loss/len(train_loader):.4f} | "
          f"Val Loss: {val_loss/len(val_loader):.4f} | "
          f"Val Acc: {correct/total:.4f}")

# 테스트 평가
model.eval()
with torch.no_grad():
    y_pred_proba = model(X_test_tensor)
    y_pred_dl = (y_pred_proba > 0.5).float().numpy().flatten()

accuracy = accuracy_score(y_test, y_pred_dl)
cm_dl = confusion_matrix(y_test, y_pred_dl)

print("\n=== 딥러닝 모델 (PyTorch) ===")
print(f"정확도: {accuracy:.4f}")
print("Confusion Matrix:")
print(cm_dl)

# 머신러닝 vs 딥러닝 결과 비교
print("\n=== 최종 성능 비교 ===")
print("SVM:", accuracy_score(y_test, models["SVM"].predict(X_test)))
print("PyTorch DL:", accuracy)


Epoch 1/50 | Train Loss: 0.6958 | Val Loss: 0.6659 | Val Acc: 0.8163
Epoch 2/50 | Train Loss: 0.6457 | Val Loss: 0.6470 | Val Acc: 0.7959
Epoch 3/50 | Train Loss: 0.6113 | Val Loss: 0.6290 | Val Acc: 0.7347
Epoch 4/50 | Train Loss: 0.5897 | Val Loss: 0.6096 | Val Acc: 0.7347
Epoch 5/50 | Train Loss: 0.5659 | Val Loss: 0.5868 | Val Acc: 0.7347
Epoch 6/50 | Train Loss: 0.5013 | Val Loss: 0.5634 | Val Acc: 0.7755
Epoch 7/50 | Train Loss: 0.5117 | Val Loss: 0.5402 | Val Acc: 0.8163
Epoch 8/50 | Train Loss: 0.5131 | Val Loss: 0.5197 | Val Acc: 0.8163
Epoch 9/50 | Train Loss: 0.4100 | Val Loss: 0.5004 | Val Acc: 0.7959
Epoch 10/50 | Train Loss: 0.4031 | Val Loss: 0.4842 | Val Acc: 0.7959
Epoch 11/50 | Train Loss: 0.4127 | Val Loss: 0.4715 | Val Acc: 0.8163
Epoch 12/50 | Train Loss: 0.3540 | Val Loss: 0.4593 | Val Acc: 0.7755
Epoch 13/50 | Train Loss: 0.3348 | Val Loss: 0.4506 | Val Acc: 0.7755
Epoch 14/50 | Train Loss: 0.3218 | Val Loss: 0.4434 | Val Acc: 0.7959
Epoch 15/50 | Train Loss: 0.3