In [78]:
import pandas as pd

df = pd.read_csv("BP_data.csv")
df

Unnamed: 0,Patient_Number,Blood_Pressure_Abnormality,Level_of_Hemoglobin,Genetic_Pedigree_Coefficient,Age,BMI,Sex,Pregnancy,Smoking,Physical_activity,salt_content_in_the_diet,alcohol_consumption_per_day,Level_of_Stress,Chronic_kidney_disease,Adrenal_and_thyroid_disorders
0,1,1,11.28,0.90,34,23,1,1.0,0,45961,48071,,2,1,1
1,2,0,9.75,0.23,54,33,1,,0,26106,25333,205.0,3,0,0
2,3,1,10.79,0.91,70,49,0,,0,9995,29465,67.0,2,1,0
3,4,0,11.00,0.43,71,50,0,,0,10635,7439,242.0,1,1,0
4,5,1,14.17,0.83,52,19,0,,0,15619,49644,397.0,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1996,1,10.14,0.02,69,26,1,,1,26118,47568,144.0,3,1,0
1996,1997,1,11.77,1.00,24,45,1,1.0,1,2572,8063,,3,1,1
1997,1998,1,16.91,0.22,18,42,0,,0,14933,24753,,2,1,1
1998,1999,0,11.15,0.72,46,45,1,,1,18157,15275,253.0,3,0,1


In [79]:
df = df.drop(columns=['Pregnancy'])
df = df.dropna()
print(df.isna().sum())

Patient_Number                   0
Blood_Pressure_Abnormality       0
Level_of_Hemoglobin              0
Genetic_Pedigree_Coefficient     0
Age                              0
BMI                              0
Sex                              0
Smoking                          0
Physical_activity                0
salt_content_in_the_diet         0
alcohol_consumption_per_day      0
Level_of_Stress                  0
Chronic_kidney_disease           0
Adrenal_and_thyroid_disorders    0
dtype: int64


In [80]:
X = df.drop(columns=['Blood_Pressure_Abnormality']).values
Y = df['Blood_Pressure_Abnormality'].values

print(Y)

[0 1 0 ... 1 0 1]


In [81]:
from sklearn.preprocessing import StandardScaler

# 정규화
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [82]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.2, random_state=42)
print(X_train.shape)

(1341, 13)


In [83]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# TensorDataset으로 래핑
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1,1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1,1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [86]:
import torch.nn as nn
import torch.optim as optim

# 분류 모델 정의
class ClassificationModel(nn.Module):
    def __init__(self):
        super(ClassificationModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(13, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1) # 출력 1개, BCEWithLogitsLoss에 Sigmoid 내장
        )
    def forward(self, x):
        return self.model(x)

# GPU 사용, 없으면 cpu 사용
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ClassificationModel().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [87]:
# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 0.6455
Epoch 2, Loss: 0.5306
Epoch 3, Loss: 0.4686
Epoch 4, Loss: 0.4145
Epoch 5, Loss: 0.3525
Epoch 6, Loss: 0.3109
Epoch 7, Loss: 0.2836
Epoch 8, Loss: 0.2711
Epoch 9, Loss: 0.2449
Epoch 10, Loss: 0.2304
Epoch 11, Loss: 0.2153
Epoch 12, Loss: 0.2081
Epoch 13, Loss: 0.1932
Epoch 14, Loss: 0.1796
Epoch 15, Loss: 0.1701
Epoch 16, Loss: 0.1609
Epoch 17, Loss: 0.1500
Epoch 18, Loss: 0.1389
Epoch 19, Loss: 0.1271
Epoch 20, Loss: 0.1186
Epoch 21, Loss: 0.1073
Epoch 22, Loss: 0.1062
Epoch 23, Loss: 0.0934
Epoch 24, Loss: 0.0860
Epoch 25, Loss: 0.0811
Epoch 26, Loss: 0.0747
Epoch 27, Loss: 0.0671
Epoch 28, Loss: 0.0628
Epoch 29, Loss: 0.0541
Epoch 30, Loss: 0.0476
Epoch 31, Loss: 0.0420
Epoch 32, Loss: 0.0388
Epoch 33, Loss: 0.0402
Epoch 34, Loss: 0.0327
Epoch 35, Loss: 0.0315
Epoch 36, Loss: 0.0262
Epoch 37, Loss: 0.0230
Epoch 38, Loss: 0.0228
Epoch 39, Loss: 0.0179
Epoch 40, Loss: 0.0173
Epoch 41, Loss: 0.0152
Epoch 42, Loss: 0.0133
Epoch 43, Loss: 0.0114
Epoch 44, Loss: 0.01

In [88]:
from sklearn.metrics import  accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score
import torch

# 평가
model.eval()

all_preds = []
all_probs = []
all_labels = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        # 예측
        outputs = model(X_batch)
        probs = torch.sigmoid(outputs)  # 확률로 변환
        preds = (probs > 0.5).float()   # 0.5 기준으로 이진 분류

        all_preds.extend(preds.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())  # ROC AUC용
        all_labels.extend(y_batch.cpu().numpy())


# 평가 지표 출력
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds)
recall = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)
conf_matrix = confusion_matrix(all_labels, all_preds)
roc_auc = roc_auc_score(all_labels, all_probs)

print(f"Accuracy     : {accuracy:.4f}")
print(f"Precision    : {precision:.4f}")
print(f"Recall       : {recall:.4f}")
print(f"F1 Score     : {f1:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")
print(f"Confusion Matrix:\n{conf_matrix}")

Accuracy     : 0.8185
Precision    : 0.8187
Recall       : 0.8037
F1 Score     : 0.8111
ROC AUC Score: 0.8885
Confusion Matrix:
[[144  29]
 [ 32 131]]
