In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [3]:
# 데이터 로드
data = pd.read_csv("C:\\Users\\LG\\Downloads\\diabetes.csv")

In [6]:
# 특성과 타겟 분리
X = data.drop(columns=['Outcome']).values
y = data['Outcome'].values.astype(np.int64)  # 분류이므로 int64

In [8]:
# 정규화
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [10]:
# train/test 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
# Tensor 변환
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [14]:
# 이진 분류 모델 정의
class ClassificationModel(nn.Module):
    def __init__(self):
        super(ClassificationModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(8, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2)  # 이진 분류: 클래스 2개
        )

    def forward(self, x):
        return self.model(x)

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ClassificationModel().to(device)
criterion = nn.CrossEntropyLoss()  # 분류용 손실 함수
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [18]:
# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 0.6342
Epoch 2, Loss: 0.5676
Epoch 3, Loss: 0.5134
Epoch 4, Loss: 0.4744
Epoch 5, Loss: 0.4540
Epoch 6, Loss: 0.4542
Epoch 7, Loss: 0.4377
Epoch 8, Loss: 0.4392
Epoch 9, Loss: 0.4289
Epoch 10, Loss: 0.4163
Epoch 11, Loss: 0.4254
Epoch 12, Loss: 0.4391
Epoch 13, Loss: 0.4130
Epoch 14, Loss: 0.4029
Epoch 15, Loss: 0.3971
Epoch 16, Loss: 0.4071
Epoch 17, Loss: 0.4085
Epoch 18, Loss: 0.4021
Epoch 19, Loss: 0.4006
Epoch 20, Loss: 0.3953
Epoch 21, Loss: 0.4070
Epoch 22, Loss: 0.3884
Epoch 23, Loss: 0.3848
Epoch 24, Loss: 0.3800
Epoch 25, Loss: 0.3808
Epoch 26, Loss: 0.3777
Epoch 27, Loss: 0.3764
Epoch 28, Loss: 0.3688
Epoch 29, Loss: 0.3739
Epoch 30, Loss: 0.3747
Epoch 31, Loss: 0.3750
Epoch 32, Loss: 0.3744
Epoch 33, Loss: 0.3667
Epoch 34, Loss: 0.3699
Epoch 35, Loss: 0.3711
Epoch 36, Loss: 0.3669
Epoch 37, Loss: 0.3522
Epoch 38, Loss: 0.3577
Epoch 39, Loss: 0.3580
Epoch 40, Loss: 0.3467
Epoch 41, Loss: 0.3514
Epoch 42, Loss: 0.3400
Epoch 43, Loss: 0.3451
Epoch 44, Loss: 0.34

In [20]:
# 평가
model.eval()
all_preds = []
with torch.no_grad():
    for X_batch, _ in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch)
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        all_preds.extend(preds)

In [22]:
accuracy = accuracy_score(y_test, all_preds)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 74.03%
