In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score


In [2]:
# 데이터 불러오기
file_path = r"C:\Users\tjdgu\CodingFiles\AI_Introduction\week6\diabetes.csv"
df = pd.read_csv(file_path)

df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [3]:
# 입력과 타겟 분리
X = df.drop('Outcome', axis=1).values
y = df['Outcome'].values.astype(np.float32)

In [4]:
X

array([[  6.   , 148.   ,  72.   , ...,  33.6  ,   0.627,  50.   ],
       [  1.   ,  85.   ,  66.   , ...,  26.6  ,   0.351,  31.   ],
       [  8.   , 183.   ,  64.   , ...,  23.3  ,   0.672,  32.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,  26.2  ,   0.245,  30.   ],
       [  1.   , 126.   ,  60.   , ...,  30.1  ,   0.349,  47.   ],
       [  1.   ,  93.   ,  70.   , ...,  30.4  ,   0.315,  23.   ]])

In [5]:
y

array([1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1.,
       1., 0., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0.,
       0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0.,
       0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 0., 0.,
       0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1.,
       1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1.,
       1., 0., 0., 0., 0.

In [6]:
# 정규화
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [7]:
# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
# TensorDataset으로 래핑

# 넘파이를 텐서로 바꾸기
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# 입력값, 정답값 하나로 묶기
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# 데이터를 배치사이즈 만큼 꺼내줌
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)


In [9]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((614, 8), (154, 8), (614,), (154,))

In [10]:
# 분류 모델 정의
class ClassificationModel(nn.Module):
    def __init__(self):
        super(ClassificationModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(8, 64),   # 입력특성: 8개, 노드수: 64개
            nn.ReLU(),
            nn.Linear(64, 32),  # 노드수: 64 -> 32
            nn.ReLU(),
            nn.Linear(32, 1)    # 노드: 32 -> 1(출력층)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ClassificationModel().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [11]:
# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1, Loss: 0.6662
Epoch 2, Loss: 0.6042
Epoch 3, Loss: 0.5406
Epoch 4, Loss: 0.4961
Epoch 5, Loss: 0.4669
Epoch 6, Loss: 0.4549
Epoch 7, Loss: 0.4666
Epoch 8, Loss: 0.4500
Epoch 9, Loss: 0.4434
Epoch 10, Loss: 0.4490
Epoch 11, Loss: 0.4376
Epoch 12, Loss: 0.4469
Epoch 13, Loss: 0.4444
Epoch 14, Loss: 0.4512
Epoch 15, Loss: 0.4323
Epoch 16, Loss: 0.4429
Epoch 17, Loss: 0.4189
Epoch 18, Loss: 0.4217
Epoch 19, Loss: 0.4153
Epoch 20, Loss: 0.4132
Epoch 21, Loss: 0.4080
Epoch 22, Loss: 0.4130
Epoch 23, Loss: 0.4090
Epoch 24, Loss: 0.4311
Epoch 25, Loss: 0.4056
Epoch 26, Loss: 0.4016
Epoch 27, Loss: 0.3990
Epoch 28, Loss: 0.3984
Epoch 29, Loss: 0.4174
Epoch 30, Loss: 0.4012
Epoch 31, Loss: 0.4015
Epoch 32, Loss: 0.3990
Epoch 33, Loss: 0.3912
Epoch 34, Loss: 0.3989
Epoch 35, Loss: 0.3896
Epoch 36, Loss: 0.3871
Epoch 37, Loss: 0.3996
Epoch 38, Loss: 0.3852
Epoch 39, Loss: 0.3986
Epoch 40, Loss: 0.3875
Epoch 41, Loss: 0.3844
Epoch 42, Loss: 0.3985
Epoch 43, Loss: 0.3911
Epoch 44, Loss: 0.37

In [12]:
# 평가

model.eval()
preds, actuals = [], []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch)
        probs = torch.sigmoid(outputs).cpu().numpy()  # 0~1 확률로 변환
        preds.extend(probs)
        actuals.extend(y_batch.cpu().numpy())

# threshold: 0.5 이상이면 1, 아니면 0
pred_labels = (np.array(preds) >= 0.5).astype(int)

print("Accuracy:", accuracy_score(actuals, pred_labels))
print("F1 Score:", f1_score(actuals, pred_labels))
print("AUC Score:", roc_auc_score(actuals, preds))


Accuracy: 0.7662337662337663
F1 Score: 0.6727272727272727
AUC Score: 0.8080808080808082
