In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score


In [2]:
# 데이터 불러오기
file_path = r"C:\Users\tjdgu\CodingFiles\AI_Introduction\week6\diabetes.csv"
df = pd.read_csv(file_path)

df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [3]:
# 입력과 타겟 분리
X = df.drop('BMI', axis=1).values
y = df['BMI'].values.astype(np.float32)

In [4]:
X

array([[  6.   , 148.   ,  72.   , ...,   0.627,  50.   ,   1.   ],
       [  1.   ,  85.   ,  66.   , ...,   0.351,  31.   ,   0.   ],
       [  8.   , 183.   ,  64.   , ...,   0.672,  32.   ,   1.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,   0.245,  30.   ,   0.   ],
       [  1.   , 126.   ,  60.   , ...,   0.349,  47.   ,   1.   ],
       [  1.   ,  93.   ,  70.   , ...,   0.315,  23.   ,   0.   ]])

In [5]:
y

array([33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31. , 35.3, 30.5,  0. , 37.6,
       38. , 27.1, 30.1, 25.8, 30. , 45.8, 29.6, 43.3, 34.6, 39.3, 35.4,
       39.8, 29. , 36.6, 31.1, 39.4, 23.2, 22.2, 34.1, 36. , 31.6, 24.8,
       19.9, 27.6, 24. , 33.2, 32.9, 38.2, 37.1, 34. , 40.2, 22.7, 45.4,
       27.4, 42. , 29.7, 28. , 39.1,  0. , 19.4, 24.2, 24.4, 33.7, 34.7,
       23. , 37.7, 46.8, 40.5, 41.5,  0. , 32.9, 25. , 25.4, 32.8, 29. ,
       32.5, 42.7, 19.6, 28.9, 32.9, 28.6, 43.4, 35.1, 32. , 24.7, 32.6,
       37.7, 43.2, 25. , 22.4,  0. , 29.3, 24.6, 48.8, 32.4, 36.6, 38.5,
       37.1, 26.5, 19.1, 32. , 46.7, 23.8, 24.7, 33.9, 31.6, 20.4, 28.7,
       49.7, 39. , 26.1, 22.5, 26.6, 39.6, 28.7, 22.4, 29.5, 34.3, 37.4,
       33.3, 34. , 31.2, 34. , 30.5, 31.2, 34. , 33.7, 28.2, 23.2, 53.2,
       34.2, 33.6, 26.8, 33.3, 55. , 42.9, 33.3, 34.5, 27.9, 29.7, 33.3,
       34.5, 38.3, 21.1, 33.8, 30.8, 28.7, 31.2, 36.9, 21.1, 39.5, 32.5,
       32.4, 32.8,  0. , 32.8, 30.5, 33.7, 27.3, 37

In [6]:
# 정규화
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [7]:
# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
# TensorDataset으로 래핑

# 넘파이를 텐서로 바꾸기
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# 입력값, 정답값 하나로 묶기
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# 데이터를 배치사이즈 만큼 꺼내줌
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)


In [9]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((614, 8), (154, 8), (614,), (154,))

In [10]:
# 회귀 모델 정의
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(8, 64),   # 입력특성: 8개, 노드수: 64개
            nn.ReLU(),
            nn.Linear(64, 32),  # 노드수: 64 -> 32
            nn.ReLU(),
            nn.Linear(32, 1)    # 노드: 32 -> 1(출력층)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [11]:
# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1, Loss: 1074.9000
Epoch 2, Loss: 1008.0527
Epoch 3, Loss: 905.4188
Epoch 4, Loss: 754.9640
Epoch 5, Loss: 541.5829
Epoch 6, Loss: 324.0040
Epoch 7, Loss: 174.5965
Epoch 8, Loss: 93.0840
Epoch 9, Loss: 75.6163
Epoch 10, Loss: 69.4904
Epoch 11, Loss: 69.0478
Epoch 12, Loss: 62.9466
Epoch 13, Loss: 60.2416
Epoch 14, Loss: 58.1738
Epoch 15, Loss: 56.4974
Epoch 16, Loss: 55.0321
Epoch 17, Loss: 53.3287
Epoch 18, Loss: 55.2694
Epoch 19, Loss: 58.0933
Epoch 20, Loss: 51.0711
Epoch 21, Loss: 49.4891
Epoch 22, Loss: 49.9382
Epoch 23, Loss: 49.9376
Epoch 24, Loss: 47.1834
Epoch 25, Loss: 46.0300
Epoch 26, Loss: 45.4701
Epoch 27, Loss: 44.7172
Epoch 28, Loss: 46.3902
Epoch 29, Loss: 44.1487
Epoch 30, Loss: 42.3913
Epoch 31, Loss: 44.0666
Epoch 32, Loss: 41.7367
Epoch 33, Loss: 41.7247
Epoch 34, Loss: 41.3637
Epoch 35, Loss: 40.1263
Epoch 36, Loss: 40.9625
Epoch 37, Loss: 40.5440
Epoch 38, Loss: 38.5284
Epoch 39, Loss: 37.7717
Epoch 40, Loss: 37.8041
Epoch 41, Loss: 38.4312
Epoch 42, Loss: 

In [12]:
# 평가
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")


Test MSE: 60.5095
