In [8]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import tensorflow as tf

import matplotlib.pyplot as plt
from torchsummary import summary
from sklearn.model_selection import train_test_split
import numpy as np

In [9]:
url = "https://github.com/MyungKyuYi/AI-class/raw/refs/heads/main/diabetes.csv"
data = pd.read_csv(url)
data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [10]:
data.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [11]:
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']

In [24]:
X = data.drop('BMI', axis=1).values
y = data['BMI'].values

In [25]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [27]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape,

((614, 8), (154, 8), (614,), (154,))

In [28]:
# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [29]:
# Create DataLoader
train_dataset = TensorDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(X_test, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [30]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([614, 8]),
 torch.Size([154, 8]),
 torch.Size([614, 1]),
 torch.Size([154, 1]))

In [31]:
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(8, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [32]:
# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_dataloader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_dataloader):.4f}")

Epoch 1, Loss: 1074.0210
Epoch 2, Loss: 1002.5862
Epoch 3, Loss: 914.6161
Epoch 4, Loss: 757.1090
Epoch 5, Loss: 560.5519
Epoch 6, Loss: 356.6425
Epoch 7, Loss: 192.8144
Epoch 8, Loss: 119.9525
Epoch 9, Loss: 100.0218
Epoch 10, Loss: 98.9467
Epoch 11, Loss: 81.6600
Epoch 12, Loss: 76.0666
Epoch 13, Loss: 73.0525
Epoch 14, Loss: 68.4608
Epoch 15, Loss: 67.2212
Epoch 16, Loss: 62.9221
Epoch 17, Loss: 61.1007
Epoch 18, Loss: 61.8160
Epoch 19, Loss: 58.7183
Epoch 20, Loss: 57.4173
Epoch 21, Loss: 61.2540
Epoch 22, Loss: 57.4556
Epoch 23, Loss: 54.5449
Epoch 24, Loss: 52.8741
Epoch 25, Loss: 52.9501
Epoch 26, Loss: 52.2616
Epoch 27, Loss: 49.7483
Epoch 28, Loss: 50.8298
Epoch 29, Loss: 48.2211
Epoch 30, Loss: 47.7520
Epoch 31, Loss: 47.2357
Epoch 32, Loss: 47.4459
Epoch 33, Loss: 47.1184
Epoch 34, Loss: 46.1023
Epoch 35, Loss: 48.8951
Epoch 36, Loss: 47.2090
Epoch 37, Loss: 44.5411
Epoch 38, Loss: 45.9423
Epoch 39, Loss: 43.9066
Epoch 40, Loss: 43.5865
Epoch 41, Loss: 43.7700
Epoch 42, Loss

In [33]:
# 평가
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_dataloader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")


Test MSE: 52.5806
