In [50]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

In [51]:
df = pd.read_csv('https://github.com/MyungKyuYi/AI-class/raw/refs/heads/main/diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [52]:
df.isnull().sum()

Unnamed: 0,0
Pregnancies,0
Glucose,0
BloodPressure,0
SkinThickness,0
Insulin,0
BMI,0
DiabetesPedigreeFunction,0
Age,0
Outcome,0


In [53]:
df = df.drop('Outcome', axis = 1)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [54]:
correlations = df.corr()
print(correlations['BMI'].sort_values(ascending = False))

BMI                         1.000000
SkinThickness               0.392573
BloodPressure               0.281805
Glucose                     0.221071
Insulin                     0.197859
DiabetesPedigreeFunction    0.140647
Age                         0.036242
Pregnancies                 0.017683
Name: BMI, dtype: float64


In [55]:
X = df.drop('BMI', axis = 1).values
y = df['BMI'].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

In [56]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((614, 7), (154, 7), (614,), (154,))

In [57]:
X_train_tensor = torch.tensor(X_train, dtype = torch.float32)
y_train_tensor = torch.tensor(y_train, dtype = torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype = torch.float32)
y_test_tensor = torch.tensor(y_test, dtype = torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = 32)

In [58]:
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(7, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [59]:
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 1091.7319
Epoch 2, Loss: 1026.3484
Epoch 3, Loss: 933.8115
Epoch 4, Loss: 801.0785
Epoch 5, Loss: 624.4665
Epoch 6, Loss: 395.3228
Epoch 7, Loss: 217.4784
Epoch 8, Loss: 124.4260
Epoch 9, Loss: 97.2188
Epoch 10, Loss: 91.2981
Epoch 11, Loss: 86.1725
Epoch 12, Loss: 79.8227
Epoch 13, Loss: 77.6097
Epoch 14, Loss: 76.2834
Epoch 15, Loss: 72.6563
Epoch 16, Loss: 71.1316
Epoch 17, Loss: 69.8325
Epoch 18, Loss: 66.9745
Epoch 19, Loss: 65.4642
Epoch 20, Loss: 71.1374
Epoch 21, Loss: 62.9377
Epoch 22, Loss: 63.3457
Epoch 23, Loss: 65.9186
Epoch 24, Loss: 59.8213
Epoch 25, Loss: 58.9264
Epoch 26, Loss: 57.8463
Epoch 27, Loss: 56.3574
Epoch 28, Loss: 61.7901
Epoch 29, Loss: 54.4106
Epoch 30, Loss: 54.6141
Epoch 31, Loss: 52.1066
Epoch 32, Loss: 51.5021
Epoch 33, Loss: 52.0462
Epoch 34, Loss: 51.9683
Epoch 35, Loss: 49.8710
Epoch 36, Loss: 53.6968
Epoch 37, Loss: 48.7632
Epoch 38, Loss: 54.4549
Epoch 39, Loss: 49.6223
Epoch 40, Loss: 48.6035
Epoch 41, Loss: 46.7361
Epoch 42, Loss:

In [60]:
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")

Test MSE: 53.9844


In [61]:
mae = mean_absolute_error(actuals, preds)
rmse = np.sqrt(mean_squared_error(actuals, preds))
r2 = r2_score(actuals, preds)

print(f"MAE : {mae:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²  : {r2:.4f}")

MAE : 5.2325
RMSE: 7.3474
R²  : 0.0905
