1 — Import Library

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim


2 — Load Dataset

In [2]:
df = pd.read_csv("diabetes.csv")
df.head()


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


3 — Pisahkan Fitur (X) & Target (y)

In [3]:
X = df.drop("Outcome", axis=1)
y = df["Outcome"]


4 — Split Train & Test

In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


5 — Scaling (SANGAT PENTING UNTUK NN)

In [5]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


6 — Convert ke Tensor PyTorch

In [6]:
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1,1)

X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1,1)


7 — Membangun Neural Network

In [7]:
class DiabetesNN(nn.Module):
    def __init__(self, input_dim):
        super(DiabetesNN, self).__init__()

        self.fc1 = nn.Linear(input_dim, 16)
        self.relu1 = nn.ReLU()

        self.fc2 = nn.Linear(16, 8)
        self.relu2 = nn.ReLU()

        self.fc3 = nn.Linear(8, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)

        x = self.fc2(x)
        x = self.relu2(x)

        x = self.fc3(x)
        x = self.sigmoid(x)
        return x


8 — Inisialisasi Model, Loss, Optimizer

In [8]:
input_dim = X_train.shape[1]
model = DiabetesNN(input_dim)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


9 — Training Model

In [9]:
epochs = 2000

for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    loss.backward()
    optimizer.step()

    if (epoch+1) % 200 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")


Epoch [200/2000], Loss: 0.4424
Epoch [400/2000], Loss: 0.3989
Epoch [600/2000], Loss: 0.3642
Epoch [800/2000], Loss: 0.3312
Epoch [1000/2000], Loss: 0.3012
Epoch [1200/2000], Loss: 0.2694
Epoch [1400/2000], Loss: 0.2375
Epoch [1600/2000], Loss: 0.2087
Epoch [1800/2000], Loss: 0.1817
Epoch [2000/2000], Loss: 0.1626


10 — Evaluasi Model

In [10]:
with torch.no_grad():
    predictions = model(X_test_tensor)


Konversi probabilitas → kelas (0 atau 1)

In [11]:
predicted_classes = (predictions > 0.5).int()


Hitung akurasi

In [12]:
correct = (predicted_classes == y_test_tensor).sum().item()
accuracy = correct / len(y_test_tensor)

print(f"\nModel Accuracy: {accuracy:.4f}")



Model Accuracy: 0.6948


In [13]:
# Baseline: tebak kelas mayoritas
baseline_accuracy = max(y_test.mean(), 1 - y_test.mean())
print(f"Baseline Accuracy: {baseline_accuracy:.4f}")


Baseline Accuracy: 0.6429


Cek distribusi kelas

In [14]:
print(y.value_counts())

Outcome
0    500
1    268
Name: count, dtype: int64


confusion matrix & classification report.

In [15]:
from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test_tensor, predicted_classes))
print(classification_report(y_test_tensor, predicted_classes))


[[72 27]
 [20 35]]
              precision    recall  f1-score   support

         0.0       0.78      0.73      0.75        99
         1.0       0.56      0.64      0.60        55

    accuracy                           0.69       154
   macro avg       0.67      0.68      0.68       154
weighted avg       0.70      0.69      0.70       154



Improve Model

tweak sederhana (HIGH IMPACT)

In [16]:
class DiabetesNN(nn.Module):
    def __init__(self, input_dim):
        super(DiabetesNN, self).__init__()

        self.fc1 = nn.Linear(input_dim, 32)  # ⬅️ neuron ditambah
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 1)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)

        x = self.fc2(x)
        x = self.relu(x)

        x = self.fc3(x)
        x = self.sigmoid(x)
        return x


In [17]:
input_dim = X_train.shape[1]
model = DiabetesNN(input_dim)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)  # ⬅️ LR diperkecil


In [18]:
epochs = 3000  # ⬅️ epoch ditambah

for epoch in range(epochs):
    optimizer.zero_grad()

    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    loss.backward()
    optimizer.step()

    if (epoch + 1) % 300 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")


Epoch [300/3000], Loss: 0.4259
Epoch [600/3000], Loss: 0.3783
Epoch [900/3000], Loss: 0.3272
Epoch [1200/3000], Loss: 0.2585
Epoch [1500/3000], Loss: 0.1876
Epoch [1800/3000], Loss: 0.1272
Epoch [2100/3000], Loss: 0.0798
Epoch [2400/3000], Loss: 0.0473
Epoch [2700/3000], Loss: 0.0295
Epoch [3000/3000], Loss: 0.0191


In [19]:
with torch.no_grad():
    predictions = model(X_test_tensor)
    predicted_classes = (predictions > 0.5).int()


In [20]:
correct = (predicted_classes == y_test_tensor).sum().item()
accuracy = correct / len(y_test_tensor)

print(f"\nModel Accuracy: {accuracy:.4f}")



Model Accuracy: 0.6299


In [21]:
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_tensor, predicted_classes))

print("\nClassification Report:")
print(classification_report(y_test_tensor, predicted_classes))



Confusion Matrix:
[[67 32]
 [25 30]]

Classification Report:
              precision    recall  f1-score   support

         0.0       0.73      0.68      0.70        99
         1.0       0.48      0.55      0.51        55

    accuracy                           0.63       154
   macro avg       0.61      0.61      0.61       154
weighted avg       0.64      0.63      0.63       154



THRESHOLD

In [22]:
from sklearn.metrics import classification_report, confusion_matrix

thresholds = [0.3, 0.4, 0.5]

for t in thresholds:
    preds = (predictions > t).int()

    print(f"\n=== Threshold {t} ===")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test_tensor, preds))
    print("Classification Report:")
    print(classification_report(y_test_tensor, preds))



=== Threshold 0.3 ===
Confusion Matrix:
[[64 35]
 [22 33]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.74      0.65      0.69        99
         1.0       0.49      0.60      0.54        55

    accuracy                           0.63       154
   macro avg       0.61      0.62      0.61       154
weighted avg       0.65      0.63      0.64       154


=== Threshold 0.4 ===
Confusion Matrix:
[[66 33]
 [25 30]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.73      0.67      0.69        99
         1.0       0.48      0.55      0.51        55

    accuracy                           0.62       154
   macro avg       0.60      0.61      0.60       154
weighted avg       0.64      0.62      0.63       154


=== Threshold 0.5 ===
Confusion Matrix:
[[67 32]
 [25 30]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.73      0.68      0.7

In [23]:
torch.save(model.state_dict(), "diabetes_model_0_4_threshold.pth")
print("Model saved to diabetes_model_0_4_threshold.pth")

Model saved to diabetes_model_0_4_threshold.pth
