<a href="https://colab.research.google.com/github/bintangnabiil/Machine_Learning/blob/main/Machine_Learning_UAS_KlasifikasiUTS_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##1a. Pengumpulan & Pembersihan Data

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split, TensorDataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load data
df = pd.read_csv('/content/KlasifikasiUTS.csv')

# Target regresi (kolom ke-0)
y_reg = df.iloc[:, 0].values.reshape(-1, 1)

# Target klasifikasi (kolom terakhir)
y_clf = df.iloc[:, -1]

# Fitur
X = df.iloc[:, 1:-1].values

##1b. Feature Engineering

In [2]:
# Normalisasi fitur
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Label Encoding untuk klasifikasi
le = LabelEncoder()
y_clf_encoded = le.fit_transform(y_clf)

In [3]:
# Untuk regresi
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_scaled, y_reg, test_size=0.2, random_state=42)

# Untuk klasifikasi
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X_scaled, y_clf_encoded, test_size=0.2, random_state=42)

##1c. Arsitektur MLP

##A. MLP Regressor (PyTorch)

In [4]:
class MLPRegressor(nn.Module):
    def __init__(self, input_dim):
        super(MLPRegressor, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.net(x)

In [5]:
# Convert data ke tensor
X_train_tensor = torch.tensor(X_train_reg, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_reg, dtype=torch.float32)

X_test_tensor = torch.tensor(X_test_reg, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_reg, dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

###Training Loop

In [6]:
model = MLPRegressor(input_dim=X_train_tensor.shape[1])
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(100):
    model.train()
    for x_batch, y_batch in train_loader:
        output = model(x_batch)
        loss = criterion(output, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

###Evaluasi Regressor

In [7]:
model.eval()
with torch.no_grad():
    pred = model(X_test_tensor).numpy()

mse = mean_squared_error(y_test_reg, pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_reg, pred)

print(f"MLP Regressor - MSE: {mse:.4f}, RMSE: {rmse:.4f}, R²: {r2:.4f}")

MLP Regressor - MSE: 980764772.3554, RMSE: 31317.1642, R²: 0.5651


##B. MLP Classifier (PyTorch)

In [8]:
class MLPClassifier(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLPClassifier, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, num_classes)
        )

    def forward(self, x):
        return self.net(x)

In [9]:
# Data tensor
X_train_tensor = torch.tensor(X_train_clf, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_clf, dtype=torch.long)

X_test_tensor = torch.tensor(X_test_clf, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_clf, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

###Training Loop

In [10]:
num_classes = len(np.unique(y_train_clf))
model = MLPClassifier(input_dim=X_train_tensor.shape[1], num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(100):
    model.train()
    for x_batch, y_batch in train_loader:
        output = model(x_batch)
        loss = criterion(output, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

###Evaluasi Classifier

In [19]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import torch.nn.functional as F

# Prediksi logits (output langsung dari model sebelum softmax)
logits = model(X_test_tensor)

# Probabilitas prediksi (softmax ke axis kelas)
probs = F.softmax(logits, dim=1).detach().cpu().numpy()

# Kelas prediksi
preds = probs.argmax(axis=1)

# Ground truth (pastikan sudah bentuk 1D array biasa, bukan tensor)
y_true = y_test_clf if isinstance(y_test_clf, np.ndarray) else y_test_clf.numpy()

# Evaluasi
acc = accuracy_score(y_true, preds)
prec = precision_score(y_true, preds, average='weighted', zero_division=0)
rec = recall_score(y_true, preds, average='weighted', zero_division=0)
f1 = f1_score(y_true, preds, average='weighted', zero_division=0)

# AUC hanya bisa dihitung jika jumlah kelas >= 2 dan probabilitas valid
try:
    if probs.shape[1] == 2:
        roc = roc_auc_score(y_true, probs[:, 1])
    else:
        roc = roc_auc_score(y_true, probs, multi_class='ovr')
except ValueError as e:
    roc = float('nan')
    print(f"Warning: AUC couldn't be calculated — {e}")

# Output
print(f"MLP Classifier - Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}, AUC: {roc:.4f}")

MLP Classifier - Accuracy: 0.9994, Precision: 0.9993, Recall: 0.9994, F1-Score: 0.9994, AUC: 0.9838


##C. CNN Classifier (PyTorch)

###Persiapan Data

In [16]:
print(X_train_clf.shape)

(227845, 29)


In [20]:
import numpy as np
import torch

# Cek jumlah fitur agar tahu bentuk yang bisa digunakan
n_features = X_train_clf.shape[1]
print(f"Jumlah fitur: {n_features}")

Jumlah fitur: 29


In [30]:
import torch

# Ambil 25 fitur pertama agar bisa jadi 5x5
X_train_crop = X_train_clf[:, :25]
X_test_crop = X_test_clf[:, :25]

# Ubah ke (batch_size, channel=1, height=5, width=5)
X_train_cnn = X_train_crop.reshape(-1, 1, 5, 5)
X_test_cnn = X_test_crop.reshape(-1, 1, 5, 5)

# Konversi ke tensor PyTorch
X_train_tensor = torch.tensor(X_train_cnn, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_cnn, dtype=torch.float32)

y_train_tensor = torch.tensor(y_train_clf, dtype=torch.long)
y_test_tensor = torch.tensor(y_test_clf, dtype=torch.long)

# DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [31]:
class CNNClassifier(nn.Module):
    def __init__(self, num_classes):
        super(CNNClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)  # Output: 16x5x5
        self.pool = nn.MaxPool2d(2, 2)                           # Output: 16x2x2
        self.fc1 = nn.Linear(16 * 2 * 2, num_classes)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))  # (batch_size, 16, 2, 2)
        x = x.view(x.size(0), -1)                 # Flatten ke (batch_size, 64)
        x = self.fc1(x)
        return x

###Training

In [32]:
import torch.nn as nn
import torch.optim as optim

model = CNNClassifier(num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(50):
    model.train()
    running_loss = 0.0

    for x_batch, y_batch in train_loader:
        x_batch = x_batch.float()
        y_batch = y_batch.long()

        output = model(x_batch)
        loss = criterion(output, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/50, Loss: {running_loss/len(train_loader):.4f}")

Epoch 1/50, Loss: 0.0098
Epoch 2/50, Loss: 0.0043
Epoch 3/50, Loss: 0.0038
Epoch 4/50, Loss: 0.0035
Epoch 5/50, Loss: 0.0034
Epoch 6/50, Loss: 0.0033
Epoch 7/50, Loss: 0.0033
Epoch 8/50, Loss: 0.0032
Epoch 9/50, Loss: 0.0032
Epoch 10/50, Loss: 0.0032
Epoch 11/50, Loss: 0.0031
Epoch 12/50, Loss: 0.0031
Epoch 13/50, Loss: 0.0031
Epoch 14/50, Loss: 0.0031
Epoch 15/50, Loss: 0.0031
Epoch 16/50, Loss: 0.0031
Epoch 17/50, Loss: 0.0030
Epoch 18/50, Loss: 0.0030
Epoch 19/50, Loss: 0.0030
Epoch 20/50, Loss: 0.0030
Epoch 21/50, Loss: 0.0030
Epoch 22/50, Loss: 0.0029
Epoch 23/50, Loss: 0.0030
Epoch 24/50, Loss: 0.0029
Epoch 25/50, Loss: 0.0030
Epoch 26/50, Loss: 0.0030
Epoch 27/50, Loss: 0.0029
Epoch 28/50, Loss: 0.0029
Epoch 29/50, Loss: 0.0030
Epoch 30/50, Loss: 0.0029
Epoch 31/50, Loss: 0.0029
Epoch 32/50, Loss: 0.0030
Epoch 33/50, Loss: 0.0029
Epoch 34/50, Loss: 0.0029
Epoch 35/50, Loss: 0.0029
Epoch 36/50, Loss: 0.0029
Epoch 37/50, Loss: 0.0029
Epoch 38/50, Loss: 0.0029
Epoch 39/50, Loss: 0.

###Evaluasi CNN

In [36]:
print("Unique labels in y_test_clf:", np.unique(y_test_clf))
print("Shape of probs (model output):", probs.shape)
print("Shape of y_test_bin:", y_test_bin.shape)

Unique labels in y_test_clf: [0 1]
Shape of probs (model output): (56962, 2)
Shape of y_test_bin: (56962, 1)


In [37]:
from sklearn.preprocessing import label_binarize

model.eval()
with torch.no_grad():
    logits = model(X_test_tensor)
    probs = nn.functional.softmax(logits, dim=1).numpy()
    preds = np.argmax(probs, axis=1)

# Hitung metrik klasifikasi
acc = accuracy_score(y_test_clf, preds)
prec = precision_score(y_test_clf, preds, average='weighted')
rec = recall_score(y_test_clf, preds, average='weighted')
f1 = f1_score(y_test_clf, preds, average='weighted')

# Dapatkan jumlah kelas dari model output
n_classes = probs.shape[1]
class_list = list(range(n_classes))  # [0, 1, 2, ..., n_classes-1]

# Paksa y_test_clf dibinarisasi dengan semua kelas (meskipun y_test_clf tidak mengandung semuanya)
y_test_bin = label_binarize(y_test_clf, classes=class_list)

# Jika y_test_bin.shape tidak cocok, tambahkan kolom dummy
if y_test_bin.shape[1] < n_classes:
    diff = n_classes - y_test_bin.shape[1]
    padding = np.zeros((y_test_bin.shape[0], diff))
    y_test_bin = np.hstack([y_test_bin, padding])

# Hitung ROC AUC
roc = roc_auc_score(y_test_bin, probs, multi_class='ovr')

print(f"CNN Classifier - Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}, AUC: {roc:.4f}")

CNN Classifier - Accuracy: 0.9995, Precision: 0.9994, Recall: 0.9995, F1-Score: 0.9994, AUC: nan


