Regression Model: MLP Regression: winequality-white.csv


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import pandas as pd
import numpy as np

In [24]:
# Membaca dataset dengan delimiter yang benar (;)
data = pd.read_csv('winequality-white.csv', delimiter=';')

# Menampilkan beberapa baris pertama untuk memastikan data terbaca dengan benar
print(data.head())


   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.0              0.27         0.36            20.7      0.045   
1            6.3              0.30         0.34             1.6      0.049   
2            8.1              0.28         0.40             6.9      0.050   
3            7.2              0.23         0.32             8.5      0.058   
4            7.2              0.23         0.32             8.5      0.058   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 45.0                 170.0   1.0010  3.00       0.45   
1                 14.0                 132.0   0.9940  3.30       0.49   
2                 30.0                  97.0   0.9951  3.26       0.44   
3                 47.0                 186.0   0.9956  3.19       0.40   
4                 47.0                 186.0   0.9956  3.19       0.40   

   alcohol  quality  
0      8.8        6  
1      9.5        6  
2     10.1        6 

In [None]:
# Menampilkan beberapa baris pertama untuk melihat data secara umum
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [25]:
X = data.iloc[:, :-1].values  # Fitur (semua kolom kecuali 'quality')
y = data.iloc[:, -1].values  # Target (kolom 'quality')

In [26]:
# Normalize features and target
X = (X - X.mean(axis=0)) / X.std(axis=0)  # Normalisasi fitur (mean=0, std=1)
y = (y - y.mean()) / y.std()  # Normalisasi target untuk regresi


In [27]:
# PyTorch Dataset class untuk menangani data
class WineDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)  # Konversi fitur ke tensor PyTorch
        self.y = torch.tensor(y, dtype=torch.float32)  # Konversi target ke tensor PyTorch

    def __len__(self):
        return len(self.X)  # Mengembalikan jumlah data

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]  # Mengembalikan pasangan fitur dan target


In [28]:
# Buat dataset PyTorch
dataset = WineDataset(X, y)


In [29]:
# Split dataset menjadi 80% training dan 20% testing
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])


In [30]:
# Definisi model MLP (Multilayer Perceptron)
class MLPRegressor(nn.Module):
    def __init__(self, input_size, hidden_layers, activation_fn):
        super(MLPRegressor, self).__init__()
        layers = []
        in_features = input_size  # Input size (jumlah fitur)
        for neurons in hidden_layers:
            layers.append(nn.Linear(in_features, neurons))  # Tambahkan layer linear
            layers.append(activation_fn)  # Tambahkan fungsi aktivasi
            in_features = neurons  # Update jumlah neuron untuk layer berikutnya
        layers.append(nn.Linear(in_features, 1))  # Output layer (satu neuron untuk regresi)
        self.model = nn.Sequential(*layers)  # Gabungkan semua layer menjadi satu model

    def forward(self, x):
        return self.model(x)  # Forward pass (prediksi)


In [31]:
# Parameter eksperimen yang akan diuji
hidden_layer_configs = [[4], [8], [16], [32], [64], [4, 8], [8, 16], [16, 32], [32, 64], [4, 8, 16]]  # Kombinasi jumlah neuron
activation_functions = {'linear': nn.Identity(), 'sigmoid': nn.Sigmoid(), 'relu': nn.ReLU(), 'softmax': nn.Softmax(dim=-1), 'tanh': nn.Tanh()}  # Fungsi aktivasi
epochs_list = [1, 10, 25, 50, 100, 250]  # Jumlah epoch
learning_rates = [10, 1, 0.1, 0.01, 0.001, 0.0001]  # Learning rate
batch_sizes = [16, 32, 64, 128, 256, 512]  # Ukuran batch


In [32]:
# Untuk menyimpan hasil
results = []


In [33]:
# Gunakan GPU jika tersedia, jika tidak gunakan CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [34]:
# Loop untuk menguji semua kombinasi parameter
for hidden_layers in hidden_layer_configs:
    for act_name, activation_fn in activation_functions.items():
        for epochs in epochs_list:
            for lr in learning_rates:
                for batch_size in batch_sizes:
                    # Buat DataLoader untuk batching data
                    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
                    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

                    # Buat model, loss function, dan optimizer
                    model = MLPRegressor(X.shape[1], hidden_layers, activation_fn).to(device)  # Model MLP
                    criterion = nn.MSELoss()  # Mean Squared Error untuk regresi
                    optimizer = optim.Adam(model.parameters(), lr=lr)  # Optimizer Adam

                    # Training loop
                    model.train()
                    for epoch in range(epochs):
                        for inputs, targets in train_loader:
                            inputs, targets = inputs.to(device), targets.to(device)  # Pindahkan data ke GPU/CPU
                            optimizer.zero_grad()  # Reset gradien
                            outputs = model(inputs).squeeze()  # Forward pass
                            loss = criterion(outputs, targets)  # Hitung loss
                            loss.backward()  # Backpropagation
                            optimizer.step()  # Update bobot model

                    # Evaluation (uji model)
                    model.eval()
                    test_loss = 0
                    with torch.no_grad():  # Nonaktifkan gradien selama evaluasi
                        for inputs, targets in test_loader:
                            inputs, targets = inputs.to(device), targets.to(device)
                            outputs = model(inputs).squeeze()
                            test_loss += criterion(outputs, targets).item()  # Hitung loss untuk data uji

                    test_loss /= len(test_loader)  # Rata-rata loss
                    # Simpan hasil
                    results.append((hidden_layers, act_name, epochs, lr, batch_size, test_loss))
                    print(f"Hidden Layers: {hidden_layers}, Activation: {act_name}, Epochs: {epochs}, LR: {lr}, Batch Size: {batch_size}, Test Loss: {test_loss:.4f}")


Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 10, Batch Size: 16, Test Loss: 0.9893
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 10, Batch Size: 32, Test Loss: 7.5991
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 10, Batch Size: 64, Test Loss: 1469.8445
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 10, Batch Size: 128, Test Loss: 32806.3987
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 10, Batch Size: 256, Test Loss: 349638.9766
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 10, Batch Size: 512, Test Loss: 64887.4238
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 1, Batch Size: 16, Test Loss: 1.4849
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 1, Batch Size: 32, Test Loss: 0.7674
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 1, Batch Size: 64, Test Loss: 0.8740
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: 1, Batch Size: 128, Test Loss: 8.6480
Hidden Layers: [4], Activation: linear, Epochs: 1, LR: