**Tải bộ dữ liệu**

In [1]:
# Download the dataset CSV from Google Drive by file ID (IPython shell escape).
# NOTE(review): recent gdown releases reportedly deprecate the `--id` flag and
# accept the file ID directly - confirm against the installed gdown version.
!gdown --id 1qiUDDoYyRLBiKOoYWdFl_5WByHE8Cugu

Downloading...
From: https://drive.google.com/uc?id=1qiUDDoYyRLBiKOoYWdFl_5WByHE8Cugu
To: d:\AIO2024\module_5\week_3\Auto_MPG_data.csv

  0%|          | 0.00/15.4k [00:00<?, ?B/s]
100%|██████████| 15.4k/15.4k [00:00<00:00, 7.70MB/s]


**Import các thư viện cần thiết**

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

**Cố định seed ngẫu nhiên (để kết quả có thể tái lập)**

In [3]:
# Single seed shared by NumPy, PyTorch (CPU) and, when present, the CUDA
# generator, so that data splits and weight initialisation are repeatable.
random_state = 59

torch.manual_seed(random_state)
np.random.seed(random_state)

if torch.cuda.is_available():
    torch.cuda.manual_seed(random_state)

**Cài đặt thiết bị tính toán**

In [4]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

**Đọc bộ dữ liệu**

In [5]:
# Load the downloaded CSV into a DataFrame and preview the first rows.
dataset_path = 'Auto_MPG_data.csv'
dataset = pd.read_csv(filepath_or_buffer=dataset_path)
dataset.head(n=5)

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Europe,Japan,USA
0,18.0,8,307.0,130.0,3504.0,12.0,70,0,0,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,0,0,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,0,0,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,0,0,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,0,0,1


**Tiền xử lý bộ dữ liệu**

In [6]:
# Target vector: the 'MPG' column. Feature matrix: every remaining column.
y = dataset['MPG'].values
X = dataset.drop(columns='MPG').values

In [None]:
# Fraction of the full data held out for validation, and fraction of the
# *remaining* data held out for testing.
val_size = 0.2
test_size = 0.125
is_shuffle = True

# Stage 1: carve the validation set out of the full data.
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=val_size,
    shuffle=is_shuffle,
    random_state=random_state,
)

# Stage 2: carve the test set out of what is left; the rest is the training set.
X_train, X_test, y_train, y_test = train_test_split(
    X_train, y_train,
    test_size=test_size,
    shuffle=is_shuffle,
    random_state=random_state,
)

In [8]:
# Fit the scaler on the training features only, then apply the same
# statistics to the validation and test features.
normalizer = StandardScaler()
X_train = normalizer.fit_transform(X_train)
X_val = normalizer.transform(X_val)
X_test = normalizer.transform(X_test)

# Convert every split to a float32 tensor in one pass.
X_train, y_train, X_val, y_val, X_test, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_val, y_val, X_test, y_test)
)

**Xây dựng DataLoader**

In [9]:
class CustomDataset(Dataset):
    """Map-style dataset pairing each feature row with its target.

    Args:
        X: Indexable collection of feature rows (supports ``len`` and ``[]``).
        y: Indexable collection of targets, one per row of ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of items.
    """

    def __init__(self, X, y):
        # Fail fast: a length mismatch would otherwise either silently drop
        # targets or surface later as an IndexError inside a DataLoader.
        if len(X) != len(y):
            raise ValueError(
                f'X and y must have the same length, got {len(X)} and {len(y)}'
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

In [10]:
batch_size = 32

# Wrap the tensor splits so DataLoader can batch them.
train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_val, y_val)

# Only the training batches are reshuffled each epoch; validation order is fixed.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)
val_loader = DataLoader(dataset=val_dataset,
                        batch_size=batch_size,
                        shuffle=False)

**Xây dựng mạng MLP**

In [11]:
class MLP(nn.Module):
    """Multi-layer perceptron with two ReLU hidden layers and a linear head.

    Args:
        input_dims: Number of input features per sample.
        hidden_dims: Width of both hidden layers.
        output_dims: Number of outputs produced by the final linear layer.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.linear1 = nn.Linear(input_dims, hidden_dims)
        self.linear2 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)

    def forward(self, x):
        """Run the network on ``x``.

        Args:
            x: Tensor whose last dimension has size ``input_dims``; either a
                batch ``(batch, input_dims)`` or a single sample
                ``(input_dims,)``.

        Returns:
            The head output with a size-1 trailing dimension removed, e.g.
            ``(batch,)`` for a batch or a 0-dim tensor for a single sample
            when ``output_dims == 1``. Unchanged when ``output_dims > 1``.
        """
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        out = self.output(x)

        # Squeeze the trailing (output) axis rather than axis 1: identical for
        # batched 2-D input, but also valid for an unbatched 1-D sample, where
        # axis 1 does not exist and squeeze(1) would raise IndexError.
        return out.squeeze(-1)

In [12]:
# Input width follows the feature matrix; a single output for regression.
input_dims = X_train.shape[1]
hidden_dims = 64
output_dims = 1

# Build the network, then move its parameters to the selected device.
model = MLP(input_dims, hidden_dims, output_dims)
model = model.to(device)

**Khai báo hàm loss và optimizer**

In [13]:
# Mean-squared-error objective optimised with plain SGD.
lr = 1e-2
criteria = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

**Xây dựng hàm tính điểm R2**

In [14]:
def r_squared(y_true, y_pred):
    """Compute the coefficient of determination, 1 - SS_res / SS_tot.

    Args:
        y_true: Ground-truth targets (list, NumPy array or tensor).
        y_pred: Predictions with the same number of elements as ``y_true``.

    Returns:
        A 0-dim float32 tensor on the device of ``y_true`` (CPU for lists and
        NumPy arrays). The value is ``nan`` or ``-inf`` when all targets are
        equal, because SS_tot is then zero.
    """
    # as_tensor accepts lists, arrays and tensors on any device, unlike the
    # legacy torch.Tensor(...) constructor, and removes the dependency on a
    # notebook-global `device`.
    y_true = torch.as_tensor(y_true, dtype=torch.float32)
    y_pred = torch.as_tensor(y_pred, dtype=torch.float32, device=y_true.device)

    # Flatten both so (N,) vs (N, 1) cannot broadcast into an (N, N) matrix
    # and silently produce a wrong score.
    y_true = y_true.reshape(-1)
    y_pred = y_pred.reshape(-1)

    mean_true = torch.mean(y_true)
    ss_res = torch.sum((y_true - y_pred) ** 2)
    ss_tot = torch.sum((y_true - mean_true) ** 2)
    return 1 - ss_res / ss_tot

**Huấn luyện mô hình**

In [15]:
epochs = 100

# Per-epoch history of the loss and R2 on both splits.
train_losses, val_losses = [], []
train_r2, val_r2 = [], []

for epoch in range(epochs):
    # ---------------- training pass ----------------
    model.train()
    train_loss = 0.0
    train_target, train_prediction = [], []

    for X_samples, y_samples in train_loader:
        X_samples = X_samples.to(device)
        y_samples = y_samples.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        y_pred = model(X_samples)
        loss = criteria(y_pred, y_samples)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss and keep targets/predictions for R2.
        train_loss += loss.item()
        train_target += y_samples.tolist()
        train_prediction += y_pred.tolist()

    # Average of the per-batch losses over the number of batches.
    train_loss /= len(train_loader)
    train_losses.append(train_loss)
    train_r2.append(r_squared(train_target, train_prediction))

    # ---------------- validation pass ----------------
    model.eval()
    val_loss = 0.0
    val_target, val_prediction = [], []

    with torch.no_grad():
        for X_samples, y_samples in val_loader:
            X_samples = X_samples.to(device)
            y_samples = y_samples.to(device)

            y_pred = model(X_samples)
            loss = criteria(y_pred, y_samples)

            val_loss += loss.item()
            val_target += y_samples.tolist()
            val_prediction += y_pred.tolist()

    val_loss /= len(val_loader)
    val_losses.append(val_loss)
    val_r2.append(r_squared(val_target, val_prediction))

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Train R2: {train_r2[-1]:.4f}, Val R2: {val_r2[-1]:.4f}')


Epoch 1/100, Train Loss: 282.7695, Val Loss: 88.6705, Train R2: -3.8814, Val R2: -0.5093
Epoch 2/100, Train Loss: 137.6403, Val Loss: 72.2927, Train R2: -1.3779, Val R2: -0.3114
Epoch 3/100, Train Loss: 71.2069, Val Loss: 19.6682, Train R2: -0.2469, Val R2: 0.6610
Epoch 4/100, Train Loss: 24.2861, Val Loss: 166.9051, Train R2: 0.6032, Val R2: -1.6937
Epoch 5/100, Train Loss: 89.4771, Val Loss: 19.8156, Train R2: -0.5571, Val R2: 0.6570
Epoch 6/100, Train Loss: 17.9819, Val Loss: 17.1159, Train R2: 0.6947, Val R2: 0.7122
Epoch 7/100, Train Loss: 20.6666, Val Loss: 8.6691, Train R2: 0.6392, Val R2: 0.8616
Epoch 8/100, Train Loss: 38.5446, Val Loss: 36.0796, Train R2: 0.3287, Val R2: 0.4290
Epoch 9/100, Train Loss: 20.4251, Val Loss: 30.1564, Train R2: 0.7005, Val R2: 0.5250
Epoch 10/100, Train Loss: 20.3079, Val Loss: 49.4825, Train R2: 0.6606, Val R2: 0.2226
Epoch 11/100, Train Loss: 33.8492, Val Loss: 21.4786, Train R2: 0.4122, Val R2: 0.6376
Epoch 12/100, Train Loss: 10.1364, Val Loss

**Đánh giá mô hình**

In [16]:
# Score the trained model on the held-out test split.
model.eval()
with torch.no_grad():
    y_hat = model(X_test.to(device))
    # Bring the predictions back to host memory as a NumPy array.
    y_hat = y_hat.cpu().numpy()
    test_r2 = r_squared(y_test, y_hat)
    print(f'Test R2: {test_r2:.4f}')

Test R2: 0.8792
