In [1]:
import numpy as np
import torch
import scipy
import sklearn.model_selection
import tensorflow as tf
import plotly

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")

Using cpu device


#  Load dataset 

In [3]:
mat_file = scipy.io.loadmat('ex7data.mat')

In [4]:
X, y = mat_file['X'].astype('float32'), np.ravel(mat_file['y'])

In [5]:
# Modify label 1-10 into 0-9
y[y == 10] = 0

In [6]:
X_new, X_test, y_new, y_test =  sklearn.model_selection.train_test_split(X, y, test_size= 0.3, train_size= 0.7,
                                                random_state= 1, shuffle=True, stratify= y)

X_train, X_valid, y_train, y_valid = sklearn.model_selection.train_test_split(X_new, y_new, test_size= 0.1, 
                                                train_size= 0.9, shuffle=True, stratify= y_new)

# Define

In [7]:
# Dataset class
class GetDataset(torch.utils.data.Dataset):
    def __init__(self, data, label):
        self.data = data.astype(np.float32)
        self.label = label.astype(np.int64)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        image = self.data[idx]
        label = self.label[idx]
        return image, label

In [8]:
class NeuralNetWork(torch.nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.linear_sigmoid_stack = torch.nn.Sequential(
            torch.nn.Linear(in_features=input_size, out_features=50),
            torch.nn.Sigmoid(),
            torch.nn.Linear(in_features=50, out_features=50),
            torch.nn.Sigmoid(),
            torch.nn.Linear(in_features=50, out_features=10)
        )
    def forward(self, X):
        logits = self.linear_sigmoid_stack(X)
        return logits

In [9]:
def train_loop(dataloader, model, loss_fn, optimizer, device):
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction and loss
        y_pred = model(X)
        loss = loss_fn(y_pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return loss.item()

def valid_loop(dataloader, model, loss_fn, device):
    model.eval()
    num_batches = len(dataloader)
    test_loss, correct = 0, 0 

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            test_loss += loss_fn(y_pred, y).item()
            correct += (y_pred.argmax(1) == y).type(torch.float32).sum().item()

    test_loss /= num_batches
    correct /= len(dataloader.dataset)
    return test_loss, correct

In [10]:
# Create datasets and data loaders
train_data = GetDataset(X_train, y_train)
valid_data = GetDataset(X_valid, y_valid)
test_data = GetDataset(X_test, y_test)

train_dataloader = torch.utils.data.DataLoader(dataset=train_data, batch_size=200, shuffle=True)
valid_dataloader = torch.utils.data.DataLoader(dataset=valid_data, batch_size=20, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_data, batch_size=200, shuffle=True)

In [11]:
input_size = X_train.shape[1]
epochs = 500
loss_fn = torch.nn.CrossEntropyLoss()
model = NeuralNetWork(input_size=input_size).to(device)

In [12]:
# Function to reset the model weights
def reset_model_weights(model):
    for layer in model.children():
        if hasattr(layer, 'reset_parameters'):
            layer.reset_parameters()

# So sánh giữa SGD có và không có momentum, điều chỉnh hệ số momentum

## Không có momentum

In [13]:
# Loss and optimizer
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.3
optim_not_momentum = torch.optim.SGD(model.parameters(), lr=lr)

In [14]:
# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optim_not_momentum, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 0.2126, Valid Loss: 0.4125, Valid Accuracy: 0.8629
Epoch 200, Train Loss: 0.1442, Valid Loss: 0.3091, Valid Accuracy: 0.9029
Epoch 300, Train Loss: 0.0614, Valid Loss: 0.2809, Valid Accuracy: 0.9114
Epoch 400, Train Loss: 0.0286, Valid Loss: 0.2656, Valid Accuracy: 0.9229
Epoch 500, Train Loss: 0.0179, Valid Loss: 0.2836, Valid Accuracy: 0.9343
Test Loss: 0.3310, Test Accuracy: 0.9180


In [15]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

## Có momentum

In [16]:
# Experiment 2: SGD with momentum
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.1
momentum_value = 0.9
optimizer_sgd_momentum = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum_value)


In [17]:
# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_sgd_momentum, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 0.0404, Valid Loss: 0.2980, Valid Accuracy: 0.9314
Epoch 200, Train Loss: 0.0078, Valid Loss: 0.3008, Valid Accuracy: 0.9400
Epoch 300, Train Loss: 0.0057, Valid Loss: 0.3113, Valid Accuracy: 0.9400
Epoch 400, Train Loss: 0.0019, Valid Loss: 0.3356, Valid Accuracy: 0.9400
Epoch 500, Train Loss: 0.0015, Valid Loss: 0.3272, Valid Accuracy: 0.9400
Test Loss: 0.4130, Test Accuracy: 0.9167


In [18]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

### Kết luận: có momentum giúp mô hình hội tụ nhanh hơn, độ chính xác cao hơn so với khi không có momentum

# So sánh giữa momentum và nesterov momentum

In [19]:
# Loss and optimizer
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.1
momentum_value = 0.9
optimizer_nesterov = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum_value, nesterov=True)


In [20]:
# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_nesterov, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 0.0376, Valid Loss: 0.2841, Valid Accuracy: 0.9057
Epoch 200, Train Loss: 0.0093, Valid Loss: 0.2924, Valid Accuracy: 0.9229
Epoch 300, Train Loss: 0.0026, Valid Loss: 0.2937, Valid Accuracy: 0.9200
Epoch 400, Train Loss: 0.0036, Valid Loss: 0.3256, Valid Accuracy: 0.9200
Epoch 500, Train Loss: 0.0015, Valid Loss: 0.3243, Valid Accuracy: 0.9200
Test Loss: 0.4221, Test Accuracy: 0.9220


In [21]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

### Kết luận: nesterov momentum trong trường hợp này hội tụ lâu hơn và có độ chính xác thấp hơn mometum thông thường

# So sánh giữa các loại optimizer: SGD, RMSPro, và Adam

## RMSProp

In [22]:
# Loss and optimizer
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.01
optimizer_rmsprop = torch.optim.RMSprop(model.parameters(), lr=lr)

# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_rmsprop, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 0.0000, Valid Loss: 0.4065, Valid Accuracy: 0.9343
Epoch 200, Train Loss: 0.0000, Valid Loss: 0.6419, Valid Accuracy: 0.9286
Epoch 300, Train Loss: 0.0000, Valid Loss: 0.5992, Valid Accuracy: 0.9314
Epoch 400, Train Loss: 0.0000, Valid Loss: 0.5783, Valid Accuracy: 0.9371
Epoch 500, Train Loss: 0.0000, Valid Loss: 0.5087, Valid Accuracy: 0.9429
Test Loss: 0.7390, Test Accuracy: 0.9147


In [23]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

## Adam 

In [24]:
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.001
optimizer_adam = torch.optim.Adam(model.parameters(), lr=lr)

In [25]:
# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_adam, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 0.0923, Valid Loss: 0.3049, Valid Accuracy: 0.9029
Epoch 200, Train Loss: 0.0204, Valid Loss: 0.3057, Valid Accuracy: 0.9143
Epoch 300, Train Loss: 0.0021, Valid Loss: 0.2858, Valid Accuracy: 0.9200
Epoch 400, Train Loss: 0.0013, Valid Loss: 0.3076, Valid Accuracy: 0.9257
Epoch 500, Train Loss: 0.0003, Valid Loss: 0.3284, Valid Accuracy: 0.9314
Test Loss: 0.3929, Test Accuracy: 0.9213


In [26]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

### Kết luận: Adam khi cùng learning rate với SGD và RMSProp thì SGD> Adam > RMSProp 

# Thử các loại Learning Rate Schedules khác nhau và đánh giá

## Step Learning Rate Schedule

In [41]:
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)

lr = 0.3
optimizer_sgd = torch.optim.SGD(model.parameters(), lr=lr)
scheduler_step = torch.optim.lr_scheduler.StepLR(optimizer_sgd, step_size=50, gamma=0.1)


In [42]:
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_sgd, device)  # Pass optimizer, not scheduler
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    
    # Step the scheduler at the end of each epoch
    scheduler_step.step()
    
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


Epoch 100, Train Loss: 0.4276, Valid Loss: 0.6021, Valid Accuracy: 0.8314
Epoch 200, Train Loss: 0.4770, Valid Loss: 0.5732, Valid Accuracy: 0.8314
Epoch 300, Train Loss: 0.4668, Valid Loss: 0.5828, Valid Accuracy: 0.8314
Epoch 400, Train Loss: 0.4337, Valid Loss: 0.5719, Valid Accuracy: 0.8314
Epoch 500, Train Loss: 0.5544, Valid Loss: 0.5784, Valid Accuracy: 0.8314
Test Loss: 0.5574, Test Accuracy: 0.8327


In [43]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

## Exponential Learning Rate Schedule

In [52]:
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)

lr = 0.3
optimizer_sgd = torch.optim.SGD(model.parameters(), lr=lr)
scheduler_exp = torch.optim.lr_scheduler.ExponentialLR(optimizer_sgd, gamma=0.99)

In [53]:
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_sgd, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    
    # Step the scheduler at the end of each epoch
    scheduler_exp.step()

    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


Epoch 100, Train Loss: 0.6849, Valid Loss: 0.5787, Valid Accuracy: 0.8457
Epoch 200, Train Loss: 0.3556, Valid Loss: 0.4317, Valid Accuracy: 0.8771
Epoch 300, Train Loss: 0.3956, Valid Loss: 0.4091, Valid Accuracy: 0.8800
Epoch 400, Train Loss: 0.3969, Valid Loss: 0.3976, Valid Accuracy: 0.8857
Epoch 500, Train Loss: 0.2257, Valid Loss: 0.3924, Valid Accuracy: 0.8829
Test Loss: 0.4081, Test Accuracy: 0.8827


In [54]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

### Kết luận: Step Learning Rate Schedule hội tụ lâu và có độ chính xác thấp hơn Exponential Learning Rate Schedule