Họ và tên: Võ Hồng Quân 
$ \\ $
MSSV: 22134012

# Ex 11. CNN

In [17]:
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import scipy
import sklearn.model_selection
import tensorflow as tf
import plotly

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")

Using cpu device


#  Load dataset 

In [3]:
mat_file = scipy.io.loadmat('ex7data.mat')

In [4]:
X, y = mat_file['X'].astype('float32'), np.ravel(mat_file['y'])

In [5]:
# Modify label 1-10 into 0-9
y[y == 10] = 0

In [13]:
X = X.reshape(5000, 1, 20, 20)

In [14]:
X_new, X_test, y_new, y_test =  sklearn.model_selection.train_test_split(X, y, test_size= 0.3, train_size= 0.7,
                                                random_state= 1, shuffle=True, stratify= y)

X_train, X_valid, y_train, y_valid = sklearn.model_selection.train_test_split(X_new, y_new, test_size= 0.1, 
                                                train_size= 0.9, shuffle=True, stratify= y_new)

# Define

In [7]:
# Dataset class
class GetDataset(torch.utils.data.Dataset):
    def __init__(self, data, label):
        self.data = data.astype(np.float32)
        self.label = label.astype(np.int64)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        image = self.data[idx]
        label = self.label[idx]
        return image, label

In [8]:
class CNN(torch.nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.cnn_stack = torch.nn.Sequential(
            torch.nn.Conv2D(in_features=input_size, out_features=50),
            torch.nn.Sigmoid(),
            torch.nn.Conv2D(in_features=50, out_features=50),
            torch.nn.Sigmoid(),
            torch.nn.Conv2D(in_features=50, out_features=10)
        )
    def forward(self, X):
        logits = self.cnn_stack(X)
        return logits

In [9]:
def train_loop(dataloader, model, loss_fn, optimizer, device):
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction and loss
        y_pred = model(X)
        loss = loss_fn(y_pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return loss.item()

def valid_loop(dataloader, model, loss_fn, device):
    model.eval()
    num_batches = len(dataloader)
    test_loss, correct = 0, 0 

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            test_loss += loss_fn(y_pred, y).item()
            correct += (y_pred.argmax(1) == y).type(torch.float32).sum().item()

    test_loss /= num_batches
    correct /= len(dataloader.dataset)
    return test_loss, correct

In [10]:
# Create datasets and data loaders
train_data = GetDataset(X_train, y_train)
valid_data = GetDataset(X_valid, y_valid)
test_data = GetDataset(X_test, y_test)

train_dataloader = torch.utils.data.DataLoader(dataset=train_data, batch_size=200, shuffle=True)
valid_dataloader = torch.utils.data.DataLoader(dataset=valid_data, batch_size=20, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_data, batch_size=200, shuffle=True)

In [11]:
input_size = X_train.shape[1]
epochs = 500
loss_fn = torch.nn.CrossEntropyLoss()
model = NeuralNetWork(input_size=input_size).to(device)

In [12]:
# Function to reset the model weights
def reset_model_weights(model):
    for layer in model.children():
        if hasattr(layer, 'reset_parameters'):
            layer.reset_parameters()

# So sánh giữa SGD có và không có momentum, điều chỉnh hệ số momentum

## Không có momentum

In [13]:
# Loss and optimizer
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.11
optim_not_momentum = torch.optim.SGD(model.parameters(), lr=lr)

In [14]:
# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optim_not_momentum, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 2.2978, Valid Loss: 2.2971, Valid Accuracy: 0.4629
Epoch 200, Train Loss: 2.2893, Valid Loss: 2.2893, Valid Accuracy: 0.5829
Epoch 300, Train Loss: 2.2736, Valid Loss: 2.2747, Valid Accuracy: 0.5029
Epoch 400, Train Loss: 2.2286, Valid Loss: 2.2366, Valid Accuracy: 0.4543
Epoch 500, Train Loss: 2.1108, Valid Loss: 2.1207, Valid Accuracy: 0.4057
Test Loss: 2.1131, Test Accuracy: 0.4067


In [15]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

## Có momentum

In [16]:
# Experiment 2: SGD with momentum
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.1
momentum_value = 0.9
optimizer_sgd_momentum = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum_value)


In [17]:
# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_sgd_momentum, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 0.0743, Valid Loss: 0.3683, Valid Accuracy: 0.9286
Epoch 200, Train Loss: 0.0084, Valid Loss: 0.4226, Valid Accuracy: 0.9257
Epoch 300, Train Loss: 0.0038, Valid Loss: 0.4651, Valid Accuracy: 0.9257
Epoch 400, Train Loss: 0.0024, Valid Loss: 0.4524, Valid Accuracy: 0.9257
Epoch 500, Train Loss: 0.0021, Valid Loss: 0.5269, Valid Accuracy: 0.9257
Test Loss: 0.4012, Test Accuracy: 0.9213


In [18]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

### Kết luận: có momentum giúp mô hình hội tụ nhanh hơn, độ chính xác cao hơn so với khi không có momentum

# So sánh giữa momentum và nesterov momentum

In [19]:
# Loss and optimizer
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.1
momentum_value = 0.9
optimizer_nesterov = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum_value, nesterov=True)


In [20]:
# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_nesterov, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 0.0364, Valid Loss: 0.3706, Valid Accuracy: 0.9200
Epoch 200, Train Loss: 0.0117, Valid Loss: 0.4132, Valid Accuracy: 0.9343
Epoch 300, Train Loss: 0.0032, Valid Loss: 0.4273, Valid Accuracy: 0.9343
Epoch 400, Train Loss: 0.0028, Valid Loss: 0.4361, Valid Accuracy: 0.9371
Epoch 500, Train Loss: 0.0014, Valid Loss: 0.4522, Valid Accuracy: 0.9371
Test Loss: 0.3894, Test Accuracy: 0.9187


In [21]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

### Kết luận: nesterov momentum trong trường hợp này hội tụ lâu hơn và có độ chính xác thấp hơn mometum thông thường

# So sánh giữa các loại optimizer: SGD, RMSPro, và Adam

## RMSProp

In [22]:
# Loss and optimizer
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.01
optimizer_rmsprop = torch.optim.RMSprop(model.parameters(), lr=lr)

# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_rmsprop, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 0.0000, Valid Loss: 0.7756, Valid Accuracy: 0.9171
Epoch 200, Train Loss: 0.0000, Valid Loss: 0.8550, Valid Accuracy: 0.9114
Epoch 300, Train Loss: 0.0000, Valid Loss: 0.9131, Valid Accuracy: 0.9143
Epoch 400, Train Loss: 0.0000, Valid Loss: 0.8603, Valid Accuracy: 0.9143
Epoch 500, Train Loss: 0.0000, Valid Loss: 0.8444, Valid Accuracy: 0.9171
Test Loss: 0.6249, Test Accuracy: 0.9333


In [23]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

## Adam 

In [24]:
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)
lr = 0.001
optimizer_adam = torch.optim.Adam(model.parameters(), lr=lr)

In [25]:
# Training
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_adam, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 100, Train Loss: 0.1051, Valid Loss: 0.3461, Valid Accuracy: 0.9171
Epoch 200, Train Loss: 0.0236, Valid Loss: 0.3658, Valid Accuracy: 0.9143
Epoch 300, Train Loss: 0.0030, Valid Loss: 0.4153, Valid Accuracy: 0.9171
Epoch 400, Train Loss: 0.0014, Valid Loss: 0.4424, Valid Accuracy: 0.9171
Epoch 500, Train Loss: 0.0004, Valid Loss: 0.4772, Valid Accuracy: 0.9200
Test Loss: 0.4112, Test Accuracy: 0.9193


In [26]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

### Kết luận: Adam khi cùng learning rate với SGD và RMSProp thì SGD> Adam > RMSProp 

# Thử các loại Learning Rate Schedules khác nhau và đánh giá

## Step Learning Rate Schedule

In [27]:
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)

lr = 0.3
optimizer_sgd = torch.optim.SGD(model.parameters(), lr=lr)
scheduler_step = torch.optim.lr_scheduler.StepLR(optimizer_sgd, step_size=50, gamma=0.1)


In [28]:
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_sgd, device)  # Pass optimizer, not scheduler
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    
    # Step the scheduler at the end of each epoch
    scheduler_step.step()
    
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


Epoch 100, Train Loss: 0.4912, Valid Loss: 0.5472, Valid Accuracy: 0.8686
Epoch 200, Train Loss: 0.5071, Valid Loss: 0.5607, Valid Accuracy: 0.8714
Epoch 300, Train Loss: 0.3665, Valid Loss: 0.5457, Valid Accuracy: 0.8714
Epoch 400, Train Loss: 0.5099, Valid Loss: 0.5414, Valid Accuracy: 0.8714
Epoch 500, Train Loss: 0.3811, Valid Loss: 0.5367, Valid Accuracy: 0.8714
Test Loss: 0.5191, Test Accuracy: 0.8527


In [29]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

## Exponential Learning Rate Schedule

In [30]:
model = NeuralNetWork(input_size=input_size).to(device)
reset_model_weights(model)

lr = 0.3
optimizer_sgd = torch.optim.SGD(model.parameters(), lr=lr)
scheduler_exp = torch.optim.lr_scheduler.ExponentialLR(optimizer_sgd, gamma=0.99)

In [31]:
loss_train = []
loss_valid = []

for epoch in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer_sgd, device)
    valid_loss, valid_accuracy = valid_loop(valid_dataloader, model, loss_fn, device)
    
    loss_train.append(train_loss)
    loss_valid.append(valid_loss)
    
    # Step the scheduler at the end of each epoch
    scheduler_exp.step()

    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {valid_accuracy:.4f}")

# Testing
test_loss, test_accuracy = valid_loop(test_dataloader, model, loss_fn, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


Epoch 100, Train Loss: 0.3821, Valid Loss: 0.5329, Valid Accuracy: 0.8857
Epoch 200, Train Loss: 0.2828, Valid Loss: 0.4850, Valid Accuracy: 0.8943
Epoch 300, Train Loss: 0.2908, Valid Loss: 0.4450, Valid Accuracy: 0.8971
Epoch 400, Train Loss: 0.2965, Valid Loss: 0.4424, Valid Accuracy: 0.9000
Epoch 500, Train Loss: 0.3086, Valid Loss: 0.4605, Valid Accuracy: 0.9029
Test Loss: 0.4260, Test Accuracy: 0.8800


In [32]:
import plotly.graph_objects as go


fig = go.Figure()
fig.add_trace(go.Scatter(x= np.arange(len(loss_train)), y= loss_train, name= 'Train'))
fig.add_trace(go.Scatter(x= np.arange(len(loss_valid)), y= loss_valid, name= 'Valid'))
fig.show()

### Kết luận: Step Learning Rate Schedule hội tụ lâu và có độ chính xác thấp hơn Exponential Learning Rate Schedule