# FDU PRML 2024 Fall Assignment 1

Name: 李增昊

Student ID: 22307130108

Please follow the instructions and complete the following exercises using PyTorch.

## 1. Basic Operations of Tensors

In [35]:
import torch

my_first_tensor = torch.ones((3,4))  # TODO: assign a tensor of shape (3, 4) with all elements equal to 1.0

my_second_tensor = torch.randn((3, 4))  # TODO: assign a random tensor of shape (3, 4) with all elements sampled from a standard normal distribution

their_matrix_product = torch.matmul(my_first_tensor, my_second_tensor.T)  # TODO: compute the matrix product of my_first_tensor and the transpose of my_second_tensor (There are multiple ways to do this. Just pick one you like.)

some_meaningless_concatenation = torch.cat((my_first_tensor, my_second_tensor), dim=0)  # TODO: concatenate my_first_tensor and my_second_tensor along the first dimension. (Maybe you should check the documentation of torch.cat)

some_meaningless_stack = torch.stack((my_first_tensor, my_first_tensor, my_first_tensor, my_first_tensor, my_first_tensor), dim=0) 
# TODO: stack 5 copies of my_first_tensor along a newly created dimension. (Maybe you should check the documentation of torch.stack)
# What is the shape of some_meaningless_stack? Can you imagine the geometric interpretation of stacking 5 matrices of shape (3, 4) along the first dimension?

#some_meaningless_stack的形状应该是[5, 3, 4]。这个操作就像是把同形状的张量在新的维度叠起来

## 2. A simple logistic regression

There are 4 core components in Pytorch training process: **model**, **loss function**, **optimizer** and **data loader**. In this part, we will implement a simple logistic regression model to illustrate them.

### 2.1 Model and Loss Function

In [36]:
# Define a linear layer for logitstic regression

class Linear(torch.nn.Module):
	def __init__(self, input_dim, output_dim):
		super().__init__()
		# TODO: initialize the weight and bias of the linear layer.
		self.weight = torch.nn.Parameter(torch.randn(input_dim, output_dim)*0.01)
		self.bias = torch.nn.Parameter(torch.zeros(output_dim))
  
	def forward(self, x):
		pass
		# TODO: implement the forward function of a linear layer.
		return x @ self.weight + self.bias
		# return torch.sigmoid(X@self.weight + self.bias)


def loss_function(y_pred, y):
    # TODO: implement the loss function of logistic regression.
	return torch.nn.functional.binary_cross_entropy_with_logits(y_pred, y)
	#return torch.mean(y*torch.log(y_pred) + (1-y)*torch.log(1-y_pred))
	

### Synthetic Data

In real world, we usually have to deal with large-scale datasets. However, in this assignment, we will use synthetic data to illustrate the training process. The synthetic data is generated by the following function:

In [37]:
# Generate some random data for binary classification

num_samples = 100
num_features = 2

x_0 = torch.randn(num_samples, num_features) + torch.tensor([2.0, 2.0])
y_0 = torch.zeros(num_samples)

x_1 = torch.randn(num_samples, num_features) + torch.tensor([-2.0, -2.0])
y_1 = torch.ones(num_samples)

x = torch.cat([x_0, x_1], dim=0)
y = torch.cat([y_0, y_1], dim=0)


### 2.2 Dataloader

In [38]:
# Define a dataset to feed into the model

class MyDataset(torch.utils.data.Dataset):
	def __init__(self, x, y):
		super().__init__()
		self.x = x
		self.y = y

	def __getitem__(self, index):
		# TODO: implement the __getitem__ function.
		return self.x[index], self.y[index]

	def __len__(self):
		# TODO: implement the __len__ function.
		return len(self.x)


dataset = MyDataset(x, y)
dataloder = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)

### 2.3 Optimizer

In [39]:
my_model = Linear(num_features, 1)

optimizer = torch.optim.AdamW(my_model.parameters(), lr=0.01)  # TODO: initialize an optimizer of your choice.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)


### Putting all together

Since this is just a toy experiment, we do not need validation.

In the following code, we expect to see the training loss decreasing to 0.001 or lower.

In [43]:
# Train the model

for epoch in range(100):
	for batch_x, batch_y in dataloder:
		# TODO: implement the training loop.
		#由于反向传播的默认行为是让梯度累加，所以归零
		optimizer.zero_grad() 
		y_pred = my_model(batch_x)  
		loss = loss_function(y_pred.squeeze(), batch_y)  
		loss.backward()
		#限制梯度的最大值
		torch.nn.utils.clip_grad_norm_(my_model.parameters(), max_norm=0.3)
		optimizer.step()  

  
	if epoch % 10 == 0:
		print('epoch: {}, loss: {}'.format(epoch, loss.item()))
  
# save the model

torch.save(my_model.state_dict(), 'my_model.pt')

epoch: 0, loss: 0.0016931958962231874
epoch: 10, loss: 0.002506396733224392
epoch: 20, loss: 0.00015973576228134334
epoch: 30, loss: 0.001939267385751009
epoch: 40, loss: 0.0020727734081447124
epoch: 50, loss: 0.0005686646327376366
epoch: 60, loss: 0.0007488030823878944
epoch: 70, loss: 0.00013168402074370533
epoch: 80, loss: 9.199530904879794e-05
epoch: 90, loss: 0.0002735707676038146


## 3. MNIST Classification

In this section, you will use PyTorch to implement a multi-layer perceptron (MLP) model for classifying handwritten digits using the MNIST dataset.


1. Data Loading and Preprocessing:
   - Utilize the `torchvision.datasets` module to load the MNIST dataset.
   - Apply necessary transformations (like `ToTensor` and `Normalize`) to prepare the data for model training. These transformations ensure the data has the correct format and scales, helping with model convergence.
   - Use a DataLoader with a suitable batch size to efficiently manage data feeding into the model.

2. Architecture:
   - Define a simple MLP model with PyTorch's `torch.nn.Module`. A suggested architecture is:
     - An input layer that takes the flattened 28x28 pixel values (784 features).
     - One or more hidden layers with ReLU activations for non-linearity.
     - An output layer with softmax activation for multi-class classification.
   - Make sure to initialize the model appropriately, especially if you're stacking multiple layers.

3. Training:
   - Set up an optimizer (like `Adam` or `SGD`) to minimize the model's error during training. You will also need a loss function, such as `CrossEntropyLoss`, which is well-suited for classification tasks.
   - Write a training loop that performs the following steps:
     - Forward pass: Feed batches through the model to obtain predictions.
     - Compute the loss by comparing predictions with true labels.
     - Backward pass: Calculate gradients for each model parameter.
     - Update the model weights using the optimizer.
   - Periodically log or print the training loss to track progress.

4. Evaluation:
   - After training, evaluate your model on the test set.
   - Compute and print the accuracy metric, and optionally, create a confusion matrix to analyze classification errors.


MNIST: http://yann.lecun.com/exdb/mnist/

In [68]:
from torchvision import datasets, transforms
import torch
from tqdm import tqdm

transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])
dataset1 = datasets.MNIST('./data', train=True, download=True,
                    transform=transform)
dataset2 = datasets.MNIST('./data', train=False,
                    transform=transform)
train_loader = torch.utils.data.DataLoader(dataset=dataset1, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=dataset2, batch_size=128)


In [69]:
# define the model
device = "cuda:0" if torch.cuda.is_available() else "cpu"

class MLP(torch.nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),

            torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),

            torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),

            torch.nn.Flatten(),
            torch.nn.Linear(in_features=7*7*64, out_features=128),
            torch.nn.ReLU(),
            torch.nn.Linear(in_features=128, out_features=10),
            torch.nn.Softmax(dim=1)
        )
    
    def forward(self, input):
        return self.model(input)
    

In [70]:
# instance of my MLP model
my_second_model = MLP()
my_second_model.to(device)

MLP(
  (model): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Flatten(start_dim=1, end_dim=-1)
    (9): Linear(in_features=3136, out_features=128, bias=True)
    (10): ReLU()
    (11): Linear(in_features=128, out_features=10, bias=True)
    (12): Softmax(dim=1)
  )
)

In [71]:
# define loss function
LossFunction = torch.nn.CrossEntropyLoss()

In [72]:
# choose the optimizer
MNIST_optimizer = torch.optim.Adam(my_second_model.parameters())

In [74]:
# write the training loop
MAX_EPOCH = 10
for epoch in range(1, MAX_EPOCH + 1):
    processBar = tqdm(train_loader, unit='step')
    my_second_model.train(True)
    for step, (train_img, labels) in enumerate(processBar):
        train_img = train_img.to(device)
        labels = labels.to(device)

        my_second_model.zero_grad()
        # feed the batch and get predictions
        pred = my_second_model(train_img)
        # compute the loss
        loss = LossFunction(pred, labels)

        pred_label = torch.argmax(pred, dim=1)
        accuracy = torch.sum(pred_label == labels)/labels.shape[0]

        # backward pass
        loss.backward()
        # update the model weights using optimizer
        MNIST_optimizer.step()
        processBar.set_description("[%d/%d], Loss: %.4f, Accuracy: %.4f"%(epoch, MAX_EPOCH, loss.item(), accuracy.item()))
    processBar.close()




[1/10], Loss: 1.5648, Accuracy: 0.8958: 100%|██████████| 469/469 [00:41<00:00, 11.42step/s]
[2/10], Loss: 1.4825, Accuracy: 0.9792: 100%|██████████| 469/469 [00:42<00:00, 10.94step/s]
[3/10], Loss: 1.4818, Accuracy: 0.9792: 100%|██████████| 469/469 [00:42<00:00, 10.93step/s]
[4/10], Loss: 1.4726, Accuracy: 0.9896: 100%|██████████| 469/469 [00:43<00:00, 10.85step/s]
[5/10], Loss: 1.4714, Accuracy: 0.9896: 100%|██████████| 469/469 [00:43<00:00, 10.90step/s]
[6/10], Loss: 1.4612, Accuracy: 1.0000: 100%|██████████| 469/469 [00:43<00:00, 10.90step/s]
[7/10], Loss: 1.4729, Accuracy: 0.9896: 100%|██████████| 469/469 [00:43<00:00, 10.82step/s]
[8/10], Loss: 1.4612, Accuracy: 1.0000: 100%|██████████| 469/469 [00:43<00:00, 10.89step/s]
[9/10], Loss: 1.4612, Accuracy: 1.0000: 100%|██████████| 469/469 [00:43<00:00, 10.87step/s]
[10/10], Loss: 1.4612, Accuracy: 1.0000: 100%|██████████| 469/469 [00:43<00:00, 10.88step/s]


In [77]:
# Evaluate the model on test set
my_second_model.train(False)

total_loss = 0
count = 0

for test_img, labels in test_loader:
    test_img = test_img.to(device)
    labels = labels.to(device)

    temp = my_second_model(test_img)
    pred_label = torch.argmax(temp, dim=1)
    loss = LossFunction(temp, labels)

    total_loss +=loss
    count += torch.sum(pred_label == labels)

accuracy = count/(128*len(test_loader))
final_loss = total_loss/(len(test_loader))

print("Loss on test set: %.4f, Accuracy on test set: %.4f"%(final_loss.item(), accuracy.item()))

Loss on test set: 1.4713, Accuracy on test set: 0.9788
