# HW3-图像分类  
## <font color=green> 目标:  
(1)通过卷积神经网络模型来搭建图像分类模型。  
(2)通过数据增强(data augmentation)来提升模型效果。  
(3)了解如何利用未标记的数据以及如何从中受益。  
</font>  


## 食物分类:  
![avatar](pvg/p1.png)  
这些图像来自于food-11数据集，所有的数据被划分为11类。   
这里的数据集略有修改   
训练集:280 * 11标记图像+ 6786未标记图像  
验证集:30 * 11标记图像   
测试集:3347图像   
注意:请勿使用原始的food-11数据集或其标签。

### 1.导入依赖包  
本次项目要高度依赖TorchVision,可以将其视为一个依赖于Torch的视觉模型专用包

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import ConcatDataset, DataLoader, Subset
from torchvision.datasets import DatasetFolder
from tqdm.auto import tqdm

### 2.Dataset, Data Loader, and Transforms  
Torchvision提供了大量图像处理工具，例如数据集封装(Dataset)和数据增强(transforms)等。  
本次的图像按类别存放在以标签命名的子文件夹中，因此可以直接用DatasetFolder读取并打包数据。 

In [2]:
def load_rgb_image(path):
    """Read the image file at ``path`` and return it as a loaded RGB PIL image.

    ``Image.open`` is lazy: it keeps the file handle open until the pixel data
    is read.  Opening inside a ``with`` block and converting right away forces
    the data to be loaded and closes the file deterministically.  Converting
    to RGB also guarantees three channels, which the model's first
    convolution (``Conv2d(3, ...)``) requires.

    A named module-level function is used instead of a lambda so the dataset
    stays picklable if DataLoader worker processes are enabled later.
    """
    with Image.open(path) as img:
        return img.convert("RGB")


# Training-time preprocessing.  Data augmentation transforms can be added here.
train_tfm = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize every image to 128x128 pixels.
    transforms.ToTensor(),  # Convert the PIL image to a tensor.
])

# Validation/test-time preprocessing: resize and tensor conversion only.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

batch_size = 128

# Each sub-folder of the root directory is treated as one class by DatasetFolder.
train_set = DatasetFolder("data/training", loader=load_rgb_image, extensions="jpg", transform=train_tfm)
valid_set = DatasetFolder("data/validation", loader=load_rgb_image, extensions="jpg", transform=test_tfm)

# Construct data loaders.
# Only the training data is shuffled; evaluation order does not need to be random.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, pin_memory=True)

### 3.模型构建

In [3]:
class Classifier(nn.Module):
    """Convolutional image classifier: three conv stages plus an MLP head.

    The network expects inputs of shape ``[batch_size, 3, 128, 128]``; the
    size of the first fully-connected layer (``256 * 8 * 8``) is fixed by that
    resolution.  It returns raw logits of shape ``[batch_size, num_classes]``
    (no softmax is applied).

    Args:
        num_classes: Number of output logits.  Defaults to 11, the number of
            classes used in this notebook.
    """

    def __init__(self, num_classes: int = 11):
        super().__init__()
        # The arguments for commonly used modules:
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)

        # input image size: [3, 128, 128]
        # The stages are unpacked into ONE flat Sequential so the layer
        # indices (and therefore the state_dict keys) stay 0..11.
        self.cnn_layers = nn.Sequential(
            *self._conv_stage(3, 64, pool=2),     # -> [64, 64, 64]
            *self._conv_stage(64, 128, pool=2),   # -> [128, 32, 32]
            *self._conv_stage(128, 256, pool=4),  # -> [256, 8, 8]
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(256 * 8 * 8, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes),
        )

    @staticmethod
    def _conv_stage(in_channels: int, out_channels: int, pool: int) -> list:
        """Return the layers of one stage: 3x3 conv, batch norm, ReLU, max-pool."""
        return [
            nn.Conv2d(in_channels, out_channels, 3, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(pool, pool, 0),
        ]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class logits for a batch of images.

        Args:
            x: Image batch of shape ``[batch_size, 3, 128, 128]``.

        Returns:
            Logits of shape ``[batch_size, num_classes]``.
        """
        # Extract features by convolutional layers.
        x = self.cnn_layers(x)

        # The feature map must be flattened (all dims except batch) before
        # going to the fully-connected layers.
        x = x.flatten(1)

        # The features are transformed by fully-connected layers to obtain the final logits.
        return self.fc_layers(x)

### 4.训练模型

In [4]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)
model.device = device

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

# The number of training epochs.
n_epochs = 80

for epoch in range(n_epochs):
    # ---------- TODO ----------
    # Semi-supervised learning is not implemented here.
    # In each epoch, relabel the unlabeled dataset for semi-supervised learning.
    # Then you can combine the labeled dataset and pseudo-labeled dataset for the training.
    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    # Iterate the training set by batches.
    for imgs, labels in tqdm(train_loader):
        # A batch consists of image data and corresponding labels.
        # Move both to the model's device once, instead of on every use.
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Forward the data.
        logits = model(imgs)

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels)

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels).float().mean()

        # Record the loss and accuracy as plain Python floats so that no
        # device tensors are kept alive across the epoch.
        train_loss.append(loss.item())
        train_accs.append(acc.item())

    # The average loss and accuracy of the training set is the average of the recorded
    # per-batch values (every batch counts equally, including a smaller final batch).
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 001/080 ] loss = 1.99787, acc = 0.29537


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 002/080 ] loss = 1.64149, acc = 0.43680


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 003/080 ] loss = 1.45473, acc = 0.49429


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 004/080 ] loss = 1.30148, acc = 0.54497


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 005/080 ] loss = 1.18796, acc = 0.58578


  0%|          | 0/78 [00:00<?, ?it/s]