In [None]:
# Import necessary packages.
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, ImageFolder
import torch.nn as nn
import matplotlib.pyplot as plt
# This is for the progress bar.
from tqdm import tqdm

In [None]:
class MyDataset(Dataset):
    """Minimal map-style dataset pairing samples with their labels.

    Wraps two parallel, indexable containers so that a ``DataLoader`` can
    batch them. ``x`` and ``y`` are stored exactly as given; the dataset
    length is the length of ``x``.
    """

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # The sample container alone defines the dataset size.
        return len(self.x)

    def __getitem__(self, index):
        # Look up the sample and its label at the same position.
        sample = self.x[index]
        label = self.y[index]
        return sample, label

## 图片预处理

In [None]:
def _load_rgb_image(path):
    """Read the image file at ``path`` and return it as a decoded RGB PIL image."""
    # ``Image.open`` is lazy and keeps the file handle open. Decoding inside the
    # ``with`` block releases the handle promptly, and ``convert("RGB")``
    # guarantees three channels even for grayscale / RGBA / CMYK files.
    # A module-level function (unlike a lambda) can also be pickled by
    # DataLoader worker processes that are started with "spawn".
    with open(path, "rb") as f:
        return Image.open(f).convert("RGB")


def gen_loader(batch_size=256, data_root="../input/ml2021spring-hw3/food-11", num_workers=8):
    """Build the datasets and data loaders used by the notebook.

    Args:
        batch_size: Number of images per batch for every loader. Larger is
            better as long as it fits in memory.
        data_root: Directory containing the ``training/labeled``,
            ``training/unlabeled``, ``validation`` and ``testing`` folders.
        num_workers: Number of worker processes used by each loader.

    Returns:
        A tuple ``(train_loader, valid_loader, test_loader, train_set,
        unlabeled_set)``. The unlabeled set is returned as a dataset (not a
        loader) and uses the same random augmentations as the training set.
    """
    # Training-time augmentation: random crop/resize to 224x224, horizontal
    # flip and a rotation of up to 30 degrees.
    train_tfm = transforms.Compose([
        torchvision.transforms.RandomResizedCrop((224, 224), scale=(0.3, 1.0), ratio=(0.75, 1.333)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(30, expand=False, center=None),
        transforms.ToTensor(),  # also scales pixel values to [0, 1]
    ])

    # No augmentation for validation / testing: just resize and convert to a tensor.
    test_tfm = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    def make_set(subdir, tfm):
        return DatasetFolder(f"{data_root}/{subdir}", loader=_load_rgb_image,
                             extensions="jpg", transform=tfm)

    train_set = make_set("training/labeled", train_tfm)
    valid_set = make_set("validation", test_tfm)
    unlabeled_set = make_set("training/unlabeled", train_tfm)
    test_set = make_set("testing", test_tfm)

    loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)
    train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
    # Evaluation loaders keep a fixed order: shuffling brings no benefit and
    # makes the batch-averaged validation metrics vary from run to run.
    valid_loader = DataLoader(valid_set, shuffle=False, **loader_kwargs)
    # The test order must stay fixed because predictions are written out by index.
    test_loader = DataLoader(test_set, shuffle=False, **loader_kwargs)
    return train_loader, valid_loader, test_loader, train_set, unlabeled_set



In [None]:
class CNN_with_dropout(nn.Module):
    """Small CNN classifier with a dropout-regularised head.

    Expects images of shape [3, 224, 224] and produces 11 logits per image.
    The feature extractor is a flat ``nn.Sequential`` of Conv-BatchNorm-ReLU
    stages (most followed by pooling); the head is Linear-ReLU-Dropout-Linear.
    """

    @staticmethod
    def _conv_stage(in_ch, out_ch, kernel=3, stride=1, padding=1, pool=None):
        """Return [Conv2d, BatchNorm2d, ReLU] plus ``pool`` (if given) as a flat list."""
        stage = [
            nn.Conv2d(in_ch, out_ch, kernel, stride, padding),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
        ]
        if pool is not None:
            stage.append(pool)
        return stage

    def __init__(self):
        super().__init__()
        stage = self._conv_stage

        # Input image size: [3, 224, 224]. Trailing comments give the shape
        # after each stage.
        feature_layers = (
            stage(3, 64, kernel=7, stride=2, padding=3, pool=nn.MaxPool2d(3, 2, 1))  # [64, 56, 56]
            + stage(64, 64)                                                         # [64, 56, 56]
            + stage(64, 128, pool=nn.MaxPool2d(2, 2, 0))                            # [128, 28, 28]
            + stage(128, 128, pool=nn.MaxPool2d(2, 2, 0))                           # [128, 14, 14]
            + stage(128, 256, pool=nn.MaxPool2d(2, 2, 0))                           # [256, 7, 7]
            + stage(256, 256, pool=nn.AvgPool2d(7, 7, 0))                           # [256, 1, 1]
        )
        self.cnn_layers = nn.Sequential(*feature_layers)

        # Classification head: 256 features -> 64 hidden units -> 11 classes.
        head = [
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(64, 11),
        ]
        self.fc_layers = nn.Sequential(*head)

    def forward(self, x):
        # Convolutional feature extraction.
        features = self.cnn_layers(x)
        # Collapse everything but the batch dimension before the linear layers.
        flat = features.flatten(1)
        # The fully-connected head turns the features into class logits.
        return self.fc_layers(flat)

In [None]:
def get_pseudo_labels(dataset, model, threshold=0.8, batch_size = 256):
    """Pseudo-label ``dataset`` with ``model`` and keep only confident samples.

    Every batch is forwarded through ``model`` in eval mode; a sample is kept
    when its highest softmax probability is strictly greater than
    ``threshold``, and the arg-max class becomes its label. The kept image
    tensors are exactly those yielded by ``dataset`` (after whatever transform
    it applies) and are all held in CPU memory.

    You are NOT allowed to use any models trained on external data for
    pseudo-labeling.

    Args:
        dataset: Dataset yielding ``(image_tensor, label)`` pairs; the label
            is ignored.
        model: Classifier returning one row of logits per image.
        threshold: Minimum (exclusive) confidence for a sample to be kept.
        batch_size: Batch size used while labeling.

    Returns:
        A ``MyDataset`` of ``(image, int64 label)`` pairs, or ``None`` when no
        sample exceeds the threshold. The model is left in train mode in
        either case.
    """
    # Run on the device the model already lives on; only guess when the model
    # has no parameters to inspect.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Construct a data loader.
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # Make sure the model is in eval mode.
    model.eval()

    # Collect the per-batch selections and concatenate once at the end, which
    # avoids re-copying the growing tensors on every iteration.
    kept_imgs, kept_labels = [], []
    try:
        # Gradients are not needed for labeling; disabling them speeds up the forward pass.
        with torch.no_grad():
            for img, _ in tqdm(data_loader):
                logits = model(img.to(device))
                probs = torch.softmax(logits, dim=-1)     # (n_b, n_classes)
                confidence, labels = probs.max(dim=1)
                # ``img`` stays on the CPU, so the mask and the labels must be
                # moved there before they are used for indexing.
                keep = (confidence > threshold).cpu()
                if keep.any():
                    kept_imgs.append(img[keep])
                    kept_labels.append(labels.cpu()[keep])
    finally:
        # Turn off the eval mode on every exit path, including the "nothing
        # selected" case and errors raised while labeling.
        model.train()

    if not kept_imgs:
        return None
    # ``labels`` come from an arg-max, so the concatenated labels stay int64.
    return MyDataset(torch.cat(kept_imgs), torch.cat(kept_labels))

In [None]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

In [None]:
# ----- Model, loss and optimizer -----
# Build the network and move it onto the selected device.
# (Alternative backbone: torchvision.models.resnet18() with its `fc` layer
#  replaced by nn.Linear(in_features=512, out_features=11, bias=True).)
model = CNN_with_dropout()
model = model.to(device)
model.device = device   # keep the device name on the model object as well

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-5, lr=0.0003)

# ----- Run configuration -----
n_epochs = 200
batch_size = 256

# ----- Per-epoch history -----
Loss_train, Loss_valid = [], []
Acc_train, Acc_valid = [], []
Best_Acc = 0        # best validation accuracy so far; decides when the model is saved

# ----- Data -----
train_loader, valid_loader, test_loader, train_set, unlabeled_set = gen_loader()

# Whether to do semi-supervised learning.
do_semi = True

def _train_on_batch(imgs, labels):
    """Run one optimisation step on a single batch.

    Uses the notebook-level ``model``, ``loss_func``, ``optimizer`` and
    ``device``. Returns ``(loss, logits, labels)`` where ``labels`` has been
    cast to int64 and moved to ``device``.
    """
    labels = labels.long().to(device)
    # Forward the data. (Data and model must be on the same device.)
    logits = model(imgs.to(device))
    # Cross-entropy applies softmax internally, so raw logits are passed in.
    loss = loss_func(logits, labels)
    # Gradients left over from the previous step must be cleared first.
    optimizer.zero_grad()
    loss.backward()
    # Clip the gradient norm for stable training.
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
    optimizer.step()
    return loss, logits, labels


for epoch in range(n_epochs):
    # ---------- Semi-supervised step ----------
    # Once the model is reasonably accurate, relabel the unlabeled data and
    # train on the confident pseudo-labels before the regular epoch.
    # The gate uses epoch-level validation accuracy (best so far and most
    # recent), not the accuracy of a single validation batch.
    if do_semi and Best_Acc > 0.5 and Acc_valid[-1] > 0.5:
        # Obtain pseudo-labels for unlabeled data using the current model.
        pseudo_set = get_pseudo_labels(unlabeled_set, model)
        if pseudo_set is not None:
            pseudo_loader = DataLoader(pseudo_set, batch_size=batch_size, shuffle=True)
            model.train()
            for imgs, labels in tqdm(pseudo_loader):           # train on pseudo-labeled data
                _train_on_batch(imgs, labels)

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    # Iterate the training set by batches.
    for imgs, labels in tqdm(train_loader):
        loss, logits, labels = _train_on_batch(imgs, labels)

        # Compute the accuracy for the current batch.
        acc = (logits.argmax(dim=-1) == labels).float().mean()

        # Record plain Python floats so the history never holds (GPU) tensors.
        train_loss.append(loss.item())
        train_accs.append(acc.item())

    # The average loss and accuracy of the training set is the average of the recorded values.
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that modules like dropout are disabled.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for imgs, labels in tqdm(valid_loader):
        labels = labels.to(device)

        # No gradient is needed in validation; the loss is still computed for monitoring.
        with torch.no_grad():
            logits = model(imgs.to(device))
            loss = loss_func(logits, labels)

        # Compute the accuracy for the current batch.
        acc = (logits.argmax(dim=-1) == labels).float().mean()

        # Record the loss and accuracy as plain floats.
        valid_loss.append(loss.item())
        valid_accs.append(acc.item())

    # The average loss and accuracy for the validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    Loss_train.append(train_loss)
    Loss_valid.append(valid_loss)
    Acc_train.append(train_acc)
    Acc_valid.append(valid_acc)

    # Keep the checkpoint with the best validation accuracy. The whole module
    # object is pickled (not only its state_dict) because the inference cell
    # restores it with a bare torch.load.
    if valid_acc > Best_Acc:
        Best_Acc = valid_acc
        torch.save(model, "model(dropout).pkl")

def _plot_history(train_values, valid_values, title):
    """Plot one per-epoch metric for the training and validation sets."""
    # History entries may be Python floats or 0-dim tensors (possibly on the
    # GPU). float() handles both, whereas matplotlib cannot convert CUDA
    # tensors on its own.
    plt.plot([float(v) for v in train_values])
    plt.plot([float(v) for v in valid_values])
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(title)
    plt.legend(['Training set', 'Valid_set'])
    plt.show()


_plot_history(Loss_train, Loss_valid, 'Loss')
_plot_history(Acc_train, Acc_valid, 'Accuracy')

In [None]:
# Restore the best checkpoint written by the training loop.
# NOTE(security): torch.load unpickles arbitrary Python objects -- only load
# checkpoint files you created yourself.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects full-module pickles like this one; pass weights_only=False on
# those versions if loading fails.
# map_location places the weights on the current device, so a checkpoint saved
# on a GPU can still be loaded on a CPU-only machine.
model = torch.load("model(dropout).pkl", map_location=device)
model.eval()

# Initialize a list to store the predictions.
predictions = []

# We don't need gradients in testing, and there are no labels to compute a loss.
with torch.no_grad():
    # Each batch is (images, labels); the labels are placeholders produced by
    # DatasetFolder (no ground truth exists for the test set) and are ignored.
    for imgs, _ in tqdm(test_loader):
        logits = model(imgs.to(device))

        # Take the class with the greatest logit as the prediction and record it.
        predictions.extend(logits.argmax(dim=-1).cpu().tolist())

# Save predictions into the file.
with open("predict.csv", "w") as f:
    # The first row must be "Id,Category".
    f.write("Id,Category\n")

    # For the rest of the rows, each image id corresponds to a predicted class.
    f.writelines(f"{i},{pred}\n" for i, pred in enumerate(predictions))

In [None]:
# Block this cell (and therefore the kernel) for 50000 seconds (~13.9 hours).
# NOTE(review): presumably a keep-alive hack for the hosted session -- confirm
# it is still needed, otherwise remove this cell.
import time
time.sleep(50000)