### 在Domain A上训练的图像识别模型, 在Domain B上测试
### simple baseline(lambda=0.1): 51.7%
### 把网络改轻一点: 29%(效果很差)
### 原始网络,feature输出层激活函数换tanh,Classifier加BN,lambda=0.3,500epochs: 32.7%
### 最后的Classifier不加BN: 54%
### 小模型不带BN, lambda=0.3: 57.3%
### lambda=1, 爆train 500epochs: 49.9%
### lambda=3, epoch=200: 51.7%
### lambda用schedule(0->2), canny(200, 300), epoch=400,大模型: 50%
### 复刻一下小模型再用在聚类上

### 训练集图片

In [None]:
import cv2
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
def no_axis_show(img, title='', cmap=None):
    """Display ``img`` with ``plt.imshow`` and hide both axes.

    Args:
        img: image data forwarded unchanged to ``plt.imshow``.
        title: text passed to ``plt.title``; empty by default.
        cmap: colormap forwarded to ``plt.imshow``.
    """
    # "nearest" interpolation keeps the pixels crisp instead of smoothing them.
    image = plt.imshow(img, cmap=cmap, interpolation='nearest')
    axes = image.axes
    # Hide the x and y axes so only the picture itself is shown.
    for axis in (axes.get_xaxis(), axes.get_yaxis()):
        axis.set_visible(False)
    plt.title(title)

# Preview one training image per class: file ``500*i.bmp`` from class folder ``i``.
titles = ['horse', 'bed', 'clock', 'apple', 'cat', 'plane', 'television', 'dog', 'dolphin', 'spider']
plt.figure(figsize=(18, 18))
for i, title in enumerate(titles):
    # One row, one column per class; subplot indices are 1-based.
    plt.subplot(1, len(titles), i + 1)
    # no_axis_show returns nothing, so its result is not bound to a name.
    no_axis_show(
        plt.imread(f'../input/ml2021spring-hw11/real_or_drawing/train_data/{i}/{500*i}.bmp'),
        title=title,
    )

### 测试集图片

In [None]:
# Preview the first ten test images (files 00000.bmp .. 00009.bmp).
plt.figure(figsize=(18, 18))
for i in range(10):
    plt.subplot(1, 10, i + 1)
    # File names are the index zero-padded to five digits.
    no_axis_show(plt.imread(f'../input/ml2021spring-hw11/real_or_drawing/test_data/0/{i:05d}.bmp'))

### 创建dataset(先把RGB图转成灰度图,再用Canny做边缘检测)

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function

import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Training-time pipeline for the source (labelled) images.
source_transform = transforms.Compose([
    # Turn RGB to grayscale. (Because Canny does not support RGB images.)
    transforms.Grayscale(),
    # cv2 does not accept PIL images, so convert to np.array first and then
    # apply the cv2.Canny edge detector.
    transforms.Lambda(lambda x: cv2.Canny(np.array(x), 170, 300)),
    # Transform the np.array back to a PIL image for the remaining transforms.
    transforms.ToPILImage(),
    # 50% horizontal flip. (For augmentation)
    transforms.RandomHorizontalFlip(),
    # Rotate +- 15 degrees (for augmentation); pixels left empty by the
    # rotation are filled with zero.
    transforms.RandomRotation(15, fill=(0,)),
    # Transform to tensor for model inputs.
    transforms.ToTensor(),
])

# Training-time pipeline for the target (unlabelled) images.
target_transform = transforms.Compose([
    # Turn RGB to grayscale.
    transforms.Grayscale(),
    # Resize: the source data is 32x32, so the target data is enlarged
    # from 28x28 to 32x32.
    transforms.Resize((32, 32)),
    # 50% horizontal flip. (For augmentation)
    transforms.RandomHorizontalFlip(),
    # Rotate +- 15 degrees (for augmentation); pixels left empty by the
    # rotation are filled with zero.
    transforms.RandomRotation(15, fill=(0,)),
    # Transform to tensor for model inputs.
    transforms.ToTensor(),
])

# Inference-time pipeline for the target images: same preprocessing as
# target_transform but WITHOUT the random flip/rotation, so that predictions
# are deterministic and made on the un-augmented images.
test_transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

source_dataset = ImageFolder('../input/ml2021spring-hw11/real_or_drawing/train_data', transform=source_transform)
target_dataset = ImageFolder('../input/ml2021spring-hw11/real_or_drawing/test_data', transform=target_transform)
# Same folder as target_dataset, read through the deterministic transform.
test_dataset = ImageFolder('../input/ml2021spring-hw11/real_or_drawing/test_data', transform=test_transform)

source_dataloader = DataLoader(source_dataset, batch_size=256, shuffle=True)
target_dataloader = DataLoader(target_dataset, batch_size=1024, shuffle=True)
# shuffle=False keeps the prediction order aligned with the dataset order.
test_dataloader = DataLoader(test_dataset, batch_size=1024, shuffle=False)

### 模型

In [None]:
class FeatureExtractor(nn.Module):
    """Convolutional feature extractor shared by the label and domain heads.

    Five ``Conv2d -> BatchNorm2d -> activation -> MaxPool2d(2)`` stages map a
    1-channel image to 512 channels.  Each pooling halves the spatial size, so
    a 32x32 input ends up as a 1x1 map, i.e. one 512-d vector per image.
    The last stage uses ``Tanh`` instead of ``ReLU``.
    """

    def __init__(self):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(1, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(256, 512, 3, 1, 1),
            nn.BatchNorm2d(512),
            nn.Tanh(),
            nn.MaxPool2d(2)
        )

    def forward(self, x):
        """Return the flattened features of ``x``, shape ``(batch, features)``.

        For 32x32 inputs the result is ``(batch, 512)``.
        """
        # flatten(1) keeps the batch dimension.  A bare squeeze() would also
        # drop it when the batch holds a single image, yielding a 1-D tensor
        # that breaks the downstream per-sample slicing and argmax(dim=1).
        return self.conv(x).flatten(1)

class LabelPredictor(nn.Module):
    """Fully connected head that maps a 512-d feature vector to 10 class logits."""

    def __init__(self):
        super().__init__()

        hidden = 512
        # Two hidden Linear+ReLU layers followed by the 10-way output layer.
        self.layer = nn.Sequential(
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 10),
        )

    def forward(self, h):
        """Return the raw (un-normalised) class logits for features ``h``."""
        return self.layer(h)

class DomainClassifier(nn.Module):
    """Fully connected head that maps a 512-d feature vector to one domain logit."""

    def __init__(self):
        super().__init__()

        width = 512
        stages = []
        # Four identical hidden stages: Linear -> BatchNorm1d -> ReLU.
        for _ in range(4):
            stages.extend([
                nn.Linear(width, width),
                nn.BatchNorm1d(width),
                nn.ReLU(),
            ])
        # Final projection to a single raw logit (no sigmoid applied here).
        stages.append(nn.Linear(width, 1))
        self.layer = nn.Sequential(*stages)

    def forward(self, h):
        """Return the raw domain logit, shape ``(batch, 1)``, for features ``h``."""
        return self.layer(h)

### 小模型

In [None]:
# class Test_model(nn.Module):

#     def __init__(self):
#         super(Test_model, self).__init__()

#         self.conv = nn.Sequential(
#             nn.Conv2d(1, 16, 3, 1, 1),
#             nn.BatchNorm2d(16),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(16, 32, 3, 1, 1),
#             nn.BatchNorm2d(32),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(32, 64, 3, 1, 1),
#             nn.BatchNorm2d(64),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(64, 64, 3, 1, 1),
#             nn.BatchNorm2d(64),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(64, 128, 3, 1, 1),
#             nn.BatchNorm2d(128),
#             nn.ReLU(),
#             nn.MaxPool2d(2),
            
#             nn.Linear(128, 10),
#         )
        
#     def forward(self, x):
#         x = self.conv(x)
#         return x


# class FeatureExtractor(nn.Module):

#     def __init__(self):
#         super(FeatureExtractor, self).__init__()

#         self.conv = nn.Sequential(
#             nn.Conv2d(1, 16, 3, 1, 1),
#             nn.BatchNorm2d(16),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(16, 32, 3, 1, 1),
#             nn.BatchNorm2d(32),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(32, 64, 3, 1, 1),
#             nn.BatchNorm2d(64),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(64, 64, 3, 1, 1),
#             nn.BatchNorm2d(64),
#             nn.ReLU(),
#             nn.MaxPool2d(2),

#             nn.Conv2d(64, 128, 3, 1, 1),
#             nn.BatchNorm2d(128),
#             nn.Tanh(),
#             nn.MaxPool2d(2)
#         )
        
#     def forward(self, x):
#         x = self.conv(x).squeeze()
#         return x

# class LabelPredictor(nn.Module):

#     def __init__(self):
#         super(LabelPredictor, self).__init__()

#         self.layer = nn.Sequential(
#             nn.Linear(128, 32),
#             nn.ReLU(),
#             nn.Linear(32, 10),
#         )

#     def forward(self, h):
#         c = self.layer(h)
#         return c

# class DomainClassifier(nn.Module):

#     def __init__(self):
#         super(DomainClassifier, self).__init__()

#         self.layer = nn.Sequential(
#             nn.Linear(128, 128),
#             nn.BatchNorm1d(128),
#             nn.ReLU(),

#             nn.Linear(128, 32),
#             nn.BatchNorm1d(32),
#             nn.ReLU(),

#             nn.Linear(32, 1),
#         )

#     def forward(self, h):
#         y = self.layer(h)
#         return y

In [None]:
# Hyper-parameters.
learning_rate = 1e-3
num_epochs = 300
L = 0.3  # L: lambda, passed to train_epoch as the weight of the domain loss

# The three sub-networks, moved to the GPU.
feature_extractor = FeatureExtractor().cuda()
label_predictor = LabelPredictor().cuda()
domain_classifier = DomainClassifier().cuda()

# Multi-class loss for the label head, binary loss on raw logits for the domain head.
class_criterion = nn.CrossEntropyLoss()
domain_criterion = nn.BCEWithLogitsLoss()

# One Adam optimizer per sub-network (F: feature extractor, C: label
# predictor, D: domain classifier), all with the same learning rate.
optimizer_F, optimizer_C, optimizer_D = (
    optim.Adam(net.parameters(), lr=learning_rate)
    for net in (feature_extractor, label_predictor, domain_classifier)
)

### semi-supervise

In [None]:
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
class MyDataset(Dataset):
    """Minimal in-memory dataset that pairs ``x[i]`` with ``y[i]``.

    Used to wrap pseudo-labelled samples so a DataLoader can batch them.
    """

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # The number of samples is defined by ``x`` alone.
        return len(self.x)

    def __getitem__(self, index):
        # Items come back exactly as stored (tensors when built from tensors).
        return self.x[index], self.y[index]

def get_pseudo_labels(dataset, model1, model2, threshold=0.1, batch_size = 1024):
    """Build a pseudo-labelled dataset from confident predictions.

    Each batch of ``dataset`` is run through ``model2(model1(batch))``; a
    sample is kept when the largest softmax probability exceeds ``threshold``,
    and the arg-max class becomes its label.

    Args:
        dataset: dataset yielding ``(image, label)`` pairs; the labels are ignored.
        model1: first stage (feature extractor); assumed to already live on
            the device chosen below.
        model2: second stage (classifier head) applied to ``model1``'s output.
        threshold: minimum confidence (exclusive) for a sample to be kept.
        batch_size: batch size used for the forward passes.

    Returns:
        A ``MyDataset`` of the kept images (CPU tensor) and their predicted
        labels (CPU ``int64`` tensor), or ``None`` if no sample passed the
        threshold.

    Both models are switched to eval mode for the predictions and are put
    back into train mode before returning, whichever path is taken.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Keep the dataset order so results are reproducible.
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    model1.eval()
    model2.eval()

    kept_imgs, kept_labels = [], []
    try:
        # no_grad: only predictions are needed, not gradients.
        with torch.no_grad():
            for img, _ in data_loader:
                logits = model2(model1(img.to(device)))
                # Bring the probabilities back to the CPU so the mask below is
                # on the same device as ``img``; indexing a CPU tensor with a
                # CUDA mask is an error.
                probs = logits.softmax(dim=-1).cpu()
                confidence, label = probs.max(dim=1)
                keep = confidence > threshold
                kept_imgs.append(img[keep])
                kept_labels.append(label[keep])
    finally:
        # Always restore train mode, including on the "nothing kept" path.
        model1.train()
        model2.train()

    if not kept_imgs:
        return None
    # Concatenate once instead of growing a tensor inside the loop.
    data_x = torch.cat(kept_imgs)
    data_y = torch.cat(kept_labels)
    if len(data_x) == 0:
        return None
    return MyDataset(data_x, data_y)

### 训练(可调lambda)
### D: domain classifier
### F: feature extractor
### C: label predictor

In [None]:
def train_epoch(source_dataloader, target_dataloader, lamb):
    '''
      Run one epoch of domain-adversarial training.

      Uses the module-level networks (feature_extractor, label_predictor,
      domain_classifier), criteria and optimizers.  Batches are drawn in
      lockstep from both loaders, so the epoch ends when the shorter one is
      exhausted.

      Args:
        source_dataloader: dataloader of the labelled source data.
        target_dataloader: dataloader of the target data (labels are ignored).
        lamb: controls the balance of domain adaptation and classification.

      Returns:
        Tuple of (mean domain-classifier loss per batch,
                  mean feature-extractor/label-predictor loss per batch,
                  label accuracy on the source data,
                  domain-classifier accuracy on the mixed data).

      Raises:
        ValueError: if the loaders yield no batch at all.
    '''

    # Make sure BatchNorm uses batch statistics even if a previous step
    # (e.g. inference) left the networks in eval mode.
    feature_extractor.train()
    label_predictor.train()
    domain_classifier.train()

    # D loss: loss of the domain classifier.
    # F loss: loss of the feature extractor & label predictor.
    running_D_loss, running_F_loss = 0.0, 0.0
    total_hit, total_num = 0.0, 0.0
    # Accuracy of the domain classifier (discriminator).
    total_dis, total_dis_num = 0.0, 0.0
    num_batches = 0

    for (source_data, source_label), (target_data, _) in zip(source_dataloader, target_dataloader):
        num_batches += 1

        source_data = source_data.cuda()
        source_label = source_label.cuda()
        target_data = target_data.cuda()
        n_source = source_data.shape[0]

        # Mix the source and target data in one batch; otherwise the running
        # statistics of batch norm would be misled (the running mean/var of
        # source and target data are different).
        mixed_data = torch.cat([source_data, target_data], dim=0)
        domain_label = torch.zeros([n_source + target_data.shape[0], 1]).cuda()
        # Domain label: 1 for source data, 0 for target data.
        domain_label[:n_source] = 1

        # Step 1: train the domain classifier.
        feature = feature_extractor(mixed_data)
        # detach() cuts the gradient flow at the features, so this backward
        # pass only produces gradients for the domain classifier and leaves
        # the feature extractor untouched.
        domain_logits = domain_classifier(feature.detach())
        loss = domain_criterion(domain_logits, domain_label)
        running_D_loss += loss.item()
        loss.backward()
        optimizer_D.step()

        # Step 2: train the feature extractor and the label predictor.
        class_logits = label_predictor(feature[:n_source])
        domain_logits = domain_classifier(feature)
        # loss = classification cross entropy - lamb * domain binary cross entropy.
        # The subtraction plays the same role as the generator loss in a GAN:
        # the feature extractor is rewarded for fooling the domain classifier.
        loss = class_criterion(class_logits, source_label) - lamb * domain_criterion(domain_logits, domain_label)

        running_F_loss += loss.item()
        loss.backward()
        optimizer_F.step()
        optimizer_C.step()

        # Step 2 also left gradients on the domain classifier; clear
        # everything before the next batch.
        optimizer_D.zero_grad()
        optimizer_F.zero_grad()
        optimizer_C.zero_grad()

        total_hit += torch.sum(torch.argmax(class_logits, dim=1) == source_label).item()
        total_num += n_source
        # domain_logits are raw logits (the loss is BCEWithLogitsLoss), so the
        # decision boundary sigmoid(z) > 0.5 corresponds to z > 0, not z > 0.5.
        total_dis += torch.sum((domain_logits > 0) == domain_label.bool()).item()
        total_dis_num += domain_label.shape[0]

    if num_batches == 0:
        raise ValueError('train_epoch received no batches: a dataloader is empty')

    return running_D_loss / num_batches, running_F_loss / num_batches, total_hit / total_num, total_dis / total_dis_num

# NOTE: the standalone `Test_model` class is commented out in this file, so
# building it here raised NameError before training could start.  Its setup is
# kept disabled together with the semi-supervised branch below that uses it;
# re-enable all of them (and the class) together.
# test_model = Test_model().to('cuda')
# loss_func = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(test_model.parameters(), lr=learning_rate, weight_decay=1e-5)

# Train for `num_epochs` epochs.
for epoch in range(num_epochs):
    # (disabled) semi-supervised step: train test_model on pseudo-labelled target data.
#     if epoch > num_epochs//2:  # semi-supervise
#     if epoch >= 0:  # semi-supervise
#         pseudo_set = get_pseudo_labels(target_dataset, model1=feature_extractor, model2=label_predictor)
#         if pseudo_set is not None:
#             print('pseudo长度: ', len(pseudo_set))
#             pseudo_loader = DataLoader(pseudo_set, batch_size=1024, shuffle=True)
#             test_model.train()
#             for batch in pseudo_loader:           # train on the pseudo-labelled batches
#                 imgs, labels = batch
#                 logits = test_model(imgs.to('cuda'))
#                 loss = loss_func(logits, labels.long().to('cuda'))
#                 optimizer.zero_grad()
#                 loss.backward()
#                 grad_norm = nn.utils.clip_grad_norm_(test_model.parameters(), max_norm=10)
#                 optimizer.step()

    # Lambda should be chosen carefully; a schedule such as the one below can
    # be used instead of the constant L.
#     cur_lamb = L*((epoch/num_epochs)**(1/3))
    train_D_loss, train_F_loss, train_acc, dis_acc = train_epoch(source_dataloader, target_dataloader, lamb=L)

    # Overwrite the checkpoints after every epoch so the latest weights survive an interruption.
    torch.save(feature_extractor.state_dict(), 'extractor_model.bin')
    torch.save(label_predictor.state_dict(), 'predictor_model.bin')
    print('epoch {:>3d}: train D loss: {:6.4f}, train F loss: {:6.4f}, Classifier acc: {:6.4f}, dis_acc: {:6.4f}'.format(epoch, train_D_loss, train_F_loss, train_acc, dis_acc))


### Inference

In [None]:
# Predict a class for every test image and write the submission file.
result = []
label_predictor.eval()
feature_extractor.eval()
# no_grad: inference needs no autograd graph, which saves memory and time.
with torch.no_grad():
    for test_data, _ in test_dataloader:
        test_data = test_data.cuda()

        class_logits = label_predictor(feature_extractor(test_data))
#         class_logits = test_model(test_data)  # semi-supervise

        result.append(torch.argmax(class_logits, dim=1).cpu().numpy())

import pandas as pd
result = np.concatenate(result)

# Generate the submission: ids are the 0-based positions in loader order.
df = pd.DataFrame({'id': np.arange(0,len(result)), 'label': result})
df.to_csv('DaNN_submission.csv',index=False)

In [None]:
import time
# Block for 40000 seconds (about 11 hours).  Presumably this keeps the
# notebook session alive after the run has finished — TODO confirm intent.
time.sleep(40000)