In [None]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter

In [None]:
def setup_seed(seed):
    """Seed every random number generator this notebook may draw from.

    Args:
        seed (int): Value used to seed Python's ``random`` module, NumPy's
            global generator and PyTorch's CPU and CUDA generators.

    Besides seeding, cuDNN is switched to deterministic kernels and its
    auto-tuner is disabled so that repeated runs select the same algorithms.
    """
    # Python's own generator; presumably used by some torchvision versions
    # for augmentation parameters -- seeding it is harmless otherwise.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different kernels from run to run.
    torch.backends.cudnn.benchmark = False

setup_seed(20)

In [None]:
# Device configuration: use the GPU when one is visible, otherwise the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Training hyper-parameters
num_epoches, learning_rate, batch_size = 40, 0.1, 128

# Learning-rate schedule, written as (epoch, lr) breakpoints.
# `epoches` and `lrs` are the two parallel lists derived from it.
lr_schedule = [
    (1, learning_rate),
    (3, 0.2),
    (4, 0.1),
    (20, 0.1),
    (21, 0.01),
    (30, 0.01),
    (31, 0.001),
    (37, 0.001),
    (38, 0.0001),
    (num_epoches, 0.0001),
]
epoches = [knot_epoch for knot_epoch, _ in lr_schedule]
lrs = [knot_lr for _, knot_lr in lr_schedule]

In [None]:
class Cutout(object):
    """Zero out randomly placed square patches of an image tensor.

    Args:
        n_holes (int): How many patches are cut out of each image.
        length (int): Side length, in pixels, of every square patch.
    """
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """Apply the cutout mask.

        Args:
            img (Tensor): Image tensor laid out as (C, H, W).
        Returns:
            Tensor: A new tensor equal to ``img`` with ``n_holes`` patches of
            at most ``length x length`` pixels set to zero in every channel.
        """
        height, width = img.size(1), img.size(2)
        half = self.length // 2

        # 1 keeps a pixel, 0 erases it.
        keep = np.ones((height, width), np.float32)

        for _ in range(self.n_holes):
            # Patch centre; the row is drawn before the column.
            cy = np.random.randint(height)
            cx = np.random.randint(width)

            # Patches that stick out of the image are clipped at the border.
            top, bottom = np.clip([cy - half, cy + half], 0, height)
            left, right = np.clip([cx - half, cx + half], 0, width)

            keep[top:bottom, left:right] = 0.

        # The single (H, W) mask is broadcast over all channels.
        return img * torch.from_numpy(keep).expand_as(img)

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    """Mix each sample of a batch with another, randomly chosen sample.

    Args:
        x (Tensor): Batch of inputs; the first dimension is the batch.
        y (Tensor): Targets, indexable along the first dimension like ``x``.
        alpha (float): Parameter of the Beta(alpha, alpha) distribution the
            mixing weight is drawn from. ``alpha <= 0`` disables mixing.
        use_cuda (bool): Kept for backward compatibility only. The
            permutation index is always placed on ``x``'s device, so the
            function works on CPU-only machines as well.

    Returns:
        tuple: ``(mixed_x, y_a, y_b, lam)`` where
        ``mixed_x = lam * x + (1 - lam) * x[index]``, ``y_a`` is ``y``,
        ``y_b`` is ``y[index]`` and ``lam`` is the mixing weight.
    """
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)  # Beta distribution
    else:
        lam = 1

    batch_size = x.size(0)
    # Draw the permutation on the CPU (so the random stream does not depend
    # on the device), then move it next to the data it indexes.
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Combine the losses against both mixup targets.

    Returns ``lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)``.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

def CrossEntropyLoss_label_smooth(outputs, targets, device,
                                  num_classes=10, epsilon=0.075):
    """Cross-entropy loss against label-smoothed targets.

    The target distribution puts ``1 - epsilon`` on the true class and
    spreads ``epsilon`` evenly over the remaining ``num_classes - 1`` classes.

    Args:
        outputs (Tensor): Logits of shape (N, num_classes).
        targets (Tensor): Integer class indices of shape (N,).
        device: Device on which the smoothed label matrix is built; it has
            to be the device ``outputs`` lives on.
        num_classes (int): Number of classes (columns of ``outputs``).
        epsilon (float): Total probability mass moved off the true class.

    Returns:
        Tensor: Scalar loss, averaged over the N samples.
    """
    N = targets.size(0)
    off_value = epsilon / (num_classes - 1)
    # Build the label matrix directly on the target device instead of
    # creating it on the CPU and copying it over on every step.
    smoothed_labels = torch.full((N, num_classes), off_value, device=device)
    smoothed_labels.scatter_(dim=1, index=targets.to(device).unsqueeze(1),
                             value=1 - epsilon)
    log_prob = nn.functional.log_softmax(outputs, dim=1)
    return -torch.sum(log_prob * smoothed_labels) / N

In [None]:
# CIFAR-10 dataset
# Three-component mean / std passed to transforms.Normalize below.
cifar_norm_mean = (0.49139968, 0.48215827, 0.44653124)
cifar_norm_std = (0.24703233, 0.24348505, 0.26158768)

# Training pipeline: random crop + flip, tensor conversion, normalization,
# then one 16x16 cutout patch.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar_norm_mean, cifar_norm_std),
    Cutout(n_holes=1, length=16),
])

# Evaluation pipeline: tensor conversion and normalization only.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_norm_mean, cifar_norm_std),
])

# The data is read from ./data; nothing is downloaded here.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=False, transform=train_transform)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, transform=test_transform)

# Data loaders; pinned host memory is requested only when CUDA is available.
loader_options = dict(batch_size=batch_size,
                      pin_memory=torch.cuda.is_available())
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, shuffle=True, **loader_options)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, shuffle=False, **loader_options)

# CIFAR-10 class labels
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Quick sanity checks on the dataset and on one training batch.
print(train_dataset,'\n')
print(train_dataset[0][0].shape,'\n')
print(train_dataset[0][1],'\n')
print(train_dataset.targets[0],'\n')

batch = next(iter(train_loader))
images, labels = batch
print(images.shape)

In [None]:
# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_channels, out_channels, **conv_options)

In [None]:
# Residual block
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm stages with a skip connection and CELU.

    With ``stride=1`` the block maps (batch, in_c, h, w) to
    (batch, out_c, h, w); a larger stride shrinks the spatial size in the
    first convolution.

    Args:
        in_channels (int): Channels of the input tensor.
        out_channels (int): Channels produced by both convolutions.
        stride (int): Stride of the first convolution.
        downsample (nn.Module or None): Applied to the input before it is
            added back, so that it matches the main branch's shape. ``None``
            means the input is added unchanged.
    """
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        # (batch, in_c, h, w) -> (batch, out_c, h/stride, w/stride)
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # One in-place CELU module, reused after bn1 and after the addition.
        self.celu = nn.CELU(inplace=True)
        # (batch, out_c, h, w) -> (batch, out_c, h, w)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``celu(main_branch(x) + shortcut(x))``."""
        residual = x
        out = self.celu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Compare against None explicitly: the truth value of an nn.Module
        # container depends on its length, not on whether it was supplied.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        return self.celu(out)
    
# ResNet
class ResNet(nn.Module):
    """Small ResNet-style classifier.

    Shape comments below assume a (batch, 3, 32, 32) input. Other input
    sizes are accepted as long as a non-empty feature map survives the
    down-sampling stages: the final feature map is average-pooled to 1x1
    before the linear classifier.

    Args:
        block: Residual block class; called as
            ``block(in_channels, out_channels, stride, downsample)``.
        layers (sequence of int): ``layers[0]`` and ``layers[1]`` are the
            number of blocks in ``layer1`` and ``layer2`` respectively.
        num_classes (int): Size of the output logits.
    """
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        # prep
        self.conv1 = conv3x3(3, 16)          # (b,3,32,32) -> (b,16,32,32)
        self.bn1 = nn.BatchNorm2d(16)
        self.celu1 = nn.CELU(inplace=True)

        # layer1_ens
        self.conv2 = conv3x3(16, 16)         # (b,16,32,32) -> (b,16,32,32)
        self.bn2 = nn.BatchNorm2d(16)
        self.max_pool1 = nn.MaxPool2d(2, 2)  # (b,16,32,32) -> (b,16,16,16)
        self.celu2 = nn.CELU(inplace=True)
        # layers[0] blocks; the first one down-samples:
        # (b,16,16,16) -> (b,32,8,8) -> ... -> (b,32,8,8)
        self.layer1 = self.make_layer(block, 16, 32, layers[0], 2)

        # layer2_ens
        self.conv3 = conv3x3(32, 64)         # (b,32,8,8) -> (b,64,8,8)
        self.bn3 = nn.BatchNorm2d(64)
        self.max_pool2 = nn.MaxPool2d(2, 2)  # (b,64,8,8) -> (b,64,4,4)
        self.celu3 = nn.CELU(inplace=True)

        # layer3_ens
        self.conv4 = conv3x3(64, 64)         # (b,64,4,4) -> (b,64,4,4)
        self.bn4 = nn.BatchNorm2d(64)
        self.max_pool3 = nn.MaxPool2d(2, 2)  # (b,64,4,4) -> (b,64,2,2)
        self.celu4 = nn.CELU(inplace=True)
        # layers[1] blocks; the first one down-samples:
        # (b,64,2,2) -> (b,128,1,1) -> ... -> (b,128,1,1)
        self.layer2 = self.make_layer(block, 64, 128, layers[1], 2)

        self.fc = nn.Linear(128, num_classes)

    def make_layer(self, block, in_channels, out_channels, blocks, stride=1):
        """Stack ``blocks`` residual blocks into an ``nn.Sequential``.

        Only the first block changes the channel count / stride; it gets a
        3x3 conv + batch-norm shortcut whenever the shape changes. The
        remaining blocks map ``out_channels`` to ``out_channels`` at stride 1.
        """
        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = nn.Sequential(
                conv3x3(in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = [block(in_channels, out_channels, stride, downsample)]
        layers.extend(block(out_channels, out_channels)
                      for _ in range(1, blocks))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map an image batch (b, 3, H, W) to logits (b, num_classes)."""
        # prep
        out = self.celu1(self.bn1(self.conv1(x)))

        # layer1_ens (pooling is applied before the activation)
        out = self.max_pool1(self.bn2(self.conv2(out)))
        out = self.celu2(out)
        out = self.layer1(out)

        # layer2_ens
        out = self.max_pool2(self.bn3(self.conv3(out)))
        out = self.celu3(out)

        # layer3_ens
        out = self.max_pool3(self.bn4(self.conv4(out)))
        out = self.celu4(out)
        out = self.layer2(out)

        # Collapse whatever spatial extent is left to 1x1. For 32x32 inputs
        # the map is already 1x1, so this leaves the values unchanged.
        out = nn.functional.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)      # (b,128,1,1) -> (b,128)
        out = self.fc(out)
        return out

In [None]:
# Build the network (two residual blocks per stage) on the selected device.
model = ResNet(ResidualBlock, [2, 2])
model = model.to(device)

# Export the computation graph to TensorBoard by tracing a random batch.
dummy_input = torch.rand(20, 3, 32, 32).to(device)
with SummaryWriter(comment='Resnet9') as writer:
    writer.add_graph(model, (dummy_input,))

# Total number of scalar parameters in the model.
total_params = sum(tensor.numel() for tensor in model.parameters())
print('# model parameters:', total_params)

In [None]:
# prediction function
def pred_rate(preds, labels):
    """Return the fraction of entries in ``preds`` that equal ``labels``."""
    n_correct = (preds == labels).sum().item()
    return n_correct / labels.shape[0]

# For updating learning rate
def piecewise_linear(optimizer, curr_epoch, epoches, lrs):
    """Set the optimizer's learning rate from a piecewise-linear schedule.

    The schedule is given by breakpoints ``(epoches[i], lrs[i])``. At a
    breakpoint the learning rate is exactly ``lrs[i]``; between two
    breakpoints it is linearly interpolated. Epochs before the first or
    after the last breakpoint use the first / last learning rate.

    Args:
        optimizer: Object with a ``param_groups`` list of dicts; the ``'lr'``
            entry of every group is overwritten.
        curr_epoch (number): Epoch to evaluate the schedule at.
        epoches (sequence of number): Breakpoint epochs, in non-decreasing
            order.
        lrs (sequence of float): Learning rate at each breakpoint; same
            length as ``epoches``.

    Returns:
        float: The learning rate that was written to the optimizer.

    Raises:
        ValueError: If the schedule is empty, the two sequences differ in
            length, or ``epoches`` is not in non-decreasing order.
    """
    if len(lrs) == 0 or len(epoches) != len(lrs):
        raise ValueError('epoches and lrs must be non-empty and of equal length')

    if curr_epoch <= epoches[0]:
        # At or before the first breakpoint: hold the first value.
        lr = lrs[0]
    elif curr_epoch >= epoches[-1]:
        # At or past the last breakpoint: hold the last value.
        lr = lrs[-1]
    else:
        for i in range(len(lrs) - 1):
            left, right = epoches[i], epoches[i + 1]
            if curr_epoch == left:
                lr = lrs[i]
                break
            if left < curr_epoch < right:
                fraction = (curr_epoch - left) / (right - left)
                lr = lrs[i] + fraction * (lrs[i + 1] - lrs[i])
                break
        else:
            raise ValueError('epoches must be in non-decreasing order')

    # update lr
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return lr

In [None]:
# Optimizer: SGD with Nesterov momentum and weight decay over all
# parameters of `model`. The lr given here is the initial value.
sgd_options = dict(
    lr=learning_rate,
    momentum=0.9,
    weight_decay=5e-4,
    nesterov=True,
)
optimizer = torch.optim.SGD(model.parameters(), **sgd_options)

In [None]:
# Reload the model
# Warm-start from a previous checkpoint when one exists. The weights are
# loaded into the existing `model` object in place: building a new network
# here would leave `optimizer` pointing at the parameters of the old one,
# and the training loop would then never update the model it evaluates.
checkpoint_path = 'resnet9_piecewise_linear.ckpt'
if os.path.exists(checkpoint_path):
    # map_location lets a checkpoint saved on a GPU be restored on the
    # current device. NOTE: torch.load unpickles the file -- only load
    # checkpoints from a trusted source.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
else:
    print('No checkpoint found at {}; keeping the current weights.'.format(checkpoint_path))

In [None]:
# Histories recorded for plotting
loss_ens = []
lr_ens = [learning_rate]   # the first epoch runs with the optimizer's initial lr
pred_rate_ens = []

# Train the model
# Make sure batch-norm uses batch statistics even if the model was left in
# eval mode by an earlier cell.
model.train()
total_step = len(train_loader)
for epoch in range(num_epoches):
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = CrossEntropyLoss_label_smooth(outputs, labels, device)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log every 98th step of an epoch.
        if (i+1) % 98 == 0:
            preds = outputs.argmax(dim=1)
            # Compute the metrics once and reuse them for printing and plotting.
            batch_acc = pred_rate(preds, labels)
            batch_loss = loss.item()
            print ("Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Prediction rate: {:.4f}"
                   .format(epoch+1, num_epoches, i+1, total_step, batch_loss, batch_acc))

            # record data for plotting
            pred_rate_ens.append(batch_acc)
            loss_ens.append(batch_loss)

    # update learning rate: the value for schedule epoch `epoch+1` is applied
    # after this epoch finishes, i.e. it is used by the next pass of the loop
    curr_lr = piecewise_linear(optimizer, epoch+1, epoches, lrs)
    # record data for plotting
    lr_ens.append(curr_lr)

In [None]:
# Draw the prediction rate and the loss per logged iteration, and the
# learning rate per epoch, as three panels side by side.
fig, axes = plt.subplots(1, 3, figsize=(30, 10))

panels = [
    (np.arange(1, len(pred_rate_ens) + 1), pred_rate_ens, 'prediction rate', 'iteration'),
    (np.arange(1, len(loss_ens) + 1), loss_ens, 'loss', 'iteration'),
    (np.arange(num_epoches + 1), lr_ens, 'learning rate', 'epoch'),
]

for ax, (xs, ys, title, xlabel) in zip(axes, panels):
    ax.plot(xs, ys)
    ax.set_title(title)
    ax.set_xlabel(xlabel)

plt.show()

In [None]:
# Persist the weights (state_dict only, not the whole module) to disk.
trained_state = model.state_dict()
torch.save(trained_state, 'resnet9_piecewise_linear.ckpt')

In [None]:
# Test the model
# A fresh network is built and restored from the checkpoint so the reported
# accuracy reflects exactly what was saved to disk.
# NOTE: this rebinds `model`; an optimizer created earlier still refers to
# the previous network's parameters.
model = ResNet(ResidualBlock, [2, 2]).to(device)
# map_location lets a checkpoint written on a GPU be restored on the current
# device. torch.load unpickles the file -- only load trusted checkpoints.
model.load_state_dict(torch.load('resnet9_piecewise_linear.ckpt', map_location=device))

# eval mode: batch-norm uses its running statistics
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))