In [None]:
import matplotlib.pyplot as plt
import torch 
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
import random
from torch.autograd import Variable
import torch.utils.data as Data
import pandas as pd
import torchvision

from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Dataset


In [None]:
def seed_everything(SEED=42, deterministic=False):
    """Seed Python's ``random``, NumPy and PyTorch (CPU + CUDA) and configure cuDNN.

    Args:
        SEED: integer seed handed to every random number generator.
        deterministic: when ``False`` (default, the historical behaviour) cuDNN
            benchmark mode is switched on, which is fast for fixed-size inputs
            but lets cuDNN pick convolution algorithms at run time. When
            ``True`` benchmark mode is switched off and
            ``torch.backends.cudnn.deterministic`` is enabled instead, trading
            speed for repeatable results.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    # The CUDA seeding calls are silently ignored when no GPU is available.
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    # Benchmark mode and deterministic mode are mutually exclusive goals, so
    # one flag drives both settings.
    torch.backends.cudnn.deterministic = bool(deterministic)
    torch.backends.cudnn.benchmark = not deterministic  # keep True if all the input have same size.
# Single notebook-wide seed; applied once, before any data or model is created.
SEED = 42
seed_everything(SEED)

In [None]:
# Channel-wise normalisation shared by both pipelines
# (presumably the ImageNet mean/std used by the pretrained weights -- confirm).
_normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training pipeline: random 224-pixel crop and random horizontal flip as
# augmentation, then tensor conversion and normalisation.
_train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    _normalize,
]
transform1 = transforms.Compose(_train_steps)

# Eval/test pipeline: fixed resize to 256x256 followed by a central 224x224
# crop, then the same tensor conversion and normalisation (no randomness).
_eval_steps = [
    transforms.Resize((256, 256)),
    transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    _normalize,
]
transform2 = transforms.Compose(_eval_steps)

In [None]:
# Root folder holding the train / eval / test splits, one sub-folder per class.
DATA_ROOT = '../input/caltech101/Caltech101/Caltech101'

# The training split gets the augmenting pipeline; eval and test use the
# deterministic resize + centre-crop pipeline.
train_data = torchvision.datasets.ImageFolder(DATA_ROOT + '/train', transform=transform1)
eval_data = torchvision.datasets.ImageFolder(DATA_ROOT + '/eval', transform=transform2)
test_data = torchvision.datasets.ImageFolder(DATA_ROOT + '/test', transform=transform2)

# Only the training loader shuffles. Accuracy does not depend on sample order,
# so shuffling eval/test only makes the batch order vary from run to run.
train_loader = Data.DataLoader(dataset=train_data, batch_size=16,
                               shuffle=True, num_workers=2)
eval_loader = Data.DataLoader(dataset=eval_data, batch_size=128,
                              shuffle=False, num_workers=2)
test_loader = Data.DataLoader(dataset=test_data, batch_size=128,
                              shuffle=False, num_workers=2)

In [None]:
import torch.nn as nn
import torch


class BasicBlock(nn.Module):
    """Residual unit built from two 3x3 convolutions (the block used by resnet18).

    Args:
        in_channel: number of channels of the incoming feature map.
        out_channel: number of channels produced by both convolutions.
        stride: stride of the first convolution; the second always uses 1.
        downsample: optional module applied to the input to produce the
            shortcut branch; when ``None`` the input itself is the shortcut.
    """

    # Output channels of the block = out_channel * expansion.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        # Both convolutions are 3x3 with padding 1 and no bias (a batch norm
        # follows each of them).
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)

        # Registration order is kept as conv1, bn1, relu, conv2, bn2, downsample.
        self.conv1 = nn.Conv2d(in_channel, out_channel, stride=stride, **conv3x3)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, stride=1, **conv3x3)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut)``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        residual = self.relu(self.bn1(self.conv1(x)))
        # No activation after the second batch norm: ReLU is applied only
        # once the shortcut has been added.
        residual = self.bn2(self.conv2(residual))

        residual += shortcut
        return self.relu(residual)


class Bottleneck(nn.Module):
    """Residual unit with a 1x1 -> 3x3 -> 1x1 convolution stack (used by resnet50).

    Args:
        in_channel: number of channels of the incoming feature map.
        out_channel: width of the first two convolutions; the last one
            outputs ``out_channel * expansion`` channels.
        stride: stride of the middle 3x3 convolution; both 1x1 convolutions
            use stride 1.
        downsample: optional module applied to the input to produce the
            shortcut branch; when ``None`` the input itself is the shortcut.
    """

    # The final 1x1 convolution widens the feature map by this factor.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super().__init__()
        widened = out_channel * self.expansion

        # 1x1 convolution: changes the channel count, keeps the spatial size.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)

        # 3x3 convolution: the only layer in the main branch that applies `stride`.
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)

        # 1x1 convolution: expands to out_channel * expansion channels.
        self.conv3 = nn.Conv2d(out_channel, widened, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Run the three conv/bn stages, add the shortcut, then apply ReLU."""
        shortcut = x if self.downsample is None else self.downsample(x)

        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.relu(self.bn2(self.conv2(residual)))
        # No activation after the third batch norm: ReLU is applied only
        # once the shortcut has been added.
        residual = self.bn3(self.conv3(residual))

        residual += shortcut
        return self.relu(residual)


class ResNet(nn.Module):
    """ResNet assembled from a configurable residual block class.

    Args:
        block: residual block class (e.g. ``BasicBlock`` or ``Bottleneck``).
            It must expose an ``expansion`` class attribute and accept
            ``(in_channel, out_channel, stride=..., downsample=...)``.
        blocks_num: sequence whose first four entries give the number of
            blocks in stages 1-4.
        num_classes: output size of the final linear layer.
        include_top: when ``False`` the pooling and linear head is not
            created and ``forward`` returns the stage-4 feature map.
    """

    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):
        super().__init__()
        self.include_top = include_top
        # Channel count entering the next stage; _make_layer updates it.
        self.in_channel = 64

        # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages (layer1..layer4): widths 64/128/256/512, and
        # every stage after the first halves the resolution with stride 2.
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (width, stage_stride) in enumerate(stage_plan):
            stage = self._make_layer(block, width, blocks_num[idx], stride=stage_stride)
            setattr(self, "layer%d" % (idx + 1), stage)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise every convolution with Kaiming-normal weights.
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks as an ``nn.Sequential``.

        Only the first block receives ``stride`` and, when needed, a
        projection shortcut; the remaining blocks use the defaults.
        """
        out_width = channel * block.expansion

        # A 1x1 conv + batch norm projection is needed whenever the first
        # block changes the resolution (stride != 1) or the channel count.
        shape_preserved = stride == 1 and self.in_channel == out_width
        projection = None
        if not shape_preserved:
            projection = nn.Sequential(
                nn.Conv2d(self.in_channel, out_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_width))

        stage = [block(self.in_channel, channel, downsample=projection, stride=stride)]
        self.in_channel = out_width

        # The rest of the stage keeps shape, so the identity shortcut is used.
        stage.extend(block(self.in_channel, channel) for _ in range(1, block_num))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class logits, or the stage-4 feature map if ``include_top`` is False."""
        backbone = (self.conv1, self.bn1, self.relu, self.maxpool,
                    self.layer1, self.layer2, self.layer3, self.layer4)
        for stage in backbone:
            x = stage(x)

        if not self.include_top:
            return x

        pooled = self.avgpool(x)
        return self.fc(torch.flatten(pooled, 1))


def resnet18(num_classes=1000, include_top=True):
    """Build a ``ResNet`` of ``BasicBlock`` units with two blocks in each of the four stages."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths,
                  num_classes=num_classes, include_top=include_top)


def resnet50(num_classes=1000, include_top=True):
    """Build a ``ResNet`` of ``Bottleneck`` units with 3, 4, 6 and 3 blocks in stages 1-4."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths,
                  num_classes=num_classes, include_top=include_top)

In [None]:
# Instantiate the network with its default 1000-way classifier.
# Swap in resnet50(...) here to use the Bottleneck-based variant instead.
rnn = resnet18(num_classes=1000, include_top=True)

In [None]:
model_weight_path = "../input/pretrained-pytorch-models/resnet18-5c106cde.pth"
#model_weight_path = "../input/pretrained-pytorch-models/resnet50-19c8e357.pth"

# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location='cpu' makes loading independent of the device the checkpoint
# was saved from; the model is moved to the GPU at the end of this cell.
state_dict = torch.load(model_weight_path, map_location='cpu')

# strict=False tolerates key mismatches, so report them explicitly. Otherwise
# a wrong checkpoint would silently leave layers at their random initialisation.
missing_keys, unexpected_keys = rnn.load_state_dict(state_dict, strict=False)
if missing_keys or unexpected_keys:
    print('WARNING: checkpoint does not fully match the model; '
          'missing keys: %s, unexpected keys: %s' % (list(missing_keys), list(unexpected_keys)))

# Replace the classifier head with a fresh 101-way linear layer on top of the
# same input features; all other layers keep the loaded weights.
inchannel = rnn.fc.in_features
rnn.fc = nn.Linear(inchannel, 101)

# Module.cuda() returns the module itself; assigning the result keeps the
# cell from printing the whole architecture as its output.
rnn = rnn.cuda()

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau  # imported, but not used in the visible cells

# Training hyper-parameters.
EPOCH = 40      # number of passes over the training set
LR = 0.001      # learning rate handed to Adam
stepsize = 40   # not referenced by any visible cell

# Cross-entropy on the class logits, optimised with Adam over every model
# parameter (weight decay 1e-5).
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=rnn.parameters(), lr=LR, weight_decay=1e-5)


In [None]:
# Fine-tuning loop: one training pass and one validation pass per epoch.
# Per-epoch history is collected in `losses`, `train_accs` and `eval_accs`.
EPOCH=40
best_acc = 0.0
train_num = len(train_data)
val_num = len(eval_data)
num_train_batches = len(train_loader)
losses=[]
eval_accs=[]
train_accs=[]
for epoch in range(EPOCH):
    # train
    rnn.train()
    running_loss = 0.0
    acc = 0.0  # number of correctly classified training samples this epoch
    for step, (images, labels) in enumerate(train_loader, start=0):
        # Move each batch to the GPU once and reuse it for loss and accuracy.
        images, labels = images.cuda(), labels.cuda()

        optimizer.zero_grad()
        logits = rnn(images)
        loss = loss_func(logits, labels)

        loss.backward()
        optimizer.step()

        predict_y = torch.max(logits, dim=1)[1]
        acc += (predict_y == labels).sum().item()

        # print statistics
        batch_loss = loss.item()
        running_loss += batch_loss
        # print train process
        rate = (step+1)/num_train_batches
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.4f}".format(int(rate*100), a, b, batch_loss), end="")
    print()

    train_accurate = acc / train_num
    train_accs.append(train_accurate)
    # Average over the number of batches. `step` is the zero-based index of the
    # last batch, so dividing by it would overstate the mean loss (and divide
    # by zero when there is a single batch).
    epoch_loss = running_loss / max(num_train_batches, 1)
    losses.append(epoch_loss)

    # validate
    torch.cuda.empty_cache()
    rnn.eval()
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():
        for val_images, val_labels in eval_loader:
            outputs = rnn(val_images.cuda())
            predict_y = torch.max(outputs, dim=1)[1]
            acc += (predict_y == val_labels.cuda()).sum().item()
    val_accurate = acc / val_num
    eval_accs.append(val_accurate)
    best_acc = max(best_acc, val_accurate)

    # The accuracy reported here comes from eval_loader (the validation split),
    # not from the held-out test split, so it is labelled accordingly.
    print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f train_accuracy: %.3f   ' %
          (epoch + 1, epoch_loss, val_accurate, train_accurate))

print('Finished Training')


In [None]:
# Mean training loss per epoch (x is the zero-based epoch index).
# The title names ResNet-18, the model this notebook builds and loads weights for.
x = np.arange(len(losses))
fig, ax = plt.subplots()
ax.plot(x, losses)
ax.set_title('resnet 18 transfer Loss ')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch (x is the zero-based epoch index).
# The title names ResNet-18, the model this notebook builds and loads weights for.
x = np.arange(len(train_accs))
fig, ax = plt.subplots()
ax.plot(x, train_accs, label='train')
ax.plot(x, eval_accs, label='validate')
ax.set_title('resnet 18 transfer Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()