# 1. Import Libraries

In [1]:
!pip install timm

Collecting timm
  Downloading timm-0.9.16-py3-none-any.whl (2.2 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.5/2.2 MB[0m [31m16.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: timm
Successfully installed timm-0.9.16


In [2]:
from google.colab import drive
import pandas as pd
from torch.utils.data import Dataset,DataLoader
import cv2
from torchvision import transforms
import random
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import warnings
from tqdm.auto import tqdm
import timm
warnings.filterwarnings('ignore')

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


# 2. Prepocessing Images

In [4]:
train_df = pd.read_csv('/content/drive/MyDrive/DeepLearning/datasets/cats_dogs/train_df.csv')
test_df = pd.read_csv('/content/drive/MyDrive/DeepLearning/datasets/cats_dogs/test_df.csv')

In [5]:
class shortsize_crop:
  def __init__(self,short=[256,480]):
    self.s = random.choice(short)

  def __call__(self,image):
    # image size : 3*H*W
    h = image.size(1)
    w = image.size(2)

    if h<self.s and w<self.s:
      image = transforms.Resize((self.s,self.s))(image)
    elif h<w:
      image = transforms.Resize((self.s,w))(image)
    else:
      image = transforms.Resize((h,self.s))(image)

    return image

In [6]:
train_transform = transforms.Compose([
    transforms.ToTensor(),
    shortsize_crop([256,480]),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    transforms.RandomResizedCrop((224,224)),
    transforms.RandomHorizontalFlip(0.5)
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# 3. Create Dataset

In [7]:
class ResNetDataset(Dataset):
  def __init__(self,df,transform,train=True):
    self.df = df
    self.transform = transform
    self.train = train

  def __len__(self):
    return len(self.df)

  def __getitem__(self,index):
    path = self.df.loc[index,'path']
    image = cv2.imread(path)
    image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
    image = self.transform(image)
    label = self.df.loc[index,'label']
    label = torch.tensor(label)

    if not self.train:
      images = []
      for scale in [224,256,384,480,640]:
        temp = shortsize_crop([scale])(image)
        images.append(temp)

      return images,label

    return image,label

# 4. Create Model

In [8]:
class conv_block(nn.Module):
  def __init__(self,dim,increase=False):
    super(conv_block,self).__init__()
    self.increase = increase
    if not increase:
      self.layer = nn.Sequential(
          nn.Conv2d(dim,dim,kernel_size=3,padding=1),
          nn.BatchNorm2d(dim),
          nn.ReLU(inplace=True),
          nn.Conv2d(dim,dim,kernel_size=3,padding=1),
          nn.BatchNorm2d(dim),
          nn.ReLU(inplace=True)
      )
      self.shortcut = nn.Identity()
    else:
      self.layer = nn.Sequential(
          nn.Conv2d(dim//2,dim,kernel_size=3,stride=2,padding=1),
          nn.BatchNorm2d(dim),
          nn.ReLU(inplace=True),
          nn.Conv2d(dim,dim,kernel_size=3,padding=1),
          nn.BatchNorm2d(dim),
          nn.ReLU(inplace=True)
      )
      self.shortcut = nn.Sequential(
          nn.Conv2d(dim//2,dim,kernel_size=1,stride=2,bias=False),
          nn.BatchNorm2d(dim)
      )

  def forward(self,x):
    out = self.layer(x) + self.shortcut(x)
    return out

class ResNet34(nn.Module):
  def __init__(self,num_classes):
    super(ResNet34,self).__init__()
    self.num_classes = num_classes
    self.convlayer0 = nn.Sequential(
        nn.Conv2d(3,64,kernel_size=7,stride=2,padding=3,bias=False), # B*64*112*112
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3,stride=2,padding=1) # B*64*56*56
    )
    self.convlayer1 = nn.Sequential(
        conv_block(64),
        conv_block(64),
        conv_block(64) # B*64*56*56
    )
    self.convlayer2 = nn.Sequential(
        conv_block(128,increase=True),
        conv_block(128),
        conv_block(128),
        conv_block(128) # B*128*28*28
    )
    self.convlayer3 = nn.Sequential(
        conv_block(256,increase=True),
        conv_block(256),
        conv_block(256),
        conv_block(256),
        conv_block(256),
        conv_block(256) # B*256*14*14
    )
    self.convlayer4 = nn.Sequential(
        conv_block(512,increase=True),
        conv_block(512),
        conv_block(512) # B*512*7*7
    )
    self.avgpool = nn.AdaptiveAvgPool2d((1,1)) # B*512*1*1
    self.fc = nn.Linear(512,num_classes)
    self.test = nn.Sequential(
        nn.Conv2d(512,num_classes,kernel_size=7,bias=False),
        nn.BatchNorm2d(num_classes),
        nn.AdaptiveAvgPool2d((1,1))
    )

  def forward(self,x,train=True):
    if train:
      x = self.convlayer0(x)
      x = self.convlayer1(x)
      x = self.convlayer2(x)
      x = self.convlayer3(x)
      x = self.convlayer4(x)
      out = self.avgpool(x)
      out = out.view(-1,512)
      out = self.fc(out)
      return out

    else:
      for i,a in enumerate(x):
        a = self.convlayer0(a)
        a = self.convlayer1(a)
        a = self.convlayer2(a)
        a = self.convlayer3(a)
        a = self.convlayer4(a)
        a = self.test(a)
        a = a.view(-1,self.num_classes)
        if i==0:
          out = a
        else:
          out += a
      return out/5

In [25]:
class BottleNeck(nn.Module):
  def __init__(self,in_dim,out_dim):
    super(BottleNeck,self).__init__()

    stride = 1
    self.downsample = nn.Identity()

    if in_dim==out_dim:
      hidden_dim = in_dim//4
    if in_dim*2==out_dim:
      hidden_dim = in_dim//2
      stride = 2
      self.downsample = nn.Sequential(
          nn.Conv2d(in_dim,out_dim,kernel_size=1,stride=stride,bias=False),
          nn.BatchNorm2d(out_dim)
      )
    if in_dim*4==out_dim:
      hidden_dim = in_dim
      self.downsample = nn.Sequential(
          nn.Conv2d(in_dim,out_dim,kernel_size=1,stride=stride,bias=False),
          nn.BatchNorm2d(out_dim)
      )

    self.bottleneck = nn.Sequential(
        nn.Conv2d(in_dim,hidden_dim,kernel_size=1,bias=False),
        nn.BatchNorm2d(hidden_dim),
        nn.ReLU(inplace=True),
        nn.Conv2d(hidden_dim,hidden_dim,kernel_size=3,stride=stride,padding=1,bias=False),
        nn.BatchNorm2d(hidden_dim),
        nn.ReLU(inplace=True),
        nn.Conv2d(hidden_dim,out_dim,kernel_size=1,bias=False),
        nn.BatchNorm2d(out_dim)
    )

  def forward(self,x):
    return self.bottleneck(x) + self.downsample(x)

class ResNet(nn.Module):
  def __init__(self,num_classes,num_layers=50):
    super(ResNet,self).__init__()
    self.num_classes = num_classes

    if num_layers==50:
      _layers = [3,4,6,3]
    if num_layers==101:
      _layers = [3,4,23,3]
    if num_layers==152:
      _layers = [3,8,36,3]

    self.convlayer0 = nn.Sequential(
        nn.Conv2d(3,64,kernel_size=7,stride=2,padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
    )

    self.convlayer1 = self.make_layer(64,256,_layers[0])
    self.convlayer2 = self.make_layer(256,512,_layers[1])
    self.convlayer3 = self.make_layer(512,1024,_layers[2])
    self.convlayer4 = self.make_layer(1024,2048,_layers[3])

    self.avgpool = nn.AdaptiveAvgPool2d((1,1))
    self.fc = nn.Linear(2048,num_classes)

    self.test = nn.Sequential(
        nn.Conv2d(2048,num_classes,kernel_size=2,bias=False),
        nn.BatchNorm2d(num_classes)
    )

  def forward(self,x,train=True):
    if train:
      x = self.convlayer0(x)
      x = self.convlayer1(x)
      x = self.convlayer2(x)
      x = self.convlayer3(x)
      x = self.convlayer4(x)
      x = self.avgpool(x)
      x = x.view(-1,2048)
      out = self.fc(x)

      return out

    else:
      for i,a in enumerate(x):
        a = self.convlayer0(a)
        a = self.convlayer1(a)
        a = self.convlayer2(a)
        a = self.convlayer3(a)
        a = self.convlayer4(a)
        a = self.test(a)
        a = self.avgpool(a)
        if i==0:
          out = a.view(-1,self.num_classes)
        else:
          out += a.view(-1,self.num_classes)

      return out

  def make_layer(self,in_dim,out_dim,n):
    layers = []
    layers.append(BottleNeck(in_dim,out_dim))
    for _ in range(n-1):
      layers.append(BottleNeck(out_dim,out_dim))

    return nn.Sequential(*layers)

In [10]:
'''
convlayer0
conv(3,64,7,2,3) B*64*112*112
maxpool(3,2,1) B*64*56*56

convlayer1
bottleneck(64,256) B*256*56*56
bottleneck(256,256) B*256*56*56

convlayer2
bottleneck(256,512,downsample=True) B*512*28*28
bottleneck(512,512) X 3 B*512*28*28

convlayer3
bottleneck(512,1024,downsample=True) B*1024*14*14
bottleneck(1024,1024) X 5 B*1024*14*14

convlayer4
bottleneck(1024,2048,downsample=True) B*2048*7*7
bottleneck(2048,2048) X 2 B*2048*7*7
'''

'\nconvlayer0\nconv(3,64,7,2,3) B*64*112*112\nmaxpool(3,2,1) B*64*56*56\n\nconvlayer1\nbottleneck(64,256) B*256*56*56\nbottleneck(256,256) B*256*56*56\n\nconvlayer2\nbottleneck(256,512,downsample=True) B*512*28*28\nbottleneck(512,512) X 3 B*512*28*28\n\nconvlayer3\nbottleneck(512,1024,downsample=True) B*1024*14*14\nbottleneck(1024,1024) X 5 B*1024*14*14\n\nconvlayer4\nbottleneck(1024,2048,downsample=True) B*2048*7*7\nbottleneck(2048,2048) X 2 B*2048*7*7\n'

# 5. train

In [15]:
epoch = 100
batch_size = 16
lr = 1e-2

In [16]:
train_dataset = ResNetDataset(train_df,train_transform,train=True)
test_dataset = ResNetDataset(test_df,test_transform,train=False)

train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,num_workers=2,drop_last=True)
test_loader = DataLoader(test_dataset,batch_size=1,shuffle=False,num_workers=2,drop_last=True)

In [17]:
def ToDevice(device,x):
  array = []
  for a in x:
    a = a[np.newaxis,:,:,:]
    a = a.to(device)
    array.append(a)

  return a

In [26]:
#model = ResNet34(num_classes=2)
model = ResNet(num_classes=2,num_layers=50)
#model = timm.create_model('resnet34')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
loss_fn = nn.CrossEntropyLoss()
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode='max',factor=0.1,patience=10,verbose=True)

for i in tqdm(range(epoch)):
  model.train()
  train_loss_list = []
  for image,label in train_loader:
    image = image.to(device)
    label = label.to(device)

    optimizer.zero_grad()
    output = model(image,train=True)
    loss = loss_fn(output,label)
    loss.backward()
    optimizer.step()

    train_loss_list.append(loss.item())

  model.eval()
  test_loss_list = []
  test_acc_list = []
  for images,label in test_loader:
    images = ToDevice(device,images)
    label = label.to(device)

    output = model(images,train=False)
    loss = loss_fn(output,label)
    test_loss_list.append(loss.item())
    temp = torch.argmax(output,dim=1)
    acc = sum(temp==label).cpu()/len(label)
    test_acc_list.append(acc)

  scheduler.step(np.mean(test_acc_list))

  print('{}th epoch train loss : {:.4f} / test loss : {:.4f} / test acc : {:.4f}'.format(i,np.mean(train_loss_list),np.mean(test_loss_list),np.mean(test_acc_list)))

  0%|          | 0/100 [00:00<?, ?it/s]

0th epoch train loss : 1.8910 / test loss : 11.4084 / test acc : 0.5000
1th epoch train loss : 1.9197 / test loss : 0.7711 / test acc : 0.5000
2th epoch train loss : 1.0626 / test loss : 1.0484 / test acc : 0.4929
3th epoch train loss : 0.8115 / test loss : 0.6970 / test acc : 0.4786
4th epoch train loss : 0.7571 / test loss : 0.7295 / test acc : 0.4714
5th epoch train loss : 0.7198 / test loss : 0.7933 / test acc : 0.4143
6th epoch train loss : 0.6881 / test loss : 0.8141 / test acc : 0.4286
7th epoch train loss : 0.7054 / test loss : 0.7291 / test acc : 0.4143
8th epoch train loss : 0.6852 / test loss : 0.6980 / test acc : 0.4143
9th epoch train loss : 0.6706 / test loss : 0.7122 / test acc : 0.3929
10th epoch train loss : 0.6970 / test loss : 0.7104 / test acc : 0.3714
Epoch 00012: reducing learning rate of group 0 to 1.0000e-03.
11th epoch train loss : 0.6601 / test loss : 0.7724 / test acc : 0.4071
12th epoch train loss : 0.6584 / test loss : 0.7044 / test acc : 0.3929
13th epoch 

KeyboardInterrupt: 