In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision as tv

import numpy as np
import matplotlib.pyplot as plt
import cv2


import tqdm 
import os
from torch.cuda.amp import autocast, GradScaler


In [4]:
%matplotlib
%matplotlib inline

Using matplotlib backend: <object object at 0x0000016131F6A440>


In [5]:
import zipfile as zf

files = zf.ZipFile("archive.zip",'r')
files.extractall()
files.close()

In [6]:
class Dataset2class(torch.utils.data.Dataset):
    def __init__(self, path_dir1:str, path_dir2:str):
        super().__init__()
        
        self.path_dir1 = path_dir1
        self.path_dir2 = path_dir2
        
        self.dir1_list = sorted(os.listdir(path_dir1))
        self.dir2_list = sorted(os.listdir(path_dir2))
    
    def __len__(self):
        return len(self.dir2_list) + len(self.dir1_list)
    
    def __getitem__(self, idx :int):
        
        if idx < len(self.dir1_list):
            class_id = 0
            img_path = os.path.join(self.path_dir1, self.dir1_list[idx])
        else:
            class_id = 1
            idx -= len(self.dir1_list)
            img_path = os.path.join(self.path_dir2, self.dir2_list[idx])
        
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32)
        img /= 255.0
        
        img = cv2.resize(img, (64, 64),  interpolation = cv2.INTER_AREA)
        img = img.transpose((2, 0, 1))
        
        t_img = torch.from_numpy(img)
        t_class_id = torch.tensor(class_id)
        
        return {'img': t_img, 'label': t_class_id}
        
        
os.listdir()

['.ipynb_checkpoints', 'archive.zip', 'dataset', 'resnet.ipynb']

In [7]:
train_ds_catsdogs = Dataset2class(
    './dataset/training_set/dogs',
    './dataset/training_set/cats'
)

test_ds_catsdogs = Dataset2class(
    './dataset/test_set/dogs',
    './dataset/test_set/cats'
)

In [8]:
len(train_ds_catsdogs)

8000

In [9]:
len(test_ds_catsdogs)

2000

In [10]:
batch_size = 16

train_loader = torch.utils.data.DataLoader(
    train_ds_catsdogs, shuffle=True,
    batch_size=batch_size, num_workers=0, drop_last=True
    
)
test_loader = torch.utils.data.DataLoader(
    test_ds_catsdogs, shuffle=True,
    batch_size=batch_size, num_workers=0
    
    
)

In [11]:
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.act = nn.LeakyReLU(0.2)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv0  = nn.Conv2d(3, 128, 3, stride=1, padding=0)
        self.conv1  = nn.Conv2d(128, 128, 3, stride=1, padding=0)
        self.conv2  = nn.Conv2d(128, 128, 3, stride=1, padding=0)
        self.conv3  = nn.Conv2d(128, 256, 3, stride=1, padding=0)
        
        self.adaptivepool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        
        self.linear = nn.Linear(256, 20)
        self.linear2 = nn.Linear(20, 2)
    
    def forward(self, x):

        out = self.conv0(x)
        out = self.act(out)
        out = self.maxpool(out)

        out = self.conv1(out)
        out = self.act(out)
        out = self.maxpool(out)
        
        out = self.conv2(out)
        out = self.act(out)
        out = self.maxpool(out)
        
        out = self.conv3(out)
        out = self.act(out)

        out = self.adaptivepool(out)
        out = self.flatten(out)
        out = self.linear(out)
        out = self.act(out)
        out = self.linear2(out)
    


        return out
        

In [12]:
model = ConvNet()
model

ConvNet(
  (act): LeakyReLU(negative_slope=0.2)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (adaptivepool): AdaptiveAvgPool2d(output_size=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Linear(in_features=256, out_features=20, bias=True)
  (linear2): Linear(in_features=20, out_features=2, bias=True)
)

In [13]:
for sample in train_loader:
    
    img = sample['img']
    label = sample['label']
    model(img)
    break


In [14]:
loss_fn = nn.CrossEntropyLoss()
opimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas = (0.9, 0.999))


In [15]:
def accuracy(pred, label):
    answer = F.softmax(pred.detach()).numpy().argmax(1) == label.numpy().argmax(1)
    return answer.mean()

In [16]:
device = 'cuda'
model = model.to(device)
loss_fn = loss_fn.to(device)


In [17]:
use_amp = True 
scaler = torch.cuda.amp.GradScaler()

In [15]:
epochs = 16
for epoch in range(epochs):
    loss_val = 0
    acc_val = 0
    for sample in (pbar := tqdm.tqdm(train_loader)):
        with autocast(use_amp):
            img, label = sample['img'], sample['label']
            img = img.to(device)
            label = label.to(device)
            opimizer.zero_grad()

            label = F.one_hot(label, 2).float()
            
            with autocast(use_amp):
                pred = model(img)

                loss = loss_fn(pred, label)

        scaler.scale(loss).backward()
        loss_item = loss.item()
        loss_val += loss_item
        
        scaler.step(opimizer)
        scaler.update()
        acc_current = accuracy(pred.cpu().float(), label.cpu().float())
        acc_val += acc_current

    pbar.set_description(f'loss:{loss_item:.4e}\taccuracy: {acc_current:.3f}')
    print(loss_val/len(train_loader))
    print(acc_val/len(train_loader))


  answer = F.softmax(pred.detach()).numpy().argmax(1) == label.numpy().argmax(1)
100%|████████████████████████████████████████████████████████████████████████████████| 250/250 [00:34<00:00,  7.17it/s]


0.6919250411987304
0.52375


100%|████████████████████████████████████████████████████████████████████████████████| 250/250 [00:35<00:00,  7.12it/s]


0.661897964477539
0.6075


100%|████████████████████████████████████████████████████████████████████████████████| 250/250 [00:35<00:00,  7.01it/s]


0.6193456443548202
0.65525


  2%|█▋                                                                                | 5/250 [00:00<00:42,  5.82it/s]


KeyboardInterrupt: 

In [18]:
model_res = tv.models.resnet.resnet34()

In [19]:
model_res

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [88]:
class ResBlock(nn.Module):
    def __init__(self, nc):
        super().__init__()
        self.conv0 = nn.Conv2d(nc, nc, kernel_size = 3, padding = 1)
        self.norm0 = nn.BatchNorm2d(nc)
        self.act = nn.LeakyReLU(0.2 , inplace = True)
        self.conv1 = nn.Conv2d(nc, nc, kernel_size = 3, padding = 1)
        self.norm1 = nn.BatchNorm2d(nc)
        
    
    def forward(self, x):
        out = self.conv0(x)
        out = self.norm0(out)
        out = self.act(out)
        out = self.conv1(out)
        out = self.norm1(out)
        print((x + out).shape)
        return self.act(x + out)
    
        

In [89]:
class ResTruck(nn.Module):
    def __init__(self, nc, num_blocks):
        super().__init__()
        truck = []
        for i in range(num_blocks):
            truck += [(ResBlock(nc))]
        self.truck = nn.Sequential(*truck)
    def forward(self, x):
        print(self.truck(x).shape)
        return self.truck(x)

In [121]:
class PseResNet(nn.Module):
    def __init__(self, in_nc, nc, out_nc):
        super().__init__()
        self.conv0 = nn.Conv2d(in_nc, nc, kernel_size=7, stride=2)
        #self.norm
        self.act = nn.LeakyReLU(0.2, inplace = True)
        self.maxpool = nn.MaxPool2d(2, 2)
        
        self.layer1 = ResTruck(nc, 3)
        self.conv1 = nn.Conv2d(nc, 2*nc, 3, padding=1, stride=2)
        self.layer2 = ResTruck(2*nc, 4)
        self.conv2 = nn.Conv2d(2*nc, 4*nc, 3, padding=1, stride=2)
        self.layer3 = ResTruck(4*nc, 6)
        self.conv3 = nn.Conv2d(4*nc, 4*nc, 3, padding=1, stride=2)
        self.layer4 = ResTruck(4*nc, 3)
        self.flatten = nn.Flatten()
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.linear = nn.Linear(4*nc, out_nc)
        


        
    
    def forward(self, x):
        print(x.shape)
        out = self.conv0(x)
        print(10)
        out = self.act(out)
        out = self.maxpool(out)
        print(10)
        out = self.layer1(out)
        print(10)
        out = self.conv1(out)
        out = self.layer2(out)
        out = self.conv2(out)
        out = self.layer3(out)
        out = self.conv3(out)
        out = self.layer4(out)

        
        out = self.avgpool(out)
        out = self.flatten(out)
        out = self.linear(out)
        
        return out
        
    

In [122]:
model = PseResNet(3, 32, 2)

In [123]:
pytorch_total_params = sum(p.numel() for p in model.parameters())

In [124]:
pytorch_total_params

3258370

In [125]:

for i, sample in enumerate(train_loader):
    img = sample['img']
    label = sample['label']
    
    
    break

In [126]:
epochs = 10
for epoch in range(epochs):
    loss_val = 0
    acc_val = 0
    for sample in (pbar := tqdm.tqdm(train_loader)):
        img, label = sample['img'], sample['label']
        img = img.to(device)
        label = label.to(device)
        opimizer.zero_grad()
        label = F.one_hot(label, 2).float()
            
        with autocast(use_amp):
            print(model(img))
            pred = model(img)

            loss = loss_fn(pred, label)

        scaler.scale(loss).backward()
        loss_item = loss.item()
        loss_val += loss_item
        
        scaler.step(opimizer)
        scaler.update()
        acc_current = accuracy(pred.cpu().float(), label.cpu().float())
        acc_val += acc_current

    pbar.set_description(f'loss:{loss_item:.4e}\taccuracy: {acc_current:.3f}')
    print(loss_val/len(train_loader))
    print(acc_val/len(train_loader))


  0%|          | 0/500 [00:00<?, ?it/s]


torch.Size([16, 3, 64, 64])


RuntimeError: Input type (struct c10::Half) and bias type (float) should be the same