# Resnet

In [1]:
import os
import kagglehub

# Download latest version
dataset_path = kagglehub.dataset_download("marquis03/cats-and-dogs")

print("Path to dataset files:", dataset_path)


Path to dataset files: /Users/phamdinhtrunghieu/.cache/kagglehub/datasets/marquis03/cats-and-dogs/versions/2


In [2]:
import torch
from torch import nn
import torch.nn.functional as f
from PIL import Image
from torchvision import models, transforms, datasets
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

INPUTSIZE = 224
PATCHSIZE = 32


## Load Dataset

In [3]:
transform = transforms.Compose([
    transforms.Resize((INPUTSIZE, INPUTSIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dataset_folder = datasets.ImageFolder(root = dataset_path + "/train", transform = transform)
dataset = DataLoader(dataset_folder, batch_size = PATCHSIZE, shuffle = True)

number_of_labels = len(dataset_folder.class_to_idx)

def show_image(img):
  plt.imshow(img)
  plt.show()
  


In [4]:
print(dataset_folder.class_to_idx)
for images, labels in dataset:
    for image in images:
        print("=====")
        print(image.numpy())
        # print(show_image(Image.fromarray(image.permute(1, 2, 0).numpy())))
        break
    break

{'cat': 0, 'dog': 1}
=====
[[[-0.18280679 -0.18280679 -0.19993155 ...  1.8721637   1.8721637
    1.8550389 ]
  [-0.18280679 -0.18280679 -0.19993155 ...  1.8721637   1.8721637
    1.8550389 ]
  [-0.18280679 -0.18280679 -0.19993155 ...  1.8721637   1.8721637
    1.8550389 ]
  ...
  [ 0.70768046  0.60493195  0.6563062  ...  0.7590547   0.7590547
    0.69055575]
  [ 0.6563062   0.57068247  0.46793392 ...  0.94742703  0.810429
    0.6563062 ]
  [ 0.31381115  0.15968838  0.38231018 ...  1.2042983   1.1015497
    0.94742703]]

 [[-0.30252096 -0.30252096 -0.32002798 ...  2.0434172   2.0434172
    2.0259104 ]
  [-0.30252096 -0.30252096 -0.32002798 ...  2.0434172   2.0434172
    2.0259104 ]
  [-0.30252096 -0.30252096 -0.32002798 ...  2.0434172   2.0434172
    2.0259104 ]
  ...
  [ 0.94047624  0.83543426  0.88795525 ...  0.88795525  0.87044823
    0.8004202 ]
  [ 0.88795525  0.8004202   0.69537824 ...  1.0280112   0.88795525
    0.7303922 ]
  [ 0.5378152   0.3802522   0.6078432  ...  1.2906163   

In [70]:
class ResnetLayer(nn.Module):
    def __init__(self, input_channels, output_channels, stride = 1, ConvertingBlock = None):
        super().__init__()
        self.ConvertingBlock = ConvertingBlock
        self.output_channels = output_channels
        
        self.conv1 = nn.Conv2d(input_channels, output_channels, kernel_size = 3, stride = stride, padding = 1)
        self.conv2 = nn.Conv2d(output_channels, output_channels, kernel_size = 3, stride = 1, padding = 1)
        self.conv3 = nn.Conv2d(output_channels, output_channels, kernel_size = 3, stride = 1, padding = 1)
        self.conv4 = nn.Conv2d(output_channels, output_channels, kernel_size = 3, stride = 1, padding = 1)

    def forward(self, x):
        output = self.conv1(x)
        output = self.conv2(output)
        output = self.conv3(output)
        output = self.conv4(output)
        
        if self.ConvertingBlock:
            x = self.ConvertingBlock(x)
        output += x
        output = f.relu(output)
        return output
    
class ResNet(nn.Module):
    def __init__(self):
        super().__init__()
        tensor_size = INPUTSIZE
        self.conv1 = nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3)
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        tensor_size //= 2
        
        self.block1 = self._make_block(64, 64)
        tensor_size //= 2
        self.block2 = self._make_block(64, 128, stride = 2)
        tensor_size //= 2
        self.block3 = self._make_block(128, 256, stride = 2)
        tensor_size //= 2
        self.block4 = self._make_block(256, 512, stride = 2)
        tensor_size //= 2
        
        self.avgpool = nn.AvgPool2d((1, 1))
        self.fc = nn.Linear(512 * tensor_size * tensor_size, number_of_labels)    
    
    def forward(self, x):
        x = f.relu(self.conv1(x))
        x = self.maxpool(x)
        
        x = f.relu(self.block1(x))
        # print(x.size())
        x = f.relu(self.block2(x))
        # print(x.size())
        x = f.relu(self.block3(x))
        # print(x.size())
        x = f.relu(self.block4(x))
        # print(x.size())
        
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        
        x = self.fc(x).float()
        # print(x)
        
        return x
        
    def _make_block(self, input_channels, output_channels, stride = 1):
        ConvertingBlock = None
        
        if stride != 1 or input_channels != output_channels:
            ConvertingBlock = nn.Conv2d(input_channels, output_channels, kernel_size = 1, stride = stride)
        
        block = ResnetLayer(input_channels, output_channels, stride, ConvertingBlock)
        
        return block

    def cal_tensor_size(tensor_size, kernel_size = 3, stride = 1, padding = 1):
        return (tensor_size - kernel_size + 2 * padding) // stride + 1
        

        
    

In [71]:
def get_label(y):
  res = 0
  for id in range(number_of_labels):
    if y[res].item() < y[id].item():
      res = id

  return labels[res]

In [72]:
model = ResNet()

import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)

loss_calculator = nn.CrossEntropyLoss()

for epoch in range(100):
    for x, y in dataset:
        y_hat = model(x)
        
        loss = loss_calculator(y_hat, y)
        print(y.numpy())
        print(torch.argmax(y_hat, axis = 1).numpy())
        print(loss.item())
        
        optimizer.zero_grad()
        
        loss.backward()
        
        optimizer.step()

[1 0 0 0 1 1 1 1 1 0 0 1 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 1 0 0 0 0]
[1 0 1 1 0 1 1 0 1 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1]
0.6942543387413025
[0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 1 1 1 0 0 1 1 1 1 1 0 1 0 1 1 1]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
0.692782461643219
[1 1 0 0 1 1 0 1 1 0 1 1 1 1 0 0 0 0 1 1 0 1 1 0 0 0 0 1 1 1 1 1]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
0.696505069732666
[1 0 0 0 0 0 1 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 0 1 0 0 1 1 1 0 1 0]
[1 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 1 1 1 1 0 1 1 0 0 1 0 1 0 1 0 0]
0.6929666996002197
[0 1 0 1 0 0 1 1 1 1 1 1 0 0 0 0 0 1 1 1 0 1 1 1 0 0 1 0 1 0 0 1]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
0.6845431327819824
[0 0 1 0 0 1 1 1 1 0 1 1 0 0 1 0 0 0 1 1 1 0 0 0 1 1 1 0 0 0 0 1]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
0.7031956911087036
[0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 1 1 0 1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

KeyboardInterrupt: 