In [70]:
import numpy as np
import os,random
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook as tqdm


import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F
import torchvision
from torchvision import transforms

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [60]:
# Train_dir, Test_dir
base_dir = '/content/drive/MyDrive/Colab Notebooks/cat_dog/train'
train_dir = '/content/drive/MyDrive/Colab Notebooks/cat_dog/train'
test_dir = '/content/drive/MyDrive/Colab Notebooks/cat_dog/test'

In [5]:
# Initialize an empty list to hold the image file paths
train_list = []

# Walk through the train directory
for root, dirs, files in os.walk(train_dir):
    for file in files:
        # Check if the file is an image by its extension
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            # Construct the full file path and append it to train_list
            full_path = os.path.join(root, file)
            train_list.append(full_path)

# Print the number of images found and the list
print(f'Total images found: {len(train_list)}')


Total images found: 557


In [6]:
# Initialize an empty list to hold the image file paths for testing
test_list = []

# Walk through the test directory
for root, dirs, files in os.walk(test_dir):
    for file in files:
        # Check if the file is an image by its extension
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            # Construct the full file path and append it to test_list
            full_path = os.path.join(root, file)
            test_list.append(full_path)

# Print the number of images found and the list
print(f'Total images found in test directory: {len(test_list)}')


Total images found in test directory: 140


In [7]:
# Image_Id is contained in filepath
test_list[:5]

['/content/drive/MyDrive/Colab Notebooks/cat_dog/test/cats/cat_140.jpg',
 '/content/drive/MyDrive/Colab Notebooks/cat_dog/test/cats/cat_190.jpg',
 '/content/drive/MyDrive/Colab Notebooks/cat_dog/test/cats/cat_342.jpg',
 '/content/drive/MyDrive/Colab Notebooks/cat_dog/test/cats/cat_113.jpg',
 '/content/drive/MyDrive/Colab Notebooks/cat_dog/test/cats/cat_268.jpg']

In [8]:
# Get Label
train_list[0].split('/')[-2].split('.')[0]

'cats'

In [9]:
# Divide Train, Valid Data
train_list, val_list = train_test_split(train_list, test_size=0.1)

In [22]:
# Data Augumentation
class ImageTransform():

    def __init__(self, resize, mean, std):
        self.data_transform = {
            'train': transforms.Compose([
                transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            'val': transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(resize),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            'test': transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(resize),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ])
        }

    def __call__(self, img, phase):
        return self.data_transform[phase](img)

In [38]:
# Dataset
class DogvsCatDataset(data.Dataset):

    def __init__(self, file_list, transform=None, phase='train'):
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):

        img_path = self.file_list[idx]
        img = Image.open(img_path).convert('RGB')  # Ensure the image is in RGB format
        img_transformed = self.transform(img, self.phase)

        # Get Label
        label = train_list[0].split('/')[-2].split('.')[0]
        if label == 'dogs':
            label = 1
        elif label == 'cats':
            label = 0

        return img_transformed, label

In [39]:
# Config
size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [40]:
# Dataset
train_dataset = DogvsCatDataset(train_list, transform=ImageTransform(size, mean, std), phase='train')
val_dataset = DogvsCatDataset(val_list, transform=ImageTransform(size, mean, std), phase='val')
test_dataset = DogvsCatDataset(test_list, transform=ImageTransform(size, mean, std), phase='test')

# Operation Check
print('Operation Check')
index = 0
print(train_dataset.__getitem__(index)[0].size())
print(train_dataset.__getitem__(index)[1])

Operation Check
torch.Size([3, 224, 224])
0


In [41]:
# DataLoader
train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


dataloader_dict = {'train': train_dataloader, 'val': val_dataloader, 'test':test_dataloader}

# Operation Check
print('Operation Check')
batch_iterator = iter(train_dataloader)
inputs, label = next(batch_iterator)
print(inputs.size())
print(label)

Operation Check
torch.Size([32, 3, 224, 224])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])


In [42]:
import torch
import torch.nn as nn
from collections import OrderedDict

class VGG16(nn.Module):
    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU())

        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU())

        self.layer4 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer5 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())

        self.layer6 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())

        self.layer7 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer8 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())

        self.layer9 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())

        self.layer10 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer11 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())

        self.layer12 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())

        self.layer13 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(7*7*512, 4096),
            nn.ReLU())

        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())

        self.fc2 = nn.Sequential(
            nn.Linear(4096, num_classes))

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = self.layer9(out)
        out = self.layer10(out)
        out = self.layer11(out)
        out = self.layer12(out)
        out = self.layer13(out)

        out = out.view(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)

        return out

    def print_parameters(self):
        total_params = 0
        layer_params = OrderedDict()

        # Calculate the total parameters for each type of layer
        for name, param in self.named_parameters():
            if param.requires_grad:
                layer_name = name.split('.')[0]
                layer_params[layer_name] = layer_params.get(layer_name, 0) + param.numel()

        print(f"{'Layer Name':<25} {'Parameters':>15}")
        print('-' * 50)
        for layer_name, params in layer_params.items():
            total_params += params
            print(f"{layer_name:<25} {params:>15}")

        print('-' * 50)
        print(f"{'Total Parameters':<35} {total_params:>15}")

In [43]:
model = VGG16(num_classes=10)
model.print_parameters()


Layer Name                     Parameters
--------------------------------------------------
layer1                               1920
layer2                              37056
layer3                              74112
layer4                             147840
layer5                             295680
layer6                             590592
layer7                             590592
layer8                            1181184
layer9                            2360832
layer10                           2360832
layer11                           2360832
layer12                           2360832
layer13                           2360832
fc                              102764544
fc1                              16781312
fc2                                 40970
--------------------------------------------------
Total Parameters                          134309962


In [44]:
num_classes = 2
num_epochs = 20
batch_size = 16
learning_rate = 0.005

model = VGG16(num_classes).to(device)
print(model)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)


# Train the model
total_step = len(train_dataloader)

VGG16(
  (layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=

In [45]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_dataloader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Validation
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in val_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))

Epoch [1/20], Step [16/16], Loss: 0.0000
Accuracy of the network on the 5000 validation images: 100.0 %
Epoch [2/20], Step [16/16], Loss: 0.0000
Accuracy of the network on the 5000 validation images: 100.0 %
Epoch [3/20], Step [16/16], Loss: 0.0000
Accuracy of the network on the 5000 validation images: 100.0 %
Epoch [4/20], Step [16/16], Loss: 0.0000
Accuracy of the network on the 5000 validation images: 100.0 %
Epoch [5/20], Step [16/16], Loss: 0.0000
Accuracy of the network on the 5000 validation images: 100.0 %
Epoch [6/20], Step [16/16], Loss: 0.0000
Accuracy of the network on the 5000 validation images: 100.0 %
Epoch [7/20], Step [16/16], Loss: 0.0000
Accuracy of the network on the 5000 validation images: 100.0 %
Epoch [8/20], Step [16/16], Loss: 0.0000
Accuracy of the network on the 5000 validation images: 100.0 %
Epoch [9/20], Step [16/16], Loss: 0.0000
Accuracy of the network on the 5000 validation images: 100.0 %
Epoch [10/20], Step [16/16], Loss: 0.0000
Accuracy of the networ

In [69]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    print('Accuracy of the network on the {} test images: {} %'.format(10000, 100 * correct / total))

Accuracy of the network on the 10000 test images: 100.0 %
