# Cat Dogs classification

## Part 1. Basic image processing

### Basic image opening

In [None]:
import numpy as np
import os
from PIL import Image #!pip install pillow
from IPython.display import display

In [None]:
with Image.open('../case_studies/data/cat_dog/train/cat/1.jpg') as my_cat:
    # display(my_cat)
    nparray_cat = np.array(my_cat)
    print(nparray_cat.shape)

### How to loop through all the files in folder

In [None]:
path = '../case_studies/data/cat_dog/'
img_names = []  #get all the image names --> os.walk

for folder, subfolders, filenames in os.walk(path):
    # print(f"{folder} | {subfolders} | {filenames}")
    #write whatever filtering you want - to get the file you want
    for name in filenames:
        img_names.append(folder + '/' + name)
        
len(img_names)

### Transformation

- In deep learning for computer vision, we often transform our image as either (1) preprocessing step, or for (2) data augmentation

In [None]:
dog = Image.open('../case_studies/data/cat_dog/train/dog/14.jpg')
# dog.size  #h = 500, w = 386
# display(dog)
# dog.getpixel??
# r, g, b = dog.getpixel((0, 0))
# print(r, g, b)  #this will be useful for filtering

In [None]:
type(dog)

In [None]:
from torchvision import transforms
import matplotlib.pyplot as plt

#most basic transformation, which is making 0-255 to 0-1
transform = transforms.Compose([
    # transforms.RandomRotation(30),
    # transforms.Resize((50)),
    # transforms.CenterCrop(200), ## this is for data augmentation
    transforms.ToTensor()
])
norm_dog = transform(dog)
print(norm_dog.shape)
plt.figure(figsize=(2, 2))
plt.imshow(np.transpose(norm_dog, (1, 2, 0)))

## Part 2. CNN

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models # add models to the list
from torchvision.utils import make_grid
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### 2.1 Transformations

In [2]:
# online data augmentation - dynamically transform our image 
# randomly during batching (through dataloader)

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(30),
    transforms.Resize(224),
    transforms.CenterCrop(224), #this will help focus on the faces
    transforms.ToTensor(), #normalize
])

test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224), #this will help focus on the faces
    transforms.ToTensor(), #normalize
])

### 2.2 Load the data

In [3]:
root = '../case_studies/data/cat_dog/'

train_data = datasets.ImageFolder(os.path.join(root, 'train'), transform=train_transform)
test_data  = datasets.ImageFolder(os.path.join(root, 'test'),  transform=test_transform)

In [4]:
len(train_data) #18002 images of cats and dogs
len(test_data)  #6998 images of cats and dogs

6998

In [5]:
#is their any imbalance
np_targets = np.array(test_data.targets)

In [6]:
len(np_targets[np_targets==1]) #9001 cats, #9001 dogs | #3499 cats, 3499 dogs

3499

In [7]:
train_data.classes

['cat', 'dog']

### 2.3 Dataloaders

In [8]:
len(test_data)

6998

In [9]:
torch.manual_seed(999)

train_loader = DataLoader(train_data, batch_size=64,  shuffle=True)
test_loader  = DataLoader(test_data, batch_size=6998, shuffle=False)

In [10]:
for image, label in train_loader:
    print(image.shape) #(bs, ch, h, w)
    print(label.shape) #(bs,         )
    break

torch.Size([64, 3, 224, 224])
torch.Size([64])


### 2.4 Define our network

In [None]:
image.shape #(bs, ch, h, w)

In [None]:
l1 = nn.Conv2d(3, 6, 3, 1, 1)  #(in_c, out_c, k, s, p)
l2 = nn.Conv2d(6, 16, 3, 1, 1) 

l2(l1(image)).shape

In [None]:
class chaky_cnn(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Conv2d(3, 6, 3, 1, 1)  #(in_c, out_c, k, s, p)
        self.l2 = nn.Conv2d(6, 16, 3, 1, 1) 
        self.fc1 = nn.Linear(16*224*224, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)
    def forward(self, image):
        out = F.relu(self.l1(image))
        out = F.relu(self.l2(out))
        out = out.reshape((-1, 16*224*224))
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [None]:
#test case
model = chaky_cnn()
out   = model(image)
assert out.shape[1] == 2

### 2.5 Loss and optimizer

In [None]:
criterion = nn.CrossEntropyLoss()
optim     = torch.optim.Adam(model.parameters(), lr=0.001)

### 2.6 Training!!

In [None]:
# device = torch.device('cuda0') #for GPU
# model.to(device)

In [None]:
num_epochs = 5
for i in range(num_epochs):
    train_correct = 0
    
    for images, labels in train_loader:
        # images.to(device) #for GPU
        # labels.to(device) #for GPU
        
        y_hat = model(images) #yhat: (bs, classes)
        loss  = criterion(y_hat, labels)
        
        real_pred = torch.max(y_hat, 1)[1]
        train_correct  += (real_pred == labels).sum()/images.shape[0]
                
        optim.zero_grad()
        loss.backward()
        optim.step()
        
    print(f"Epoch: {i} | Train acc: {train_correct/len(train_loader):3.2f} | Loss: {loss.item():3.4f}")

### 2.7 Inference

In [None]:
some_sample_image = test_data[256][0]
some_sample_image.shape

In [None]:
output = model(some_sample_image)
output

In [None]:
torch.max(output, 1)[1]
# train_data.classes

In [None]:
test_data[256][1]

## Part 3. Using pretrained models

In [11]:
from torchvision import models #actually pretrained models are offered by many vendors

In [12]:
alexnet = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)

In [13]:
#freeze the parameters
#you don't need to train the parameters
for param in alexnet.parameters():
    param.requires_grad = False  #you don't allow any parameter in AlexNet to learn

In [14]:
alexnet.classifier = nn.Sequential(
    nn.Linear(in_features=9216, out_features=1024),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(1024, 2)
)

In [17]:
criterion = nn.CrossEntropyLoss()
optim = torch.optim.Adam(alexnet.parameters(), lr = 0.001)

In [18]:
num_epochs = 5
for i in range(num_epochs):
    train_correct = 0
    
    for images, labels in train_loader:
        y_hat = alexnet(images) #yhat: (bs, classes)
        loss  = criterion(y_hat, labels)
        
        real_pred = torch.max(y_hat, 1)[1]
        train_correct  += (real_pred == labels).sum()/images.shape[0]
                
        optim.zero_grad()
        loss.backward()
        optim.step()
        
    print(f"Epoch: {i} | Train acc: {train_correct/len(train_loader):3.2f} | Loss: {loss.item():3.4f}")

Epoch: 0 | Train acc: 0.89 | Loss: 0.2527
Epoch: 1 | Train acc: 0.91 | Loss: 0.1178
Epoch: 2 | Train acc: 0.92 | Loss: 0.1719
Epoch: 3 | Train acc: 0.92 | Loss: 0.1969
Epoch: 4 | Train acc: 0.93 | Loss: 0.1429
