In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import torch.utils.data as data
import torchvision
from torchvision import transforms
import shutil
import torch
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split

In [2]:
class DogsVSCats:
    """Prepare the cats-vs-dogs image folders and expose them as DataLoaders.

    Expected layout on disk (relative to the working directory)::

        PetImages/Cat/*.jpg      raw images, one folder per class
        PetImages/Dog/*.jpg
        PetImages/train/<class>  created by ``structure_dataset``
        PetImages/test/<class>   created by ``structure_dataset``

    After ``load_data_from_disk`` has run, ``self.data_loaders`` is a dict with
    the keys ``'train'``, ``'val'`` and ``'test'``.
    """

    IMG_SIZE = 50  # images are resized to IMG_SIZE x IMG_SIZE pixels
    ROOT = 'PetImages'
    CATS = "Cat"
    DOGS = "Dog"
    TESTING = ROOT + "/test"
    TRAIN = ROOT + "/train"
    LABELS = {CATS: 0, DOGS: 1}
    training_data = []  # not used by any method of this class; kept for compatibility

    def __init__(self):
        # Filled in by load_data_from_disk().
        self.data_loaders = None

    @classmethod
    def structure_dataset(cls, test_fraction=0.2):
        """Copy the raw class folders into ``TRAIN`` / ``TESTING`` sub-folders.

        For every label in ``LABELS`` the ``.jpg`` files found in
        ``ROOT/<label>`` are sorted by name; the first ``1 - test_fraction`` of
        them are copied to ``TRAIN/<label>`` and the rest to
        ``TESTING/<label>``. The original files are left in place.

        Args:
            test_fraction: Share of each class (0..1) that goes to the test set.

        Raises:
            ValueError: If ``test_fraction`` is outside ``[0, 1]``.
        """
        if not 0 <= test_fraction <= 1:
            raise ValueError("test_fraction must be between 0 and 1")

        image_names = {}
        for label in cls.LABELS:
            sub_folder = os.path.join(cls.ROOT, label)
            # Sort so the split does not depend on the arbitrary order of
            # os.listdir(); match on the extension rather than a substring.
            image_names[label] = sorted(
                name for name in tqdm(os.listdir(sub_folder))
                if name.lower().endswith('.jpg')
            )

        for label, names in image_names.items():
            # The source folder must follow the class currently being copied.
            source = os.path.join(cls.ROOT, label)
            split_at = int(len(names) * (1 - test_fraction))
            partitions = ((cls.TRAIN, names[:split_at]), (cls.TESTING, names[split_at:]))
            for dset, subset in partitions:
                dest = os.path.join(dset, label)
                # copyfile() does not create missing directories.
                os.makedirs(dest, exist_ok=True)
                for name in tqdm(subset):
                    shutil.copyfile(os.path.join(source, name), os.path.join(dest, name))

    @staticmethod
    def train_val_dataset(dataset, val_split=0.2, random_state=None):
        """Randomly split ``dataset`` into train and validation subsets.

        Args:
            dataset: Any object supporting ``len()`` and integer indexing.
            val_split: Fraction of the samples assigned to the validation set.
            random_state: Optional seed forwarded to ``train_test_split`` to
                make the split reproducible.

        Returns:
            dict with the keys ``'train'`` and ``'val'``, each a ``Subset`` of
            ``dataset``.
        """
        train_idx, val_idx = train_test_split(
            list(range(len(dataset))), test_size=val_split, random_state=random_state
        )
        return {'train': Subset(dataset, train_idx), 'val': Subset(dataset, val_idx)}

    def load_data_from_disk(self, batch_size=32, num_workers=4):
        """Build the train / val / test DataLoaders from the folders on disk.

        Images are resized to ``IMG_SIZE`` x ``IMG_SIZE``, converted to tensors
        and normalised with mean 0.5 and std 0.5 per channel. The ``TRAIN``
        folder is split into train and validation subsets; the ``TESTING``
        folder becomes the test set. The index -> class-name mapping of the
        test set is printed.

        Args:
            batch_size: Batch size used for all three loaders.
            num_workers: Number of worker processes per loader.
        """
        transform = transforms.Compose([
            transforms.Resize((self.IMG_SIZE, self.IMG_SIZE)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        train_data = torchvision.datasets.ImageFolder(root=self.TRAIN, transform=transform)
        test_data = torchvision.datasets.ImageFolder(root=self.TESTING, transform=transform)

        subsets = self.train_val_dataset(train_data)
        self.data_loaders = {
            split: data.DataLoader(subsets[split], batch_size, shuffle=True, num_workers=num_workers)
            for split in ('train', 'val')
        }
        self.data_loaders['test'] = data.DataLoader(
            test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers
        )

        idx_to_class = {v: k for k, v in test_data.class_to_idx.items()}
        print(idx_to_class)



In [3]:
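# One-time setup: uncomment and run once to copy PetImages/Cat and PetImages/Dog
# into the PetImages/train and PetImages/test folders used below.
# DogsVSCats.structure_dataset()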

In [4]:
# Build the train/val/test loaders from the PetImages/train and PetImages/test folders.
# load_data_from_disk() also prints the index -> class-name mapping of the test set.
cat_dog = DogsVSCats()
cat_dog.load_data_from_disk()

{0: 'Cat', 1: 'Dog'}


In [5]:
# Show the dict of DataLoaders, keyed by split name ('train', 'val', 'test').
cat_dog.data_loaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x1ec8266bfd0>,
 'val': <torch.utils.data.dataloader.DataLoader at 0x1ec82631e20>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x1ec8266bbb0>}

In [6]:
# len() of a DataLoader is its number of batches, so one line is printed per split.
for split_name in ('train', 'val', 'test'):
    loader = cat_dog.data_loaders[split_name]
    print(len(loader))

500
125
157


In [7]:
# The output of the last convolutional layer must be flattened before it can be passed through a regular "dense" layer (which PyTorch calls a linear layer).

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [9]:
class Net(nn.Module):
    """Small CNN for 2-class image classification.

    Three 5x5 convolutions (3 -> 32 -> 64 -> 128 channels), each followed by
    ReLU and 2x2 max pooling, then two linear layers (flattened -> 512 -> 2).
    The size of the flattened feature vector is measured once in ``__init__``
    by pushing a dummy 3x50x50 image through the convolutional stack, so the
    network expects inputs of shape ``(batch, 3, 50, 50)``.

    ``forward`` returns class probabilities (softmax over dim 1), not logits.
    """

    def __init__(self):
        super().__init__()
        # 3 input channels (RGB), 32 output channels, 5x5 kernel.
        self.conv1 = nn.Conv2d(3, 32, 5)
        # Each layer's input channels equal the previous layer's output channels.
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)

        # Probe the conv stack once to learn the flattened feature size.
        # No gradients are needed for this shape-only pass.
        self._to_linear = None
        with torch.no_grad():
            self.convs(torch.randn(1, 3, 50, 50))

        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 2)  # 2 outputs: one per class (cat / dog)

    def convs(self, x):
        """Run the convolutional stack; return the un-flattened feature maps.

        On the first call the number of features per sample is cached in
        ``self._to_linear``.
        """
        # Each stage: convolution -> ReLU -> max pooling over a 2x2 window.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

        if self._to_linear is None:
            self._to_linear = x[0].numel()
        return x

    def forward(self, x):
        """Return class probabilities of shape ``(batch, 2)``.

        Raises:
            RuntimeError: If the input's spatial size does not produce
                ``self._to_linear`` features per sample (raised by ``fc1``).
        """
        x = self.convs(x)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, _to_linear), this cannot silently re-chunk a wrong-sized
        # input into a different number of rows.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # output layer: no activation before the softmax
        return F.softmax(x, dim=1)


# Prefer the GPU, but fall back to the CPU so this cell also runs without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Net().to(device)
print(net)

Net(
  (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)


In [10]:
# Smoke test: one random 3x50x50 image, placed on whatever device the model lives on.
x = torch.randn(1, 3, 50, 50).to(next(net.parameters()).device)
net(x)

tensor([[0.5134, 0.4866]], device='cuda:0', grad_fn=<SoftmaxBackward>)

In [11]:
from torchsummary import summary

# summary() prints the table itself. The trailing semicolon stops the notebook
# from also echoing the returned object, which showed the same table twice.
summary(net);

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            2,432
├─Conv2d: 1-2                            51,264
├─Conv2d: 1-3                            204,928
├─Linear: 1-4                            262,656
├─Linear: 1-5                            1,026
Total params: 522,306
Trainable params: 522,306
Non-trainable params: 0


Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            2,432
├─Conv2d: 1-2                            51,264
├─Conv2d: 1-3                            204,928
├─Linear: 1-4                            262,656
├─Linear: 1-5                            1,026
Total params: 522,306
Trainable params: 522,306
Non-trainable params: 0

In [23]:
import torch.optim as optim

# Adam over all parameters of `net`, learning rate 1e-3.
optimizer = optim.Adam(net.parameters(), lr=0.001)
# Mean squared error; the training loop compares the network's softmax output
# with one-hot encoded labels.
# NOTE(review): nn.CrossEntropyLoss on raw logits is the conventional choice for
# classification, but switching requires changing Net.forward and the training
# loop at the same time.
loss_function = nn.MSELoss()

In [26]:
# NOTE: BATCH_SIZE is not used by this loop; the batch size is fixed by the
# DataLoader in cat_dog.data_loaders['train'].
BATCH_SIZE = 100
EPOCHS = 10

# Follow the model's device instead of hard-coding CUDA.
device = next(net.parameters()).device

net.train()
for epoch in range(EPOCHS):
    for inputs, labels in tqdm(cat_dog.data_loaders['train']):
        optimizer.zero_grad()
        outputs = net(inputs.to(device))
        # Fix the one-hot width to the number of network outputs. Without
        # num_classes it is inferred from the batch, so a batch holding only
        # class 0 would produce an (N, 1) target and break the loss.
        targets = torch.nn.functional.one_hot(
            labels.to(device), num_classes=outputs.shape[1]
        ).float()
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()  # apply the parameter update
    # Loss of the last batch of the epoch (not an epoch average).
    print(f"Epoch: {epoch}. Loss: {loss.item()}")



100%|██████████| 500/500 [00:17<00:00, 28.26it/s]


Epoch: 0. Loss: 0.24629144370555878


100%|██████████| 500/500 [00:17<00:00, 29.27it/s]


Epoch: 1. Loss: 0.2468465268611908


100%|██████████| 500/500 [00:17<00:00, 28.39it/s]


Epoch: 2. Loss: 0.25217342376708984


100%|██████████| 500/500 [00:16<00:00, 31.08it/s]


Epoch: 3. Loss: 0.2511371076107025


100%|██████████| 500/500 [00:16<00:00, 30.42it/s]


Epoch: 4. Loss: 0.24790258705615997


100%|██████████| 500/500 [00:16<00:00, 30.82it/s]


Epoch: 5. Loss: 0.25479811429977417


100%|██████████| 500/500 [00:14<00:00, 33.93it/s]


Epoch: 6. Loss: 0.25439298152923584


100%|██████████| 500/500 [00:13<00:00, 35.92it/s]


Epoch: 7. Loss: 0.24696531891822815


100%|██████████| 500/500 [00:14<00:00, 33.40it/s]


Epoch: 8. Loss: 0.24926181137561798


100%|██████████| 500/500 [00:13<00:00, 37.01it/s]

Epoch: 9. Loss: 0.2514626681804657





In [27]:
correct = 0
total = 0

# Follow the model's device instead of hard-coding CUDA.
device = next(net.parameters()).device

# Evaluate in eval mode and restore the previous mode afterwards.
was_training = net.training
net.eval()
with torch.no_grad():
    for inputs, labels in tqdm(cat_dog.data_loaders['test']):
        outputs = net(inputs.to(device))
        # dim=1 gives one predicted class per sample. Without it argmax returns
        # a single index into the flattened (batch, 2) tensor.
        predicted_class = torch.argmax(outputs, dim=1).cpu()
        correct += (predicted_class == labels).sum().item()
        total += len(labels)
    print(total, correct)
    print("Accuracy: ", correct/total)
net.train(was_training)

100%|██████████| 157/157 [00:07<00:00, 19.96it/s]

5000 2500
Accuracy:  0.5



