In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.functional as F

REBUILD_DATA = True

In [18]:
# def check_device(self):
"""
check if cuda is available then set device
"""

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    count = torch.cuda.device_count()
    print(f"Running on GPU, {count}")
else:
    device = torch.devive("cpu")
    print("Running on GPU")

Running on GPU, 1


In [3]:
class CatDogClassifier():
    img_size = 50
    dog = "PetImages/Dog"
    cat = "PetImages/Cat"
    labels = {
        cat: 0,
        dog: 1
    }
    training_data = []
    dog_count = 0
    cat_count = 0

    def make_training_data(self):
        """
        open files in folder, grayscale then resize
        append to training data as array and one hot vector
        """
        print("reading files...")
        for label in self.labels:
            print(label)
            for f in tqdm(os.listdir(label)):
                try:
                    path = os.path.join(label, f)
                    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                    img = cv2.resize(
                        img, 
                        (self.img_size, self.img_size)
                    )
                    self.training_data.append(
                        [
                            np.array(img), 
                            np.eye(2)[self.labels[label]]
                        ]
                    )
                    if label == self.cat:
                        self.cat_count += 1
                    else:
                        self.dog_count +=1
                        
                except Exception as e:
                    pass
        
        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self.training_data)
        print("Cats: ", self.cat_count)
        print("Dogs: ", self.dog_count)

    def load_training_data(self):
        if not os.path.exists("training_data.npy"):
            print("Training data save does not exists!")
            return None
        self.training_data = np.load("training_data.npy", allow_pickle=True)
        print("Training data loaded...")
        return self.training_data
                    

In [4]:
class Net(nn.Module):
    """
    define layers, , 
    """
    BATCH_SIZE = 100
    EPOCHS = 18
    
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)

        x = torch.randn(50, 50).view(-1, 1, 50, 50)
        self._to_linear = None
        self.convs(x)
        
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 2)

    def convs(self, x):
        # get flattened linear matrix shape
        x = F.max_pool2d(
            F.relu(self.conv1(x)), (2, 2)
        )
        x = F.max_pool2d(
            F.relu(self.conv2(x)), (2, 2)
        )
        x = F.max_pool2d(
            F.relu(self.conv3(x)), (2, 2)
        )

        if self._to_linear is None:
            self._to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]

        return x

    def forward(self, x):
        # forward function
        x = self.convs(x)
        x = x.view(-1, self._to_linear)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)   


In [5]:
c = CatDogClassifier()
tr = c.load_training_data()

Training data loaded...


In [6]:
net = Net()

In [7]:
"""
represent training data as X, y from training data
"""

X = torch.Tensor([i[0] for i in tr]).view(-1, 50, 50)
X = X/255.0
y = torch.Tensor([x[1] for x in tr])


test_percentage = 0.1
test_value_size = int(len(X) * test_percentage)

print(test_value_size)

  X = torch.Tensor([i[0] for i in tr]).view(-1, 50, 50)


2494


In [8]:
train_X = X[:-test_value_size]
train_y = y[:-test_value_size]

test_X = X[-test_value_size:]
test_y = y[-test_value_size:]

print(len(train_X))
print(len(test_X))

22452
2494


In [19]:
net.to(device)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)

In [None]:
BATCH_SIZE = 100
EPOCHS = 10

def train(net):
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    loss_function = nn.MSELoss()
    for epoch in range(EPOCHS):
        for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
            batch_x = train_X[i:i + BATCH_SIZE].view(-1, 1, 50, 50).to(device)
            batch_y = train_y[i:i + BATCH_SIZE].to(device)
    
            net.zero_grad()
            outputs = net(batch_x)
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()

        print(f"Epoch: {epoch}, Loss: {loss}")


In [10]:
def test(net):
    correct = 0
    total = 0
    with torch.no_grad():
        for i in tqdm(range(len(test_X))):
            real_class = torch.argmax(test_y[i])
            net_out = net(
                test_X[i].view(-1, 1, 50, 50).to(device)
            )[0]
            predicted_class = torch.argmax(net_out)
            if predicted_class == real_class:
                correct += 1
            total += 1
    print("Accuracy: ", round(correct/total, 3))

In [24]:
def fwd_pass(X, y, train=False):
    loss_function = nn.MSELoss()
    if train:
        net.zero_grad()
    outputs = net(X)
    matches = [torch.argmax(i) == torch.argmax(j) for i, j in zip(outputs, y)]
    acc = matches.count(True)/len(matches)
    loss = loss_function(outputs, y)

    if train:
        loss.backward()
        optimizer.step()

    return acc, loss

In [35]:
def test(size=50):
    random_start = np.random.randint(len(test_X)-size)
    X, y = test_X[random_start:random_start+size], test_y[random_start:random_start+size]
    with torch.no_grad():
        val_acc, val_loss = fwd_pass(X.view(-1, 1, 50, 50).to(device), y.to(device))
    return val_acc, val_loss



In [27]:
train_net = train(net)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 225/225 [00:26<00:00,  8.61it/s]


Epoch: 0, Loss: 0.05185152217745781


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 225/225 [00:26<00:00,  8.60it/s]


Epoch: 1, Loss: 0.031129466369748116


 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏           | 211/225 [00:24<00:01,  8.55it/s]


KeyboardInterrupt: 

In [33]:
test_val, test_loss = test()

In [36]:
test_val, test_loss

(0.62, tensor(0.2750, device='cuda:0', grad_fn=<MseLossBackward0>))