In [2]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F

REBUILD_DATA = False # set to true to one once, then back to false unless you want to change something in your training data.

class DogsVSCats():
    IMG_SIZE = 50
    CATS = "PetImages/Cat"
    DOGS = "PetImages/Dog"
    TESTING = "PetImages/Testing"
    LABELS = {CATS: 0, DOGS: 1}
    training_data = []

    catcount = 0
    dogcount = 0

    def make_training_data(self):
        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                if "jpg" in f:
                    try:
                        path = os.path.join(label, f)
                        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                        img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                        self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])  # do something like print(np.eye(2)[1]), just makes one_hot
                        #print(np.eye(2)[self.LABELS[label]])

                        if label == self.CATS:
                            self.catcount += 1
                        elif label == self.DOGS:
                            self.dogcount += 1

                    except Exception as e:
                        pass
                        #print(label, f, str(e))

        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self.training_data)
        print('Cats:',dogsvcats.catcount)
        print('Dogs:',dogsvcats.dogcount)

if REBUILD_DATA:
    dogsvcats = DogsVSCats()
    dogsvcats.make_training_data()
training_data = np.load("training_data.npy", allow_pickle=True)
print(len(training_data))


24946


In [3]:
class Net(nn.Module):
    def __init__(self):
        super().__init__() # just run the init of parent class (nn.Module)
        self.conv1 = nn.Conv2d(1, 32, 5) # input is 1 image, 32 output channels, 5x5 kernel / window
        self.conv2 = nn.Conv2d(32, 64, 5) # input is 32, bc the first layer output 32. Then we say the output will be 64 channels, 5x5 kernel / window
        self.conv3 = nn.Conv2d(64, 128, 5)

        x = torch.randn(50,50).view(-1,1,50,50)
        self._to_linear = None
        self.convs(x)

        self.fc1 = nn.Linear(self._to_linear, 512) #flattening.
        self.fc2 = nn.Linear(512, 2) # 512 in, 2 out bc we're doing 2 classes (dog vs cat).

    def convs(self, x):
        # max pooling over 2x2
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)  # .view is reshape ... this flattens X before
        x = F.relu(self.fc1(x))
        x = self.fc2(x) # bc this is our output layer. No activation here.
        return F.softmax(x, dim=1)


net = Net()

In [4]:
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()
X = torch.Tensor([i[0] for i in training_data]).view(-1,50,50)
X = X/255.0
y = torch.Tensor([i[1] for i in training_data])
VAL_PCT = 0.1  # lets reserve 10% of our data for validation
val_size = int(len(X)*VAL_PCT)
print(val_size)

2494


In [5]:
train_X = X[:-val_size]
train_y = y[:-val_size]

test_X = X[-val_size:]
test_y = y[-val_size:]
print(len(train_X), len(test_X))

22452 2494


In [6]:
BATCH_SIZE = 300
EPOCHS = 10

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)): # from 0, to the len of x, stepping BATCH_SIZE at a time. [:50] ..for now just to dev
        #print(f"{i}:{i+BATCH_SIZE}")
        batch_X = train_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50)
        batch_y = train_y[i:i+BATCH_SIZE]

        net.zero_grad()

        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()    # Does the update

    print(f"Epoch: {epoch}. Loss: {loss}")

  0%|          | 0/75 [00:00<?, ?it/s]  1%|▏         | 1/75 [00:00<00:54,  1.36it/s]  3%|▎         | 2/75 [00:01<00:51,  1.42it/s]  4%|▍         | 3/75 [00:01<00:49,  1.47it/s]  5%|▌         | 4/75 [00:02<00:47,  1.50it/s]  7%|▋         | 5/75 [00:03<00:45,  1.55it/s]  8%|▊         | 6/75 [00:03<00:43,  1.58it/s]  9%|▉         | 7/75 [00:04<00:42,  1.59it/s] 11%|█         | 8/75 [00:05<00:42,  1.59it/s] 12%|█▏        | 9/75 [00:05<00:41,  1.60it/s] 13%|█▎        | 10/75 [00:06<00:40,  1.61it/s] 15%|█▍        | 11/75 [00:06<00:39,  1.62it/s] 16%|█▌        | 12/75 [00:07<00:38,  1.65it/s] 17%|█▋        | 13/75 [00:08<00:37,  1.65it/s] 19%|█▊        | 14/75 [00:08<00:36,  1.67it/s] 20%|██        | 15/75 [00:09<00:35,  1.67it/s] 21%|██▏       | 16/75 [00:09<00:35,  1.66it/s] 23%|██▎       | 17/75 [00:10<00:35,  1.65it/s] 24%|██▍       | 18/75 [00:11<00:34,  1.65it/s] 25%|██▌       | 19/75 [00:11<00:34,  1.61it/s] 27%|██▋       | 20/75 [00:12<00:33,  1.62it/s] 28%|██▊ 

Epoch: 0. Loss: 0.23458024859428406
Epoch: 1. Loss: 0.22069334983825684
Epoch: 2. Loss: 0.20414131879806519
Epoch: 3. Loss: 0.19156745076179504
Epoch: 4. Loss: 0.17142048478126526
Epoch: 5. Loss: 0.1597864031791687
Epoch: 6. Loss: 0.14480042457580566
Epoch: 7. Loss: 0.13993820548057556
Epoch: 8. Loss: 0.11949937790632248
Epoch: 9. Loss: 0.11541830003261566


In [9]:
correct = 0
total = 0
with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_class = torch.argmax(test_y[i])
        net_out = net(test_X[i].view(-1, 1, 50, 50))[0]  # returns a list, 
        predicted_class = torch.argmax(net_out)

        if predicted_class == real_class:
            correct += 1
        total += 1
print("Accuracy: ", round(correct/total, 3))

  0%|          | 0/2494 [00:00<?, ?it/s]  3%|▎         | 65/2494 [00:00<00:03, 645.45it/s]  6%|▌         | 143/2494 [00:00<00:03, 680.47it/s]  9%|▉         | 222/2494 [00:00<00:03, 708.96it/s] 12%|█▏        | 299/2494 [00:00<00:03, 723.59it/s] 15%|█▌        | 375/2494 [00:00<00:02, 733.38it/s] 18%|█▊        | 450/2494 [00:00<00:02, 736.57it/s] 21%|██        | 517/2494 [00:00<00:02, 685.21it/s] 23%|██▎       | 582/2494 [00:00<00:02, 662.09it/s] 26%|██▌       | 653/2494 [00:00<00:02, 674.26it/s] 29%|██▉       | 730/2494 [00:01<00:02, 698.97it/s] 32%|███▏      | 799/2494 [00:01<00:02, 654.74it/s] 35%|███▌      | 873/2494 [00:01<00:02, 676.09it/s] 38%|███▊      | 946/2494 [00:01<00:02, 689.51it/s] 41%|████      | 1021/2494 [00:01<00:02, 704.90it/s] 44%|████▍     | 1092/2494 [00:01<00:01, 702.54it/s] 47%|████▋     | 1165/2494 [00:01<00:01, 709.64it/s] 50%|████▉     | 1240/2494 [00:01<00:01, 720.77it/s] 53%|█████▎    | 1313/2494 [00:01<00:01, 712.90it/s] 56%|█████▌    | 1

Accuracy:  0.781


False