In [3]:
import os
import cv2
import numpy as np
from tqdm import tqdm, trange
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

# Set to True to regenerate the cached dataset file from the raw images;
# leave False to reuse the file written by a previous run.
rebuild = False

class DvsC():
    """Build a shuffled grayscale cats-vs-dogs dataset and cache it on disk.

    Each sample is a pair ``(image, one_hot_label)`` where ``image`` is an
    ``imgsize`` x ``imgsize`` array read in grayscale and ``one_hot_label`` is
    the row of an identity matrix selected through ``labels``.
    """

    imgsize = 50  # images are resized to imgsize x imgsize pixels
    # Built with os.path.join so the folders resolve on any platform.
    paths = (os.path.join("train", "train"), os.path.join("test1", "test1"))
    labels = {"cat": 0, "dog": 1}

    # Class-level defaults only: buildtrainingdata() rebinds these on the
    # instance, so the shared list below is never mutated.
    training_data = []

    catcount = 0
    dogcount = 0

    def buildtrainingdata(self):
        """Read the labelled images, shuffle them and save them to disk.

        Only folders whose path contains ``"train"`` are scanned, because the
        label is derived from ``"cat"`` / ``"dog"`` appearing in the file name.
        Unreadable files are reported and skipped. The result is written to
        ``dvc_training_data.npy`` as an ``(N, 2)`` object array and the
        per-class counts are printed.
        """
        # Start from a clean slate so a second call does not duplicate samples.
        self.training_data = []
        self.catcount = 0
        self.dogcount = 0

        one_hot = np.eye(len(self.labels))

        for path in self.paths:
            if "train" not in path:
                # Labels come from the file names, so other folders are skipped.
                continue

            for x in tqdm(os.listdir(path)):
                if "jpg" not in x:
                    continue

                full_path = os.path.join(path, x)
                try:
                    read = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)
                    if read is None:
                        # imread signals an unreadable file by returning None.
                        print("Skipping unreadable image:", full_path)
                        continue
                    read = cv2.resize(read, (self.imgsize, self.imgsize))
                except Exception as e:
                    # Best effort: one corrupt file must not abort the build.
                    print(e)
                    continue

                if "cat" in x:
                    self.training_data.append((np.array(read), one_hot[self.labels['cat']]))
                    self.catcount += 1
                elif "dog" in x:
                    self.training_data.append((np.array(read), one_hot[self.labels['dog']]))
                    self.dogcount += 1

        np.random.shuffle(self.training_data)

        # Image and label have different shapes, so the samples are stored in
        # an explicit object array; letting NumPy infer the layout from the
        # ragged list is rejected by recent NumPy versions.
        dataset = np.empty((len(self.training_data), 2), dtype=object)
        for row, (image, label) in enumerate(self.training_data):
            dataset[row, 0] = image
            dataset[row, 1] = label
        np.save("dvc_training_data", dataset)

        print("Cat count: ", self.catcount)
        print("Dog count: ", self.dogcount)
        
# Regenerate the cached dataset only when `rebuild` is set; otherwise the
# dvc_training_data.npy file from an earlier run is reused further down.
if rebuild:
    dvc = DvsC()
    dvc.buildtrainingdata()


In [4]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [10]:
# The cache is an object array of (image, one_hot_label) pairs, which NumPy
# stores via pickle; np.load refuses such files unless allow_pickle=True.
# NOTE(review): unpickling can execute arbitrary code - only load a file that
# was produced locally by this notebook.
trainset = np.load("dvc_training_data.npy", allow_pickle=True)

# Stack once in NumPy instead of handing torch a Python list of arrays, and
# add the channel axis afterwards so the image size is not hard-coded.
img = torch.from_numpy(np.stack([sample[0] for sample in trainset])).unsqueeze(1)
img = (img/255.0).to(device)  # scale pixel values by 1/255 before training
label = torch.from_numpy(np.stack([sample[1] for sample in trainset])).float().to(device)
print("PCT:", int(-len(img)*.2))
print("img:", img.shape)
print("label:", label.shape)

100%|██████████| 25000/25000 [00:00<00:00, 1251254.15it/s]
100%|██████████| 25000/25000 [00:00<00:00, 2278720.45it/s]


PCT: -5000
img: torch.Size([25000, 1, 50, 50])
label: torch.Size([25000, 2])


In [11]:
# Hold out the last 20% of the (already shuffled) samples for evaluation.
# The split index is computed once from the front: slicing with a negative
# hold-out size breaks when that size is 0, because img[:-0] is empty.
holdout_size = int(len(img)*.2)
split_at = len(img) - holdout_size

trainImg = img[:split_at]
trainLabel = label[:split_at]

testImg = img[split_at:]
testLabel = label[split_at:]

print(trainImg.shape)
print(trainLabel.shape)

torch.Size([20000, 1, 50, 50])
torch.Size([20000, 2])


In [12]:
class Convnet(nn.Module):
    """Three conv/ReLU/max-pool stages followed by a three-layer dense head.

    With unpadded 5x5 convolutions and 2x2 pooling, a 1x50x50 input shrinks
    50 -> 46 -> 23 -> 19 -> 9 -> 5 -> 2 along each spatial axis, which is where
    the 2*2*150 input size of the first dense layer comes from.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: channel widths 1 -> 65 -> 125 -> 150.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=65, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=65, out_channels=125, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=125, out_channels=150, kernel_size=5)
        # Classifier head ending in two outputs.
        self.fc1 = nn.Linear(in_features=2 * 2 * 150, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=100)
        self.fc3 = nn.Linear(in_features=100, out_features=2)
        # One stateless pooling layer is shared by all three conv stages.
        self.maxpool = nn.MaxPool2d(kernel_size=(2, 2))

    def forward(self, img):
        """Return per-class probabilities (softmax over dim 1) for a batch."""
        features = img
        for conv in (self.conv1, self.conv2, self.conv3):
            features = self.maxpool(F.relu(conv(features)))

        # Flatten to (batch, 600); in_features equals 2*2*150 as set above.
        flat = features.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        scores = self.fc3(hidden)
        return F.softmax(scores, dim=1)
        
# Build the model on the selected device, then the loss and optimiser that
# the training cell picks up as module-level names.
net = Convnet()
net = net.to(device)
loss_function = nn.MSELoss()
optimizer = optim.Adam(params=net.parameters(), lr=0.001)

In [20]:
def train(net, img, label, batch_size=500, epochs=10, opt=None, criterion=None):
    """Train ``net`` on ``img``/``label`` with plain mini-batch passes.

    Args:
        net: the module to optimise.
        img: input tensor, indexed along dim 0 by sample.
        label: target tensor with the same number of samples as ``img``.
        batch_size: number of samples per optimisation step.
        epochs: number of full passes over the data.
        opt: optimiser to step; defaults to the module-level ``optimizer``.
        criterion: loss callable; defaults to the module-level
            ``loss_function``.

    Raises:
        ValueError: if the data is empty, the sample counts differ, or
            ``batch_size`` is not positive.

    The loss of the last batch is printed after every epoch.
    """
    # Fall back to the notebook-level objects so existing calls keep working.
    if opt is None:
        opt = optimizer
    if criterion is None:
        criterion = loss_function

    if len(img) == 0:
        raise ValueError("train() received an empty dataset")
    if len(img) != len(label):
        raise ValueError("img and label must contain the same number of samples")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    net.train()
    for _ in range(epochs):
        for start in trange(0, len(img), batch_size):
            BatchImg = img[start:start + batch_size]
            BatchLabel = label[start:start + batch_size]

            # Clear gradients left over from the previous step.
            net.zero_grad()

            output = net(BatchImg)
            loss = criterion(output, BatchLabel)
            loss.backward()
            opt.step()
        print(loss)
        
# Fit the network on the training split; the loss of the last batch is printed
# after every epoch.
train(net, trainImg, trainLabel)        

100%|██████████| 40/40 [00:03<00:00, 12.90it/s]


tensor(0.1901, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 40/40 [00:03<00:00, 12.92it/s]


tensor(0.1591, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 40/40 [00:03<00:00, 13.17it/s]


tensor(0.1392, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 40/40 [00:03<00:00, 12.81it/s]


tensor(0.1295, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 40/40 [00:03<00:00, 13.14it/s]


tensor(0.1201, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 40/40 [00:02<00:00, 13.23it/s]


tensor(0.1153, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 40/40 [00:03<00:00, 12.01it/s]


tensor(0.1001, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 40/40 [00:03<00:00, 12.61it/s]


tensor(0.0969, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 40/40 [00:03<00:00, 12.83it/s]


tensor(0.0802, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 40/40 [00:03<00:00, 13.06it/s]


tensor(0.0837, device='cuda:0', grad_fn=<MseLossBackward>)


In [21]:
def test(testImg, testLabel):
    correct = 0
    total = 0
    with torch.no_grad():
        for i in tqdm(range(len(testImg))):
            real_class = torch.argmax(testLabel[i])
            net_out = net(testImg[i].view(-1, 1, 50, 50))  # returns a list, 
            predicted_class = torch.argmax(net_out)

            if predicted_class == real_class:
                correct += 1
            total += 1
    print("Accuracy: ", round(correct/total, 3))

In [22]:
# Report accuracy on the held-out split.
test(testImg, testLabel)

100%|██████████| 5000/5000 [00:08<00:00, 584.28it/s]


Accuracy:  0.775
