In [11]:
# ================
#  PREPROCESSING
# ================
import glob
import random
import cv2
import torch
import numpy as np
import torchvision
import torchvision.transforms as tf 
from torchvision import datasets
#torch.set_default_tensor_type(torch.FloatTensor)

def loadDirectory(filepath, isOrig):
    # load directory
    files = glob.glob(filepath)

    arr = []
    for fl in files:
        img = cv2.imread(fl)

        # resize image to 256x256
        if(isOrig != True and img.shape[0] != 256): 
            img = cv2.resize(img, (256, 256))

        arr.append(img)

    return arr

def numpyToTensor(arr):
    #convert to tensor
    arr = np.array(arr)
    arr = arr.transpose((0, 3, 1, 2))
    tensorList = torch.FloatTensor(arr)

    return tensorList

def createNewWaldoSamples(notWaldos, overlay):
    newWaldos = []
    oRows, oCols, oChannels = overlay.shape
    for img in notWaldos:
        newImg = img
        iRows, iCols, iChannels = newImg.shape
        newImg = np.dstack([newImg, np.ones((iRows, iCols), dtype='uint8') * 255])

        randX = random.randint(0, iRows-oRows)
        randY = random.randint(0, iCols-oCols)

        aOverlay = overlay[:, :, 3] / 255.0
        aImg = 1.0 - aOverlay

        for c in range(0, 3):
            newImg[randX:randX+oRows, randY:randY+oCols, c] = (aOverlay * overlay[:, :, c] + aImg * newImg[randX:randX+oRows, randY:randY+oCols, c])

        newWaldos.append(newImg[:, :, :3])
    
    return newWaldos
        
# Loading images datasets
print("Loading dataset...")
originalImg = loadDirectory("./original-images/*.jpg", True)

waldo64 = loadDirectory("./64/waldo/*.jpg", False)
notWaldo64 = loadDirectory("./64/notwaldo/*.jpg", False)

waldo128 = loadDirectory("./128/waldo/*.jpg", False)
notWaldo128 = loadDirectory("./128/notwaldo/*.jpg", False)

waldo256 = loadDirectory("./256/waldo/*.jpg", False)
notWaldo256 = loadDirectory("./256/notwaldo/*.jpg", False)

# Creating new Waldo samples by overlaying a Waldo png over a notWaldo sample
print("Creating new Waldo samples...")
waldoOverlay64 = cv2.imread('./waldo64.png', cv2.IMREAD_UNCHANGED)
waldoOverlay128 = cv2.imread('./waldo128.png', cv2.IMREAD_UNCHANGED)
waldoOverlay256 = cv2.imread('./waldo256.png', cv2.IMREAD_UNCHANGED)

moreWaldo64 = createNewWaldoSamples(notWaldo64, waldoOverlay64)
moreWaldo128 = createNewWaldoSamples(notWaldo128, waldoOverlay128)
moreWaldo256 = createNewWaldoSamples(notWaldo256, waldoOverlay256)

# Converting numpy arrays into tensors (TODO: FINISH CONVERSION FUNCTION)
print("Converting numpy arrays into tensors...")
waldo64Tensor = numpyToTensor(waldo64)
waldo128Tensor = numpyToTensor(waldo128)
waldo256Tensor = numpyToTensor(waldo256)

moreWaldo64Tensor = numpyToTensor(moreWaldo64)
moreWaldo128Tensor = numpyToTensor(moreWaldo128)
moreWaldo256Tensor = numpyToTensor(moreWaldo256)

notWaldo64Tensor = numpyToTensor(notWaldo64)
notWaldo128Tensor = numpyToTensor(notWaldo128)
notWaldo256Tensor = numpyToTensor(notWaldo256)

# Combining into two lists: waldos and not waldos
waldos = torch.cat((waldo64Tensor, waldo128Tensor, waldo256Tensor, moreWaldo64Tensor, moreWaldo128Tensor, moreWaldo256Tensor), 0)
notWaldos = torch.cat((notWaldo64Tensor, notWaldo128Tensor, notWaldo256Tensor), 0)

# All values inbetween 0 and 1
waldos = waldos / 255.0
notWaldos = notWaldos / 255.0

print("Waldo:NotWaldo Count : ", waldos.size(dim=0), ":", notWaldos.size(dim=0))

# Create labels & combine
waldoLabels = torch.cat((torch.ones(len(waldos)), torch.zeros(len(notWaldos))), 0)
allWaldos = torch.cat((waldos, notWaldos), 0)

waldoDataset = torch.utils.data.TensorDataset(allWaldos, waldoLabels)
print("Dataset loaded")

Loading dataset...
Creating new Waldo samples...
Converting numpy arrays into tensors...
Waldo:NotWaldo Count :  7037 : 6940
Dataset loaded


In [22]:
# ============
#   THE CNN
# ============
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Network should output whether or not the input image has waldo in it
class WaldoFinder(nn.Module):
    def __init__(self):
        super(WaldoFinder, self).__init__()

        # Block 1
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.batchNorm1 = nn.BatchNorm2d(num_features=256)
        self.dropout1 = nn.Dropout2d(p=0.1)

        # Block 2
        self.conv2 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.batchNorm2 = nn.BatchNorm2d(num_features=128)
        self.dropout2 = nn.Dropout2d(p=0.1)

        # Block 3
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.batchNorm3 = nn.BatchNorm2d(num_features=64)
        self.dropout3 = nn.Dropout2d(p=0.1)

        # Block 4
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=1, kernel_size=32, stride=1, padding=0)

        self.maxPool = nn.MaxPool2d(kernel_size=2, stride=2)

    #Forward function - convolvs down to 16x16 image and ultimately outputs 1 or 0
    def forward(self, t):
        t = self.conv1(t)
        t = self.batchNorm1(t)
        t = F.relu(t)
        t = self.dropout1(t)
        t = self.maxPool(t)

        t = self.conv2(t)
        t = self.batchNorm2(t)
        t = F.relu(t)
        t = self.dropout2(t)
        t = self.maxPool(t)

        t = self.conv3(t)
        t = self.batchNorm3(t)
        t = F.relu(t)
        t = self.dropout3(t)
        t = self.maxPool(t)

        t = self.conv4(t)

        return t

waldoFinder = WaldoFinder()
print("Network Initialized")

# Divide into training and test set
tenPercent = int(len(waldoDataset) * 0.1)
ninetyPercent = len(waldoDataset) - tenPercent
trainSet, testSet = torch.utils.data.random_split(waldoDataset, [ninetyPercent, tenPercent])

trainLoader = torch.utils.data.DataLoader(trainSet, shuffle=True, batch_size=10)
testLoader = torch.utils.data.DataLoader(testSet, shuffle=True, batch_size=10)
print("Dataset shuffled")

Network Initialized
Dataset shuffled


In [27]:
# Train Loop
optimizer = optim.Adam(waldoFinder.parameters(), lr=.01)
#lossFunc = nn.CrossEntropyLoss()
lossFunc = nn.BCEWithLogitsLoss()

i = 0
print("Begining training...")
for items, labels in trainLoader:
    optimizer.zero_grad()
    preds = waldoFinder(items).squeeze()
    
    loss = lossFunc(preds, labels)
    loss.backward()
    optimizer.step()
    
    print("Batch ", i, ": ", loss.item())
    i += 1

Begining training...
Batch  0 :  0.6281522512435913
Batch  1 :  183.32867431640625
Batch  2 :  4.3555498123168945
Batch  3 :  15.758875846862793
Batch  4 :  58.70960235595703
Batch  5 :  29.72129249572754
Batch  6 :  0.0
Batch  7 :  9.630595207214355
Batch  8 :  3.8423163890838623
Batch  9 :  0.31772178411483765
Batch  10 :  2.6921546459198
Batch  11 :  20.500654220581055
Batch  12 :  36.20126724243164
Batch  13 :  6.551748752593994
Batch  14 :  29.972278594970703
Batch  15 :  0.0
Batch  16 :  0.0
Batch  17 :  10.751923561096191
Batch  18 :  0.007122138049453497
Batch  19 :  31.92510986328125
Batch  20 :  0.0
Batch  21 :  0.0
Batch  22 :  17.84917449951172
Batch  23 :  0.0
Batch  24 :  68.47174072265625
Batch  25 :  8.520901679992676
Batch  26 :  2.802706480026245
Batch  27 :  0.0
Batch  28 :  0.0
Batch  29 :  1.2680915594100952
Batch  30 :  8.655827522277832
Batch  31 :  8.95247745513916
Batch  32 :  0.0
Batch  33 :  0.029461484402418137
Batch  34 :  0.0002398830110905692
Batch  35 : 

KeyboardInterrupt: 