In [1]:
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
import PIL                 # working with, mainly resizing, images
import numpy as np         # dealing with arrays
from random import shuffle # mixing up or currently ordered data that might lead our network astray in training.
from tqdm import tqdm      # a nice pretty percentage bar for tasks. Thanks to viewer Daniel BA1/4hler for this suggestion

import cv2
from os import listdir
from os.path import isfile, join


TRAIN_DIR = 'train'
TEST_DIR = 'test'
IMG_SIZE = 64
LR = 1e-3

In [3]:
def create_label(word_label):
    """ Create an one-hot encoded vector from image name """
    if "dog" in word_label:
        return np.array([1,0])
    elif "cat" in word_label:
        return np.array([0,1])

In [4]:
import torch.nn as nn
import torch.nn.functional as F


class DogVsCat(nn.Module):

    def __init__(self):
        super(DogVsCat, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3,64,stride=1,kernel_size=3),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(64,64,stride=1,kernel_size=3),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        
        self.conv3 = nn.Sequential(
            nn.Conv2d(64,128,stride=1, kernel_size=3),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        self.dropout = nn.Dropout()
        self.fc1 = nn.Linear(128*64*64, 64)
        self.fc2 = nn.Sequential(
            nn.Linear(64, 2),
            nn.Softmax()
        )
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.dropout(F.relu(x.view(x.size(0), -1)))
        x = self.fc2()
        return x


In [5]:
model = DogVsCat()

In [6]:
model

DogVsCat(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dropout): Dropout(p=0.5)
  (fc1): Linear(in_features=524288, out_features=64, bias=True)
  (fc2): Sequential(
    (0): Linear(in_features=64, out_features=2, bias=True)
    (1): Softmax()
  )
)

In [7]:
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets

In [8]:
import torch.utils.data as data_utils

In [9]:
cpt = sum([len(files) for r, d, files in os.walk(TRAIN_DIR)])

In [10]:
cpt

25000

In [11]:
class DriveData(Dataset):

    def __init__(self, path,transform=None):
        self.Path = path
        self.PathFIle =  [f for f in listdir(path) if isfile(join(path, f))]

    # Override to give PyTorch access to any image on the dataset
    def __getitem__(self, index):
        path = self.PathFIle[index]
        path = os.path.join(self.Path,path)
        image = PIL.Image.open(path)
        img_data = np.asarray(image)
        img_data = cv2.resize(img_data, (IMG_SIZE, IMG_SIZE))
        image  = torch.from_numpy(img_data)
        label  = torch.from_numpy(create_label(path))
        return image, label
    
    def __len__(self):
        cpt = sum([len(files) for r, d, files in os.walk(self.Path)])
        return cpt
        

In [12]:
print(create_label("train\dog.9552.jpg"))

[1 0]


In [13]:
"dog" in "train\dog.9552.jpg"

True

In [14]:
dset_train = DriveData(path=TRAIN_DIR)

In [15]:
train_loader = DataLoader(dset_train, batch_size=10, shuffle=True)

In [16]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x2ab929343c8>

In [17]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [22]:
total_step = len(train_loader)
print(total_step)
for epoch in range(total_step):
    for i, (images, labels) in enumerate(train_loader):  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        print("ijdoidsj")
        # Forward pass
        for i in range(len(images)):
            outputs = model(images[i].unsqueeze_(0))
            loss = criterion(outputs[i], labels[i])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

2500
ijdoidsj


RuntimeError: Given groups=1, weight[64, 3, 3, 3], so expected input[1, 64, 64, 3] to have 3 channels, but got 64 channels instead

In [20]:
images.shape

torch.Size([10, 64, 64, 3])

In [None]:
model.eval()
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i in range(0, len(X_train)):
        # get the inputs
        label = torch.from_numpy(y_train[i])
        inputs = torch.from_numpy(X_train[i])

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

In [None]:
x = torch.randn(32, 1, 3, 3)
output = model(x)