In [1]:
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
import cv2                 # working with, mainly resizing, images
import numpy as np         # dealing with arrays
import os                  # dealing with directories
from random import shuffle # mixing up or currently ordered data that might lead our network astray in training.
from tqdm import tqdm      # a nice pretty percentage bar for tasks. Thanks to viewer Daniel BA1/4hler for this suggestion

TRAIN_DIR = 'train'
TEST_DIR = 'test'
IMG_SIZE = 64
LR = 1e-3

In [3]:
import torch.nn as nn
import torch.nn.functional as F

In [4]:
def create_label(image_name):
    """ Create an one-hot encoded vector from image name """
    word_label = image_name.split('.')[-3]
    if word_label == 'cat':
        return np.array([1,0])
    elif word_label == 'dog':
        return np.array([0,1])

In [5]:
def create_train_data():
    training_data = []
    for img in tqdm(os.listdir(TRAIN_DIR)):
        path = os.path.join(TRAIN_DIR, img)
        img_data = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img_data = cv2.resize(img_data, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(img_data), create_label(img)])
    shuffle(training_data)
    np.save('train_data.npy', training_data)
    return training_data
def create_test_data():
    testing_data = []
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR,img)
        img_num = img.split('.')[0]
        img_data = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img_data = cv2.resize(img_data, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(img_data), img_num])
        
    shuffle(testing_data)
    np.save('test_data.npy', testing_data)
    return testing_data

In [6]:
# train_data = create_train_data()
# test_data = create_test_data()
train_data = np.load("train_data.npy")
test_data = np.load("test_data.npy")

In [7]:
train = train_data[:-500]
test = train_data[-500:]
X_train = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y_train = [i[1] for i in train]
X_test = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y_test = [i[1] for i in test]

In [8]:
import torch.nn as nn
import torch.nn.functional as F


class DogVsCat(nn.Module):

    def __init__(self):
        super(DogVsCat, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3,64,stride=1,kernel_size=3),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(64,64,stride=1,kernel_size=3),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        
        self.conv3 = nn.Sequential(
            nn.Conv2d(64,128,stride=1, kernel_size=3),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2,stride=2)
        )
        self.dropout = nn.Dropout()
        self.fc1 = nn.Linear(128*64*64, 64)
        self.fc2 = nn.Sequential(
            nn.Linear(64, 2),
            nn.Softmax()
        )
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.dropout(F.relu(x.view(x.size(0), -1)))
        x = self.fc2()
        return x


In [9]:
model = DogVsCat()

In [10]:
model

DogVsCat(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dropout): Dropout(p=0.5)
  (fc1): Linear(in_features=524288, out_features=64, bias=True)
  (fc2): Sequential(
    (0): Linear(in_features=64, out_features=2, bias=True)
    (1): Softmax()
  )
)

In [11]:
from torch.utils.data import Dataset, DataLoader

In [12]:
import torch.utils.data as data_utils

In [13]:
class DriveData(Dataset):

    def __init__(self, X, y, transform=None):
        try:
            if len(X) != len(y):
                raise Exception
            self.X = X
            self.y = y
        except:
            print("error")

    # Override to give PyTorch access to any image on the dataset
    def __getitem__(self, index):
        image  = torch.from_numpy(self.X[index])
        label  = torch.from_numpy(self.y[index])
        return image, label

    # Override to give PyTorch size of dataset
    def __len__(self):
        return len(self.X)

In [14]:
dset_train = DriveData(X_train, y_train)

In [15]:
train_loader = DataLoader(dset_train, batch_size=10, shuffle=True)

In [16]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x201a0aa5b00>

In [17]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [18]:
total_step = len(train_loader)
for epoch in range(1):
    for i, (images, labels) in enumerate(train_loader):  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

RuntimeError: Given groups=1, weight[64, 3, 3, 3], so expected input[10, 64, 64, 1] to have 3 channels, but got 64 channels instead

In [None]:
model.eval()
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i in range(0, len(X_train)):
        # get the inputs
        label = torch.from_numpy(y_train[i])
        inputs = torch.from_numpy(X_train[i])

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

In [None]:
x = torch.randn(32, 1, 3, 3)
output = model(x)