In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data
import torchvision.datasets as datasets
import torchvision.models as models
import matplotlib.pyplot as plt
import numpy as np
# Make float64 the default floating-point dtype for newly created tensors and
# module parameters. torch.set_default_dtype is the supported replacement for
# the deprecated torch.set_default_tensor_type('torch.DoubleTensor').
# NOTE(review): double precision is typically much slower than float32 on
# GPUs - confirm it is really needed for this model.
torch.set_default_dtype(torch.float64)
# Let cuDNN auto-tune its convolution algorithms (helps when input shapes stay
# fixed between batches, as they do with the fixed-size resize used below).
torch.backends.cudnn.benchmark = True
# Seed PyTorch's global random number generator for reproducibility.
torch.manual_seed(21)

<torch._C.Generator at 0x235ded7a530>

In [2]:
# Mini-batch sizes, keyed by the device the batches are meant for.
args = dict(
    batch_size_GPU=150,
    batch_size_CPU=4,
)

In [3]:
# Per-channel normalization applied to every image after ToTensor().
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# One deterministic preprocessing pipeline shared by both splits, so the
# training and test inputs can never drift apart. No augmentation is applied.
image_transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    normalize,
])


def _make_loader(root, shuffle):
    """Build a DataLoader over the class-per-subfolder image tree at `root`.

    Args:
        root: directory passed to torchvision's ImageFolder.
        shuffle: whether the loader reshuffles the samples every epoch.

    Returns:
        A DataLoader yielding (images, labels) batches of
        args["batch_size_GPU"] samples.
    """
    return data.DataLoader(
        datasets.ImageFolder(root, image_transform),
        batch_size=args["batch_size_GPU"],
        shuffle=shuffle,
        num_workers=5,
        pin_memory=True,
    )


train_loader = _make_loader('imagefolder', shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps test-time
# results repeatable from run to run.
test_loader = _make_loader('test_imagefolder', shuffle=False)
# Display names indexed by integer label.
# NOTE(review): assumes the dataset folders sort so that cat -> 0 and
# dog -> 1 - confirm against the ImageFolder class_to_idx mapping.
classes = ("cat","dog")

In [4]:
# functions to show an image


def imshow(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Display a normalized (channels, height, width) image tensor.

    The previous implementation undid a (0.5, 0.5) normalization, which does
    not match the per-channel statistics this notebook normalizes with, so
    colors were distorted. This version inverts the actual normalization.

    Args:
        img: image tensor laid out channels-first, e.g. the output of
            torchvision.utils.make_grid.
        mean: per-channel mean that was subtracted during normalization.
        std: per-channel standard deviation that was divided out.
    """
    # Reshape the statistics to (C, 1, 1) so they broadcast over H and W.
    channel_mean = torch.as_tensor(mean, dtype=img.dtype, device=img.device).view(-1, 1, 1)
    channel_std = torch.as_tensor(std, dtype=img.dtype, device=img.device).view(-1, 1, 1)
    img = img * channel_std + channel_mean     # unnormalize
    npimg = img.detach().cpu().numpy()
    # matplotlib expects channels-last floats in [0, 1]; clip away the tiny
    # overshoot that floating-point round-off can introduce.
    plt.imshow(np.clip(np.transpose(npimg, (1, 2, 0)), 0.0, 1.0))


# get some random training images
dataiter = iter(train_loader)
# Use the built-in next(): the Python-2 style `.next()` alias is not available
# on DataLoader iterators in recent PyTorch releases.
images, labels = next(dataiter)

# show images
#imshow(torchvision.utils.make_grid(images))
# print labels
#print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

In [7]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The first linear layer is sized for 3x150x150 inputs: each 5x5 convolution
    (no padding) trims 4 pixels and each 2x2 pool halves the size, i.e.
    150 -> 146 -> 73 -> 69 -> 34, leaving 16 feature maps of 34x34.

    Args:
        num_classes: number of logits produced by the final layer. The
            default of 60 keeps the original head, so parameter names and
            shapes in the state_dict are unchanged.
            NOTE(review): the notebook only defines two classes (cat/dog);
            a 60-way head can predict indices outside `classes` - consider
            constructing the model with num_classes=2 when retraining.
    """

    # Flattened feature count after the second pooling stage for 150x150 input.
    _FLAT_FEATURES = 16 * 34 * 34

    def __init__(self, num_classes=60):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 240)
        self.fc2 = nn.Linear(240, 120)
        self.fc3 = nn.Linear(120, num_classes)

    def forward(self, x):
        """Return raw (unnormalized) class logits for a batch of images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Keeping the batch dimension explicit means an
        # unexpected input size fails loudly at fc1 instead of being silently
        # folded into the batch dimension (what view(-1, 16*34*34) did).
        # When changing image size or layers, print x.size() here and update
        # _FLAT_FEATURES to match.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


# Build the model and move its parameters to the GPU in one step.
net = Net().cuda()
# NOTE(review): this only sets a plain attribute on the module; nothing in
# this notebook reads it, and cuDNN benchmarking is configured through
# torch.backends.cudnn.benchmark - confirm whether it is still needed.
net.benchmark = True

In [8]:
import torch.optim as optim

# Cross-entropy between the network's raw logits and the integer class labels.
criterion = nn.CrossEntropyLoss()
# Adam with a small step size; the earlier alternative was
# optim.SGD(net.parameters(), lr=0.001, momentum=0.9).
optimizer = optim.Adam(params=net.parameters(), lr=1e-4)

In [9]:
# Train for 20 passes over the training set, printing the mean loss of every
# `log_every` consecutive mini-batches, then save the learned weights.
log_every = 10
for epoch in range(20):  # loop over the dataset multiple times
    running_loss = 0.0
    # The loop variable is deliberately not called `data`: that name is the
    # torch.utils.data module alias used to build the loaders, and rebinding
    # it here would break re-running the loader cell.
    for i, batch in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = batch
        # Move to the GPU with the dtypes the model (float64) and the loss
        # (int64 class indices) expect.
        inputs = inputs.type('torch.cuda.DoubleTensor')
        labels = labels.type('torch.cuda.LongTensor')
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        # (The former "every 2000 mini-batches" report was dropped: the
        # accumulator is reset every `log_every` batches, so it could only
        # ever have printed 0.000.)
        running_loss += loss.item()
        if i % log_every == log_every - 1:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / log_every))
            running_loss = 0.0
print('Finished Training')
torch.save(net.state_dict(), 'cat-v-dog-model-240-120-60-20iter.pt')
print("NN Model saved")
# To restore these weights into a freshly constructed Net later:
#net.load_state_dict(torch.load('cat-v-dog-model-240-120-60-20iter.pt'))

[1,    10] loss: 3.409
[1,    20] loss: 1.727
[1,    30] loss: 0.834
[1,    40] loss: 0.709
[1,    50] loss: 0.689
[1,    60] loss: 0.676
[1,    70] loss: 0.669
[1,    80] loss: 0.665
[1,    90] loss: 0.658
[1,   100] loss: 0.651
[1,   110] loss: 0.651
[1,   120] loss: 0.644
[1,   130] loss: 0.626
[1,   140] loss: 0.631
[1,   150] loss: 0.654
[2,    10] loss: 0.633
[2,    20] loss: 0.619
[2,    30] loss: 0.635
[2,    40] loss: 0.625
[2,    50] loss: 0.625
[2,    60] loss: 0.639
[2,    70] loss: 0.625
[2,    80] loss: 0.622
[2,    90] loss: 0.615
[2,   100] loss: 0.625
[2,   110] loss: 0.620
[2,   120] loss: 0.607
[2,   130] loss: 0.603
[2,   140] loss: 0.612
[2,   150] loss: 0.617
[3,    10] loss: 0.609
[3,    20] loss: 0.610
[3,    30] loss: 0.600
[3,    40] loss: 0.600
[3,    50] loss: 0.604
[3,    60] loss: 0.594
[3,    70] loss: 0.581
[3,    80] loss: 0.619
[3,    90] loss: 0.597
[3,   100] loss: 0.601
[3,   110] loss: 0.606
[3,   120] loss: 0.606
[3,   130] loss: 0.587
[3,   140] 

In [8]:
# Classify one test batch and print the predicted class of its first 4 images.
dataiter = iter(test_loader)
# Built-in next() instead of the Python-2 style `.next()` alias.
testimages, labels = next(dataiter)
# A single conversion is enough: it moves the batch to the GPU as float64.
testimages = testimages.type('torch.cuda.DoubleTensor')
# Inference only, so skip building the autograd graph.
with torch.no_grad():
    outputs = net(testimages)
# Index of the largest logit per sample is the predicted class.
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
                              for j in range(4)))

Predicted:    cat   dog   dog   cat


In [10]:
# Measure top-1 accuracy over the whole test set.
correct = 0
total = 0
with torch.no_grad():
    # `batch`, not `data`: the latter is the torch.utils.data module alias and
    # rebinding it would break re-running the loader cell.
    for batch in test_loader:
        images, labels = batch
        images = images.type('torch.cuda.DoubleTensor')
        # Keep labels as integer class indices on the same device as the
        # predictions, so the comparison needs no float round-trip.
        labels = labels.type('torch.cuda.LongTensor')
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the test images: 75 %


In [None]:
"""
Utility function for computing output of convolutions
takes a tuple of (h,w) and returns a tuple of (h,w)
"""
def conv_output_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
    """Compute the spatial output size of a 2-D convolution or pooling layer.

    Applies, per dimension,
        out = floor((size + 2*pad - dilation*(kernel_size - 1) - 1) / stride) + 1

    Args:
        h_w: (height, width) of the input, as integers.
        kernel_size: int, or (kernel_h, kernel_w) tuple.
        stride: int, or (stride_h, stride_w) tuple.
        pad: int, or (pad_h, pad_w) tuple; padding added to each side.
        dilation: int, or (dilation_h, dilation_w) tuple.

    Returns:
        (height, width) tuple of the output.
    """
    def _pair(value):
        # Expand a scalar setting to one value per spatial dimension.
        return value if isinstance(value, tuple) else (value, value)

    kernel_size, stride, pad, dilation = map(
        _pair, (kernel_size, stride, pad, dilation))
    # Integer floor division keeps the computation exact for integer sizes.
    h, w = (
        (size + 2 * p - d * (k - 1) - 1) // s + 1
        for size, k, s, p, d in zip(h_w, kernel_size, stride, pad, dilation)
    )
    return h, w

In [None]:
# NOTE(review): (6, 16) match the conv channel counts used in Net rather than
# a (height, width) pair, and with the default kernel_size=1, stride=1, pad=0
# the call returns its input unchanged - confirm the intended arguments.
conv_output_shape((6,16))