# Simple Image Classifier

Implementing Chapter 2 of [Programming PyTorch for Deep Learning](http://shop.oreilly.com/product/0636920216032.do)

In [1]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

## Architecture

This is a simple three layer network: an input and hidden layer, with a two-node output layer.

It is *fully-connected* in that each node in each layer affects every node in the next layer.

In [2]:
class SimpleNN(nn.Module):
    """Fully-connected classifier for 64x64 RGB images with two output classes.

    The network flattens each image to 12288 values (3 channels * 64 * 64
    pixels) and passes it through two ReLU-activated linear layers followed
    by a linear output layer that produces one raw score (logit) per class.
    """

    def __init__(self):
        super().__init__()
        self.input_layer = nn.Linear(12288, 84)
        self.hidden_layer = nn.Linear(84, 50)
        self.output_layer = nn.Linear(50, 2)

    def forward(self, x):
        """Return the two class logits for each image in ``x``.

        Args:
            x: tensor whose elements can be regrouped into rows of 12288
                values, e.g. a batch shaped ``(N, 3, 64, 64)``.

        Returns:
            Tensor of shape ``(N, 2)`` holding unnormalised class scores.
        """
        # Flatten every image into a single feature vector.
        x = x.view(-1, 12288)
        # The functional API is lower-case: F.relu (F.Relu does not exist).
        x = F.relu(self.input_layer(x))
        x = F.relu(self.hidden_layer(x))
        # No activation here: CrossEntropyLoss expects raw logits.
        x = self.output_layer(x)
        return x

In [3]:
# Create the model instance used by the optimizer and the training run below.
simple = SimpleNN()

In [4]:
# Adam optimizer over all of the model's parameters, with a learning rate of 0.0001.
optimizer = optim.Adam(simple.parameters(), lr=0.0001)

In [5]:
def train(model, optimizer, loss_fn, training_loader, validation_loader, epochs, device="cpu"):
    """Train ``model`` and report loss and validation accuracy after each epoch.

    Args:
        model: the ``nn.Module`` to optimise; it is moved to ``device``.
        optimizer: optimizer constructed over ``model``'s parameters.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        training_loader: sized iterable of ``(inputs, targets)`` batches used
            for the gradient updates.
        validation_loader: sized iterable of ``(inputs, targets)`` batches used
            only for evaluation.
        epochs: number of full passes over ``training_loader``.
        device: device (name or ``torch.device``) the model and batches run on.

    Returns:
        None. One summary line is printed per epoch.
    """
    # Batches are moved to `device` below, so the model has to live there too.
    model.to(device)

    for epoch in range(epochs):
        training_loss = 0.0
        validation_loss = 0.0

        model.train()
        for inputs, targets in training_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()
            training_loss += loss.item()
        # Average over batches; guard against an empty loader.
        training_loss /= max(len(training_loader), 1)

        number_correct = 0
        number_examples = 0
        model.eval()
        # Evaluation needs no gradients; skipping them saves memory and time.
        with torch.no_grad():
            for inputs, targets in validation_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                outputs = model(inputs)
                loss = loss_fn(outputs, targets)
                validation_loss += loss.item()
                # Softmax is monotonic, so the largest logit is already the
                # predicted class; no need to normalise first.
                predictions = torch.max(outputs, dim=1)[1]
                correct = torch.eq(predictions, targets).view(-1)
                number_correct += torch.sum(correct).item()
                number_examples += correct.shape[0]
        validation_loss /= max(len(validation_loader), 1)

        # Report 0.0 rather than dividing by zero when nothing was evaluated.
        accuracy = number_correct / number_examples if number_examples else 0.0

        print('Epoch: {}, Training Loss: {:.2f}, Validation Loss: {:.2f}, accuracy = {:.2f}'.format(
            epoch,
            training_loss,
            validation_loss,
            accuracy))

## Data

Using the [technique from lesson 2](https://github.com/dpapathanasiou/course-v3/blob/master/nbs/dl1/lesson2-download.ipynb) of the [Fast.ai course](https://course.fast.ai/), get a list of image urls with some in-browser javascript:

```javascript
urls=Array.from(document.querySelectorAll('.rg_i')).map(el=> el.hasAttribute('data-src')?el.getAttribute('data-src'):el.getAttribute('data-iurl'));window.open('data:text/csv;charset=utf-8,' + escape(urls.join('\n')));
```

In [6]:
from pathlib import Path
import urllib.request

def fetch_images(image_url_list, target):
    """Download every URL listed in a text file into a target folder.

    Args:
        image_url_list: path of a text file containing one URL per line.
            Blank lines are ignored.
        target: folder that receives the downloads; it is created (including
            any missing parents) if it does not exist yet.

    Each download is saved as ``<target>/<i>``, where ``i`` is the zero-based
    position of the URL among the non-blank lines of the list.
    """
    target_dir = Path(target)
    # urlretrieve does not create missing folders, so make sure this one exists.
    target_dir.mkdir(parents=True, exist_ok=True)

    # A trailing or stray empty line would otherwise be passed on as the URL "".
    lines = Path(image_url_list).read_text().splitlines()
    urls = [line.strip() for line in lines if line.strip()]

    for i, url in enumerate(urls):
        image_file = target_dir / str(i) # TODO: determine file extension, since ImageFolders needs it
        urllib.request.urlretrieve(url, image_file)

Instead of `cat` versus `fish`, this notebook will attempt [aikido](https://en.wikipedia.org/wiki/Aikido) versus [judo](https://en.wikipedia.org/wiki/Judo)

In [9]:
# Download the aikido URLs listed in the training CSV into the aikido training folder.
fetch_images("./data/aikido_train.csv", "./data/train/aikido")

In [10]:
# Download the aikido URLs listed in the validation CSV into the aikido validation folder.
fetch_images("./data/aikido_validate.csv", "./data/validate/aikido")

In [11]:
# Download the aikido URLs listed in the test CSV into the aikido test folder.
fetch_images("./data/aikido_test.csv", "./data/test/aikido")

In [12]:
# Download the judo images for each split: (URL list, destination folder).
judo_sources = (
    ("./data/judo_train.csv", "./data/train/judo"),
    ("./data/judo_validate.csv", "./data/validate/judo"),
    ("./data/judo_test.csv", "./data/test/judo"),
)
for url_csv, destination in judo_sources:
    fetch_images(url_csv, destination)

In [7]:
import torchvision
from torchvision import transforms

In [8]:
# Preprocessing applied to every image before it reaches the network.
# Resize takes an explicit (height, width) pair: a bare int would only scale
# the shorter edge and keep the aspect ratio, so non-square images would not
# come out as 64x64 and would not match the network's 3 * 64 * 64 = 12288
# input features.
# The mean/std values are the commonly used ImageNet channel statistics.
image_transform = transforms.Compose(
    [transforms.Resize((64, 64)),
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])])

In [9]:
# One ImageFolder dataset per split; every split shares the same preprocessing.
training_data, validation_data, test_data = (
    torchvision.datasets.ImageFolder(root=split_root, transform=image_transform)
    for split_root in ("./data/train", "./data/validate", "./data/test")
)

In [10]:
from torch.utils.data import DataLoader

In [11]:
# Number of images per mini-batch for all three loaders.
batch_size = 10
# ImageFolder lists samples class by class, so without shuffling every
# training batch would contain a single class. Reshuffle each epoch instead.
training_loader   = DataLoader(training_data, batch_size=batch_size, shuffle=True)
# Evaluation order does not affect the metrics, so these stay unshuffled.
validation_loader = DataLoader(validation_data, batch_size=batch_size)
test_loader       = DataLoader(test_data, batch_size=batch_size)

## Train and evaluate

In [12]:
# Prefer the GPU when CUDA is usable, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Argument order: model, optimizer, loss_fn, training_loader, validation_loader, epochs, device.
# The model has to be on the same device that train() moves each batch to.
simple.to(device)
# Evaluate on the validation split each epoch so the test split stays held out.
train(simple, optimizer, torch.nn.CrossEntropyLoss(), training_loader, validation_loader, 20, device)