In [1]:
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

# constants
repo_path = '/Users/etriesch/dev/tree-finder/'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
USE_GPU = True
dtype = torch.float32 # We will be using float throughout this tutorial.

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss.
print_every = 100
print('using device:', device)

using device: cpu


## Helper functions

In [3]:
def flatten(x):
    N = x.shape[0] # read in N, C, H, W
    return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image

class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)

def check_accuracy(loader, model): 
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

def train_model(model, optimizer, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy(loader_val, model)
                print()

In [8]:
# how to download public dataset
# mnist_dataset = torchvision.datasets.MNIST(root="./data",   download=True, train=True, transform=transforms.ToTensor())

H, W = 32, 32

# define transformations
transform = T.Compose([T.Resize(H), T.CenterCrop(H), T.ToTensor()])
treeds = dset.ImageFolder(repo_path + 'data/images', transform=transform)
# TODO -- visualize what transofrm is doing

In [9]:
# make dataloaders
PCT_TRAIN, PCT_VAL = 0.80, 0.15
N = len(treeds)
num_train = int(N * PCT_TRAIN)
num_val = int(N * PCT_VAL)
num_test = N - num_train - num_val
print('(train, val, test):', num_train, num_val, num_test)


loader_train = DataLoader(treeds, batch_size=64,
                           sampler=sampler.SubsetRandomSampler(range(num_train)))
loader_val = DataLoader(treeds, batch_size=64,
                           sampler=sampler.SubsetRandomSampler(range(num_train, num_train+num_val)))
loader_test = DataLoader(treeds, batch_size=64,
                           sampler=sampler.SubsetRandomSampler(range(num_train+num_val, N)))

(train, val, test): 1316 246 84


### Fully connected single layer

In [11]:
hidden_layer_size = 4000
learning_rate = 1e-2

model = nn.Sequential(
    Flatten(),
    nn.Linear(3 * H * W, hidden_layer_size),
    nn.ReLU(),
    nn.Linear(hidden_layer_size, 10),
)

# you can use Nesterov momentum in optim.SGD
optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                     momentum=0.9, nesterov=True)

train_model(model, optimizer, epochs=10)

Iteration 0, loss = 2.2941
Got 0 / 246 correct (0.00)

Iteration 0, loss = 1.6417
Got 16 / 246 correct (6.50)

Iteration 0, loss = 1.5412
Got 6 / 246 correct (2.44)

Iteration 0, loss = 1.3936
Got 28 / 246 correct (11.38)

Iteration 0, loss = 1.4525
Got 27 / 246 correct (10.98)

Iteration 0, loss = 1.4922
Got 27 / 246 correct (10.98)

Iteration 0, loss = 1.6318
Got 42 / 246 correct (17.07)

Iteration 0, loss = 1.4514
Got 31 / 246 correct (12.60)

Iteration 0, loss = 1.4184
Got 31 / 246 correct (12.60)

Iteration 0, loss = 1.1273
Got 37 / 246 correct (15.04)



### Simple 3-layer convnet

In [None]:
channel_1 = 32
channel_2 = 16
learning_rate = 1e-2

model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=channel_1, kernel_size=(5,5), padding=2),
    nn.ReLU(),
    nn.Conv2d(in_channels=channel_1, out_channels=channel_2, kernel_size=(3,3), padding=1),
    nn.ReLU(), 
    Flatten(),
    nn.Linear(channel_2*H*W, num_classes),
)