In [9]:
from __future__ import print_function, division
import os
import time
import torch
import pandas as pd
import numpy as np
# For showing and formatting images
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# For importing datasets into pytorch
import torchvision.datasets as dataset

# Used for dataloaders
from torch.utils.data import DataLoader

# For pretrained resnet34 model
import torchvision.models as models

# For optimisation function
import torch.nn as nn
import torch.optim as optim

# For turning data into tensors
import torchvision.transforms as transforms

# For loss function
import torch.nn.functional as F

# Tensor to wrap data in
from torch.autograd import Variable

In [10]:
PATH = '/home/cell/data/plant_seedlings/model/'
!ls {PATH+"train"}

Black-grass  Common Chickweed  Loose Silky-bent   Shepherds Purse
Charlock     Common wheat      Maize		  Small-flowered Cranesbill
Cleavers     Fat Hen	       Scentless Mayweed  Sugar beet


In [11]:
batch_size = 16
sz = 224

In [12]:
## Image loaders
## Dataset transforms puts the images in tensor form
normalise = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_raw = dataset.ImageFolder(PATH+"train", transform=transforms.Compose([transforms.RandomResizedCrop(sz),
                                                                            transforms.RandomHorizontalFlip(),
                                                                            transforms.ToTensor(),
                                                                           normalise]))
train_loader = DataLoader(train_raw, batch_size=batch_size, shuffle=True, num_workers=4)

valid_raw = dataset.ImageFolder(PATH+"valid", transform=transforms.Compose([transforms.CenterCrop(sz),
                                                                            transforms.ToTensor(),
                                                                           normalise]))
valid_loader = DataLoader(valid_raw, batch_size=batch_size, shuffle=False, num_workers=4)

In [13]:
## Create resnet model
resnet34=models.resnet34(pretrained=True)

num_ftrs = resnet34.fc.in_features

## Freeze all but the last layers
for param in resnet34.parameters():
    ## Each tensor has the flag requires_grad, setting it to false allows freezes
    ## the parmaeter associated with it
    param.requires_grad = False
    
# Parameters of newly constructed modules have requires_grad=True by default
## Create new modules that will become final layer
num_ftrs = resnet34.fc.in_features
print(num_ftrs)
## Give final layers a linear transform with twelve outputs one for each category
resnet34.fc = nn.Linear(num_ftrs, 12)

## Create new model and tell it whether the computer has a GPU or not

## Loss function and optimiser
criterion = nn.CrossEntropyLoss().cuda()
optimiser = optim.Adam(resnet34.fc.parameters(), lr=0.001, weight_decay=0.001)

512


In [14]:
def train(epochs):
    #epoch=1
    resnet34.train()
    time_secs = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        start_time = time.time()
        #print(batch_idx)
        data, target = Variable(data), Variable(target)
        optimiser.zero_grad()
        output = resnet34(data)
        #print("Output: ", output)
        #print("Target: ", target)
        loss=criterion(output, target)
        loss.backward()
        optimiser.step()
        time_secs += (time.time() - start_time)
        if batch_idx % 10 == 0:
            print("Run time for 10 batches was: ", time_secs)
            print('Train epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx*len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.data[0]))
            time_secs = 0
            #break;

In [15]:
def validation():
    resnet34.eval()
    test_loss = 0
    correct = 0
    for data, target in valid_loader:
        data, target = Variable(data, volatile = True), Variable(target)
        output=resnet34(data)
        test_loss += criterion(output, target).data[0]
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    
    test_loss /= len(valid_loader.dataset)
    
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(valid_loader.dataset),
    100. * correct / len(valid_loader.dataset)))

In [16]:
## Loop through epochs training data and then testing it
for epoch in range(1,10):
    train(epoch)
    validation()

Output:  Variable containing:

Columns 0 to 9 
-0.5781  0.2121  0.1887  0.1256  0.1281 -0.4158  0.2003  0.5057 -0.3397 -0.8250
-0.4615  0.7388  0.4439  0.5166  0.3363 -0.2362 -0.1420  0.0351 -0.5715 -1.3180
-1.1999 -0.5109  1.2589  0.3908  0.2174 -0.2964  0.2744 -0.3217  0.2991 -0.7562
-0.2953  0.3476  0.4944  0.0418  0.5776  0.0504  0.5304 -0.1825  0.1079 -0.3640
-0.4578  0.9297  0.2212  0.4896  1.1289 -0.5055 -0.1432  0.7535  0.0053 -0.2456
-0.0545 -0.0128 -0.2323  0.5592  0.4527 -0.4668  0.3685  0.5290 -0.1497 -0.7779
-0.4005  0.6472  0.5912  0.1947 -0.1378 -0.4433 -0.0601  0.4183 -0.1995 -0.5660
-0.4064  0.1907  0.4220  0.1865  0.2150  0.5875  0.1740  0.1138 -0.0774 -0.1449
-0.5153  0.1477  0.2279  0.6471 -0.2252  0.1533  0.2594 -0.1888 -0.5515 -0.9588
-0.7457  0.5043  0.4228 -0.0536  0.5350  0.2258  0.7039  0.4885  0.4011 -0.6735
 0.1123 -0.1620  0.3412  0.8027  0.4842  0.6364 -0.1176  0.0033 -0.8034 -0.1077
-0.3091  0.2033  0.4059  0.5919  0.2267 -0.4789  0.3526 -0.1479  0.1699 -

RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed.  at /opt/conda/conda-bld/pytorch_1518244421288/work/torch/lib/THNN/generic/ClassNLLCriterion.c:87