In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import glob
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models
from torchvision import transforms as t
import matplotlib.pyplot as plt
import time
import os

plt.ion()   # interactive mode

In [2]:
DATA_PATH = "/home/jbenn/data/hymenoptera/"
PHASES = ['train', 'val']
MEAN_TRANSFORM = np.array([0.485, 0.456, 0.406])
STD_TRANSFORM = np.array([0.229, 0.224, 0.225])
BATCH_SIZE = 8

transforms = { 
    'train': t.Compose([
        t.RandomSizedCrop(224),
        t.RandomHorizontalFlip(),
        t.ToTensor(),
#         t.Normalize(MEAN_TRANSFORM, STD_TRANSFORM)
    ]),
    'val': t.Compose([
        t.Scale(256),
        t.CenterCrop(224),
        t.ToTensor(),
#         t.Normalize(MEAN_TRANSFORM, STD_TRANSFORM)
    ])
}
    
image_folders = { 
    phase: datasets.ImageFolder(DATA_PATH + phase, transforms[phase]) 
    for phase in PHASES 
}

class_names = image_folders['val'].classes

dataloaders = { phase: torch.utils.data.DataLoader(
        dataset=image_folders[phase],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=4
    ) for phase in PHASES }


In [None]:
# def imshow(inp):
#     inp = inp.numpy().transpose((1, 2, 0))
#     mean = np.array([0.485, 0.456, 0.406])
#     std = np.array([0.229, 0.224, 0.225])
#     inp = std * inp + mean
#     plt.imshow((inp * 255).numpy().transpose(1, 2, 0).astype("uint8"))
#     print(inp * 255)
#     if title is not None:
#         plt.title(title)
#     plt.pause(0.001)  # pause a bit so that plots are updated

# inputs, classes = next(iter(dataloaders['train']))
# grid = torchvision.utils.make_grid(inputs)
# ValueError: Floating point image RGB values must be in the 0..1 range.
# imshow(grid, title=[class_names[x] for x in classes])
# imshow(inputs[0])

In [3]:
dataset_sizes = { phase: len(image_folders[phase]) for phase in PHASES }

def train(model, criterion, optimizer, num_epochs):
    loss_history = { 'train': [], 'val': [] }

    for epoch in range(num_epochs):
        for phase in PHASES:
            running_loss = 0.0
            running_corrects = 0

            if phase == 'train':
                model.train(True)
            elif phase == 'val':
                model.train(False)
            
            for inputs, classes in dataloaders[phase]:
                inputs = Variable(inputs.cuda(1))
                classes = Variable(classes.cuda(1))
                
                model.zero_grad()
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                
                loss = criterion(outputs, classes)

                loss_history[phase].append(loss.data[0])
                running_loss += loss.data[0]
                running_corrects += torch.sum(preds == classes.data)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print("{} {}  \tloss: {:.4f}\tacc: {:.4f}".format(epoch, phase, epoch_loss, epoch_acc))

    torch.save(model.state_dict(), "last_weights")
    return loss_history


In [None]:
KERNEL_SIZE = 3

class VGGish(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, KERNEL_SIZE, padding=1),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.layer3 = nn.Sequential(
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        self.fc = nn.Linear(in_features=256*56*56, out_features=2)
        self.softmax = nn.Softmax()
    
    def forward(self, inp):
        out = self.layer1(inp)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return self.softmax(out)

model = VGGish().cuda(1)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=.0001, momentum=0.9)
loss_history = train(model, criterion, optimizer, num_epochs=150)

In [None]:
# visualizer
# enumerate val
# wrap vals in var, cudafy
# get predictions
# plt.subplot
# imshow
plt.plot(loss_history['train'])
plt.plot(loss_history['val'])

In [None]:
KERNEL_SIZE = 3

class VGGish(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, KERNEL_SIZE, padding=1),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.layer3 = nn.Sequential(
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, KERNEL_SIZE, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        self.layer_fc = nn.Sequential(
            nn.Linear(in_features=256*56*56, out_features=200),
            nn.BatchNorm2d(200),
            nn.ReLU(),
            nn.Linear(in_features=200, out_features=200),
            nn.BatchNorm2d(200),
            nn.ReLU(),
            nn.Linear(in_features=200, out_features=2)
        )
        self.softmax = nn.Softmax()
    
    def forward(self, inp):
        out = self.layer1(inp)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = self.layer_fc(out)
        return self.softmax(out)

adam_model = VGGish().cuda(1)
criterion = nn.CrossEntropyLoss()
adam_optimizer = optim.Adam(adam_model.parameters())
adam_loss_history = train(adam_model, criterion, adam_optimizer, num_epochs=150)

0 train  	loss: 0.0851	acc: 0.5820
0 val  	loss: 0.0865	acc: 0.5817
1 train  	loss: 0.0833	acc: 0.5984
1 val  	loss: 0.0873	acc: 0.5882
2 train  	loss: 0.0807	acc: 0.6475
2 val  	loss: 0.0910	acc: 0.5425
3 train  	loss: 0.0785	acc: 0.6598
3 val  	loss: 0.0780	acc: 0.6601
4 train  	loss: 0.0793	acc: 0.6516
4 val  	loss: 0.0878	acc: 0.5686
5 train  	loss: 0.0795	acc: 0.6844
5 val  	loss: 0.0840	acc: 0.6732
6 train  	loss: 0.0797	acc: 0.6598
6 val  	loss: 0.0796	acc: 0.6667
7 train  	loss: 0.0794	acc: 0.6639
7 val  	loss: 0.0898	acc: 0.6209
8 train  	loss: 0.0801	acc: 0.6475
8 val  	loss: 0.0785	acc: 0.6732
9 train  	loss: 0.0792	acc: 0.6516
9 val  	loss: 0.0764	acc: 0.6732
10 train  	loss: 0.0762	acc: 0.7172
10 val  	loss: 0.0801	acc: 0.6928
11 train  	loss: 0.0834	acc: 0.6189
11 val  	loss: 0.0833	acc: 0.6601
12 train  	loss: 0.0814	acc: 0.6516
12 val  	loss: 0.0808	acc: 0.6471
13 train  	loss: 0.0806	acc: 0.6598
13 val  	loss: 0.0891	acc: 0.5882
14 train  	loss: 0.0771	acc: 0.6926
14 v