In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from dataloader import DataLoader
import numpy as np
import utils
# from torchsummary import summary
import os 

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Setup Data Loader

In [2]:
dataloader = DataLoader(utils.train_dir, batch_size=24, transfer=False, normalize=True)

In [2]:
USE_GPU = True
dtype = torch.float32

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
    print("gpu available!")
else:
    device = torch.device('cpu')
    print("gpu NOT available!")

gpu available!


In [4]:
files = os.listdir(utils.val_dir)
print('val')
set([fname.split('reg')[1].split('_')[0] for fname in files])

val


{'011', '016', '023', '030'}

In [5]:
files = os.listdir(utils.test_dir)
print('test')
set([fname.split('reg')[1].split('_')[0] for fname in files])

test


{'005', '006', '017', '019'}

In [6]:
files = os.listdir(utils.train_dir)
print('train')
completed = set([fname.split('reg')[1].split('_')[0] for fname in files])
print('completed:', completed)
remaining = set(['015','004','014','024','020','007','008','027','034','012']) - completed
print('remaining:', remaining)

train
completed: {'004', '027', '012', '008', '024', '014', '020', '007', '034', '015'}
remaining: set()


In [7]:
len(os.listdir(utils.train_dir))

13069

# Training/Validating and Utility Functions

In [8]:
def check_train_accuracy(model):
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    
    loader = DataLoader(utils.train_dir, batch_size=10, transfer=False)
    
    with torch.no_grad():
        for i, (fxy, x, y) in enumerate(loader):
            
            x = torch.from_numpy(x)
            y = torch.from_numpy(y)
            
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            
            scores = model(x)
            _, preds = scores.max(1)

            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
            if i > 5:
                break
            
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [9]:
def check_val_accuracy(model, it):
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    
    loader = DataLoader(utils.val_dir, batch_size=24, transfer=False)
    
    val_loss = 0
    total = 0
    with torch.no_grad():
        for i, (fx, x, y) in enumerate(loader):
            total += 1
            x = torch.from_numpy(x)
            y = torch.from_numpy(y)
            
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
            val_loss += F.cross_entropy(scores, y)
        
        if it % 100 == 0:
            acc = float(num_correct) / num_samples
            print('Val accuracy: %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
            print('Val loss: {0:0.4f}'.format(val_loss / total))

    return val_loss / total

In [3]:
def flatten(x):
    N = x.shape[0] # read in N, C, H, W
    return x.view(N, -1)

class Flatten(nn.Module):
    def forward(self, x):
        return flatten(x)
    
class PrintLayer(nn.Module):
    def __init__(self):
        super(PrintLayer, self).__init__()
    def forward(self, x):
        print(x.shape)
        return x

# Define Model

In [6]:
# model = torchvision.models.vgg19(pretrained=False)
# print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU(inplace)

In [13]:
# same structure as vgg19 model print(torchvision.models.vgg19(pretrained=False, progress = True))
model = nn.Sequential(
    
    nn.Conv2d(75, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), #64
#     nn.BatchNorm2d(128),
    nn.ReLU(inplace = True),
    nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(128),
    nn.ReLU(inplace = True),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
    
    nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(256),
    nn.ReLU(inplace = True),
    nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(256),
    nn.ReLU(inplace = True),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
    
    nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(256),
    nn.ReLU(inplace = True),
    nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(256),
    nn.ReLU(inplace = True),
    nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(256),
    nn.ReLU(inplace = True),
    nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(256),
    nn.ReLU(inplace = True),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
    
    nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(512),
    nn.ReLU(inplace = True),
    nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(512),
    nn.ReLU(inplace = True),
    nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(512),
    nn.ReLU(inplace = True),
    nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(512),
    nn.ReLU(inplace = True),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
    
    nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(512),
    nn.ReLU(inplace = True),
    nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(512),
    nn.ReLU(inplace = True),
    nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(512),
    nn.ReLU(inplace = True),
    nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
#     nn.BatchNorm2d(512),
    nn.ReLU(inplace = True),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
    
    Flatten(),
    
    nn.Linear(in_features= 4608, out_features=4096, bias=True),
    nn.ReLU(inplace = True),
    nn.Dropout(p=0.5),
    
    nn.Linear(in_features=4096, out_features=4096, bias=True),
    nn.ReLU(inplace = True),
    nn.Dropout(p=0.5),
    
    nn.Linear(in_features=4096, out_features=2, bias=True),
    
)

In [360]:
# TO DEBUG MODEL:::
# add PrintLayer(), or use summary from torchsummary module:
# summary(your_model, input_size=(channels, H, W))
# https://towardsdatascience.com/model-summary-in-pytorch-b5a1e4b64d25

# Overfit Data

In [371]:
# overfitLoader = DataLoader(utils.train_dir, batch_size=24, transfer=False, mode = 'dev')

In [372]:
# def train_overfit(model, optimizer, epochs=10):
#     """
#     Train a model on image data using the PyTorch Module API.
    
#     Inputs:
#     - model: A PyTorch Module giving the model to train.
#     - optimizer: An Optimizer object we will use to train the model
#     - epochs: (Optional) A Python integer giving the number of epochs to train for
    
#     Returns: Nothing, but prints model accuracies during training.
#     """
    
#     print_every = 4
#     train_every = 10
#     val_every = 20
#     model = model.to(device=device)  # move the model parameters to CPU/GPU
#     loss_history = []
#     for e in range(epochs):
#         for t, (fxy, x, y) in enumerate(overfitLoader):
            
#             model.train()  # put model to training mode
            
#             x = torch.from_numpy(x)
#             y = torch.from_numpy(y)
            
#             x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
#             y = y.to(device=device, dtype=torch.long)

#             scores = model(x)
#             loss = F.cross_entropy(scores, y)

#             # Zero out all of the gradients for the variables which the optimizer
#             # will update.
#             optimizer.zero_grad()

#             # This is the backwards pass: compute the gradient of the loss with
#             # respect to each  parameter of the model.
#             loss.backward()

#             # Actually update the parameters of the model using the gradients
#             # computed by the backwards pass.
#             optimizer.step()

#             if t % print_every == 0:
#                 print('Iteration %d, loss = %.4f' % (t, loss.item()))
            
#             loss_history.append(loss.item())
    
#     return loss_history

In [None]:
# learning_rate = 1e-5
# optimizer = optim.Adam(model.parameters(),
#                       lr=learning_rate)

# train_overfit(model, optimizer)

Iteration 0, loss = 0.5847
Iteration 4, loss = 0.3507
Iteration 8, loss = 0.3354
Iteration 12, loss = 0.3514
Iteration 16, loss = 0.1937
Iteration 20, loss = 0.1278
Iteration 24, loss = 0.5615
Iteration 28, loss = 0.2393
Iteration 32, loss = 0.3077
Iteration 36, loss = 0.3151
Iteration 40, loss = 0.1274
Iteration 44, loss = 0.1092
Iteration 48, loss = 0.3363
Iteration 52, loss = 0.1074
Iteration 56, loss = 0.0847
Iteration 0, loss = 0.2859
Iteration 4, loss = 0.1880
Iteration 8, loss = 0.1270
Iteration 12, loss = 0.1063


In [None]:
# plt.plot(loss_histrory)

# Train Model

In [14]:
def train(model, optimizer, epochs=10):
    """
    Train a model on image data using the PyTorch Module API.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
    
    train_loader = DataLoader(utils.train_dir, batch_size=36, transfer=False, normalize=True)
#     val_loader = DataLoader(utils.val_dir, batch_size=24, transfer=False)
    
    print_every = 10
    train_every = 10
    val_every = 20
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    loss_history = []
    val_history = []
    
    cur_val = 999
    consec_increases = 0
    
    for e in range(epochs):
        t = 0
        for fdl_train in train_loader:
            
            fxy, x, y = fdl_train
            model.train()  # put model to training mode
            
            x = torch.from_numpy(x)
            y = torch.from_numpy(y)
            
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()
            
#             # validation loss
#             with torch.no_grad():
#                 model.eval()
#                 fxy, x, y = fdl_val
#                 x = torch.from_numpy(x)
#                 y = torch.from_numpy(y)

#                 x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
#                 y = y.to(device=device, dtype=torch.long)

#                 scores = model(x)
#                 val_loss = F.cross_entropy(scores, y)

            if t % print_every == 0:
                print('Iter %d, train loss = %.4f' % (t + print_every, loss.item()))
            
#             val_history.append(val_loss.item())
            
#             if val_loss.item() > cur_val:
#                 consec_increases += 1
#             else:
#                 consec_increases = 0
            
#             cur_val = val_loss.item()
            
#             if consec_increases >= 3:
#                 print('Stopping early due to validation loss increase')
#                 torch.save(model, utils.model_dir + "conv2d_v2_earlystop_epoch{}.pt".format(e))
#                 return loss_history, val_history
            
            loss_history.append(loss.item())
            t += 1
            
        torch.save(model, utils.model_dir + "conv2d_v2_std_epoch%s.pt" % e)
    
    return loss_history, val_history

In [15]:
learning_rate = 1e-5
#optimizer = optim.SGD(model.parameters(),
 #                     lr=learning_rate,
  #                    momentum=0.9,
   #                   nesterov=True)
    
optimizer = optim.Adam(model.parameters(),
                      lr=learning_rate)

loss_history, val_history = train(model, optimizer)
torch.save(model, utils.model_dir + "conv2d_v2_std_full.pt")

Iter 10, train loss = 0.6975
Iter 20, train loss = 0.6932
Iter 30, train loss = 0.6914
Iter 40, train loss = 0.6956
Iter 50, train loss = 0.6927
Iter 60, train loss = 0.6912
Iter 70, train loss = 0.6864
Iter 80, train loss = 0.6979
Iter 90, train loss = 0.6949
Iter 100, train loss = 0.6841
Iter 110, train loss = 0.6872
Iter 120, train loss = 0.6837
Iter 130, train loss = 0.6919
Iter 140, train loss = 0.6895
Iter 150, train loss = 0.6888
Iter 160, train loss = 0.7006
Iter 170, train loss = 0.7019
Iter 180, train loss = 0.6885
Iter 190, train loss = 0.7015
Iter 200, train loss = 0.6844
Iter 210, train loss = 0.6796
Iter 220, train loss = 0.7066
Iter 230, train loss = 0.6971
Iter 240, train loss = 0.6880
Iter 250, train loss = 0.6943
Iter 260, train loss = 0.6937
Iter 270, train loss = 0.6781
Iter 280, train loss = 0.6985
Iter 290, train loss = 0.6912
Iter 300, train loss = 0.7024
Iter 310, train loss = 0.6765
Iter 320, train loss = 0.6636
Iter 330, train loss = 0.6482
Iter 340, train los

In [20]:
import matplotlib.pyplot as plt
plt.plot(loss_history, c="blue", label="train")
plt.plot(val_history, c="orange", label="val")
plt.legend()
plt.show()

NameError: name 'loss_history' is not defined

In [23]:
test_loader = DataLoader(utils.test_dir, batch_size=24, transfer=False)
model.eval()

num_correct = 0
num_samples = 0
test_loss = 0
total = 0
with torch.no_grad():
    for i, (fx, x, y) in enumerate(test_loader):
        total += 1
        x = torch.from_numpy(x)
        y = torch.from_numpy(y)

        x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
        y = y.to(device=device, dtype=torch.long)

        scores = model(x)
        _, preds = scores.max(1)
        num_correct += (preds == y).sum()
        num_samples += preds.size(0)
        test_loss += F.cross_entropy(scores, y)

    acc = float(num_correct) / num_samples
    print('Test accuracy: %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    print('Test loss: {0:0.4f}'.format(val_loss / total))

Test accuracy: 3693 / 4507 correct (81.94)
Test loss: 0.0000
