In [1]:
# TRAINING SETTINGS
NUM_EPOCHS = 15


# LEARNING RATE SETTINGS
BASE_LR = 0.001
DECAY_WEIGHT = 0.1 # factor by which the learning rate is reduced.
EPOCH_DECAY = 30 # number of epochs after which the learning rate is decayed exponentially by DECAY_WEIGHT.


# DATASET INFO
NUM_CLASSES = 500 # set the number of classes in your dataset


# DATALOADER PROPERTIES
BATCH_SIZE = 32


# GPU SETTINGS
CUDA_DEVICE = 0 # Enter device ID of your gpu if you want to run on gpu. Otherwise neglect.
GPU_MODE = 0 # set to 1 if want to run on gpu.


In [2]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, models, transforms
import time
import copy
import os
import pandas as pd
import shutil

import torchvision

Benchmark: 6m45s

In [15]:
from dataset_class import FixedUniformDataset

dsets = {}
# for split in ['train', 'test']:
for split in ['train']:
    dsets[split] = FixedUniformDataset(split+'ing.pkl')

In [16]:
dset_loaders = {}
# for split in ['train', 'test']:
for split in ['train']:
    dset_loaders[split] = torch.utils.data.DataLoader(dsets[split], batch_size=BATCH_SIZE, shuffle=True, num_workers=8)

In [13]:
# testing
for data in dset_loaders['train']:
  inputs, labels = data
  break

Testing

In [14]:
inputs, labels = Variable(inputs), Variable(labels)
outputs = model_ft(inputs)
print(outputs[0].shape)
print(labels)
_, preds = torch.max(outputs.data, 1)
criterion = nn.CrossEntropyLoss()
loss = criterion(outputs, labels)
print(preds)
print(loss.item())

torch.Size([500])
tensor([346, 102, 357, 345, 344, 355,  72, 192,  11, 412, 448, 477, 261, 495,
        452, 108, 485, 246, 266, 118, 449, 293, 488, 104, 174, 309,  88, 212,
         68, 362,  39,  79])
tensor([346, 102, 357, 345, 344, 359,  74, 192,  17, 424, 359, 494, 431, 442,
        359, 108, 485, 246, 266, 118, 359, 293, 489, 104, 174, 309,  88, 359,
         74, 362,  42,  74])
5.6702728271484375


In [10]:
# USE_TENSORBOARD = False
use_gpu = GPU_MODE
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=100):
    since = time.time()

    best_model = model
    best_acc = 0.0

    accuracies = []
    losses = []
    for epoch in range(num_epochs):
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        # optimizer = lr_scheduler(optimizer, epoch)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        model.train()  # Set model to training mode

        running_loss = 0.0
        running_corrects = 0
        running_total = 0

        counter=0
        # Iterate over data, getting one batch of inputs (images) and labels each time.
        for data in dset_loaders['train']:
            inputs, labels = data

            if use_gpu:
                try:
                    inputs, labels = Variable(inputs.float().cuda()), Variable(labels.long().cuda())
                except Exception as e:
                    print("ERROR! here are the inputs and labels before we print the full stack trace:")
                    print(inputs, labels)
                    raise e
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            # Set gradient to zero to delete history of computations in previous epoch. Track operations so that differentiation can be done automatically.
            optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs.data, 1)

            # outputs will be top 1 from softmax, labels will be single int
            loss = criterion(outputs, labels)

            # Print a line every 10 batches so you have something to watch and don't feel like the program isn't running.
            if counter%10==0:
                print("Reached batch iteration", counter)

            counter+=1

            # backward + optimize only if in training phase
            loss.backward()
            optimizer.step()
            try:
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)
                running_total += len(labels.data)
            except:
                print('unexpected error, could not calculate loss or do a sum.')

            epoch_loss = running_loss / 500
            epoch_acc = running_corrects.item() / running_total
            # print(running_corrects, running_total)
            print('{} Loss: {:.4f} Acc: {:.4f}'.format('training', epoch_loss, epoch_acc))
            accuracies.append(epoch_acc)
            losses.append(epoch_loss)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    print('returning and looping back')

    return best_model, accuracies, losses


# This function changes the learning rate as the model trains.
# If the learning rate is too high, training tends to be unstable and it's harder to converge on an optimal set of weights. 
# But, if learning rate is too low, learning is too slow and you won't converge in a reasonable time frame. A good compromise 
# is to start out with a high learning rate and then reduce it over time. 
def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DECAY):
    """Decay learning rate by a factor of DECAY_WEIGHT every lr_decay_epoch epochs."""
    lr = init_lr * (DECAY_WEIGHT**(epoch // lr_decay_epoch))

    if epoch % lr_decay_epoch == 0:
        print('LR is set to {}'.format(lr))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return optimizer

In [6]:
class ThreeLayerModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ThreeLayerModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.fc3(x)
        return torch.softmax(x, dim=1)

model_ft = ThreeLayerModel(1781, 1500, NUM_CLASSES)
print(list(model_ft.parameters()))


criterion = nn.CrossEntropyLoss()

# if use_gpu:
#     criterion.cuda()
#     model_ft.cuda()

optimizer_ft = optim.RMSprop(model_ft.parameters(), lr=0.0001)



[Parameter containing:
tensor([[ 1.0600e-02, -4.1639e-03, -1.7228e-02,  ..., -1.7263e-02,
         -8.3633e-06, -1.8699e-02],
        [-1.5243e-02,  1.4260e-02,  2.3253e-02,  ..., -9.9739e-03,
         -2.3216e-02, -1.8259e-02],
        [-1.3728e-02,  1.3375e-02, -2.1706e-03,  ...,  1.8029e-02,
          1.0457e-02, -2.1160e-02],
        ...,
        [ 1.0232e-02,  2.1694e-02, -1.2204e-03,  ...,  1.5188e-02,
          9.7068e-03, -8.0177e-03],
        [ 2.2638e-02, -1.1897e-02, -1.6493e-02,  ...,  1.0579e-02,
          1.0479e-03, -1.2000e-02],
        [ 2.4121e-03, -8.3900e-03, -1.8227e-02,  ...,  1.2980e-02,
         -3.3715e-03,  3.1476e-03]], requires_grad=True), Parameter containing:
tensor([ 0.0236,  0.0053, -0.0048,  ...,  0.0145, -0.0056,  0.0196],
       requires_grad=True), Parameter containing:
tensor([[-0.0168, -0.0145,  0.0215,  ...,  0.0191, -0.0153, -0.0139],
        [-0.0184,  0.0044, -0.0039,  ..., -0.0156, -0.0018, -0.0247],
        [-0.0177,  0.0087, -0.0042,  ..., -

In [8]:
model_ft = ThreeLayerModel(1781, 1500, NUM_CLASSES)
model_ft.load_state_dict(torch.load('small_unif.pt'))
model_ft.eval()

ThreeLayerModel(
  (fc1): Linear(in_features=1781, out_features=1500, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=1500, out_features=1500, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=1500, out_features=500, bias=True)
)

In [11]:
# Run the functions and save the best model in the function model_ft.
model_ft, accuracies, losses = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=1)

for split in ['train', 'val']:
    print(split, "accuracies by epoch:", accuracies)
    print(split, "losses by epoch:", losses)

# Save model
# torch.save(model_ft.state_dict(), 'med_unif.pt')

----------
Epoch 0/0
----------


Reached batch iteration 0
training Loss: 0.0111 Acc: 0.6875
training Loss: 0.0221 Acc: 0.7031
training Loss: 0.0330 Acc: 0.7188
training Loss: 0.0442 Acc: 0.6953
training Loss: 0.0552 Acc: 0.6937
training Loss: 0.0664 Acc: 0.6823
training Loss: 0.0775 Acc: 0.6830
training Loss: 0.0887 Acc: 0.6758
training Loss: 0.0999 Acc: 0.6701
training Loss: 0.1110 Acc: 0.6687
Reached batch iteration 10
training Loss: 0.1220 Acc: 0.6761
training Loss: 0.1331 Acc: 0.6719
training Loss: 0.1441 Acc: 0.6803
training Loss: 0.1554 Acc: 0.6719
training Loss: 0.1664 Acc: 0.6750
training Loss: 0.1778 Acc: 0.6680
Training complete in 1m 0s
Best val Acc: 0.000000
returning and looping back
train accuracies by epoch: [0.6875, 0.703125, 0.71875, 0.6953125, 0.69375, 0.6822916666666666, 0.6830357142857143, 0.67578125, 0.6701388888888888, 0.66875, 0.6761363636363636, 0.671875, 0.6802884615384616, 0.671875, 0.675, 0.668]
train losses by epoch: [0.011061152458190919, 0.022061598777770995, 0.03300006103515625, 0.04418

Small model maxes out at 0.668 accuracy