In [1]:
# TRAINING SETTINGS
NUM_EPOCHS = 15


# LEARNING RATE SETTINGS
BASE_LR = 0.001
DECAY_WEIGHT = 0.1 # factor by which the learning rate is reduced.
EPOCH_DECAY = 30 # number of epochs after which the learning rate is decayed exponentially by DECAY_WEIGHT.


# DATASET INFO
NUM_CLASSES = 500 # set the number of classes in your dataset


# DATALOADER PROPERTIES
BATCH_SIZE = 32


# GPU SETTINGS
CUDA_DEVICE = 0 # Enter device ID of your gpu if you want to run on gpu. Otherwise neglect.
GPU_MODE = 0 # set to 1 if want to run on gpu.


In [2]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, models, transforms
import time
import copy
import os
import pandas as pd
import shutil

import torchvision

Benchmark: 6m45s

In [3]:
from dataset_class import FixedUniformDataset

dsets = {}
for split in ['train', 'test']:
    dsets[split] = FixedUniformDataset(split+'ing.pkl')

In [4]:
dset_loaders = {}
for split in ['train', 'test']:
    dset_loaders[split] = torch.utils.data.DataLoader(dsets[split], batch_size=BATCH_SIZE, shuffle=True, num_workers=8)

In [38]:
for data in dset_loaders['train']:
  inputs, labels = data
  break

Testing

In [54]:
inputs, labels = Variable(inputs), Variable(labels)
outputs = model_ft(inputs)
print(outputs[0].shape)
print(labels)
_, preds = torch.max(outputs.data, 1)
criterion = nn.CrossEntropyLoss()
loss = criterion(outputs, labels)
print(preds)
print(loss.item())

torch.Size([500])
tensor([201, 140, 108, 271, 325, 301,  55,  84, 449, 165, 331,  33, 298, 121,
        419, 235, 115, 279, 417, 437, 143, 384, 476, 127, 225, 319, 246, 217,
        240, 368, 461,  68])
tensor([201, 140, 129,   5, 303, 156, 156,  70, 156, 165, 303,  42, 298, 121,
        303, 117, 303, 156, 156, 156, 129, 384, 464, 156, 361, 129, 156, 214,
        303, 156, 156,  70])
6.099215507507324


In [16]:
# USE_TENSORBOARD = False
use_gpu = GPU_MODE
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=100):
    since = time.time()

    best_model = model
    best_acc = 0.0

    accuracies = []
    losses = []
    for epoch in range(num_epochs):
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        optimizer = lr_scheduler(optimizer, epoch)
        model.train()  # Set model to training mode

        running_loss = 0.0
        running_corrects = 0
        running_total = 0

        counter=0
        # Iterate over data, getting one batch of inputs (images) and labels each time.
        for data in dset_loaders['train']:
            inputs, labels = data

            if use_gpu:
                try:
                    inputs, labels = Variable(inputs.float().cuda()), Variable(labels.long().cuda())
                except Exception as e:
                    print("ERROR! here are the inputs and labels before we print the full stack trace:")
                    print(inputs, labels)
                    raise e
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            # Set gradient to zero to delete history of computations in previous epoch. Track operations so that differentiation can be done automatically.
            optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs.data, 1)

            # outputs will be top 1 from softmax, labels will be single int
            loss = criterion(outputs, labels)

            # Print a line every 10 batches so you have something to watch and don't feel like the program isn't running.
            if counter%10==0:
                print("Reached batch iteration", counter)

            counter+=1

            # backward + optimize only if in training phase
            loss.backward()
            optimizer.step()
            try:
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)
                running_total += len(labels.data)
            except:
                print('unexpected error, could not calculate loss or do a sum.')

            epoch_loss = running_loss / 500
            epoch_acc = running_corrects.item() / running_total
            # print(running_corrects, running_total)
            print('{} Loss: {:.4f} Acc: {:.4f}'.format('training', epoch_loss, epoch_acc))
            accuracies.append(epoch_acc)
            losses.append(epoch_loss)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    print('returning and looping back')

    return best_model, accuracies, losses


# This function changes the learning rate as the model trains.
# If the learning rate is too high, training tends to be unstable and it's harder to converge on an optimal set of weights. 
# But, if learning rate is too low, learning is too slow and you won't converge in a reasonable time frame. A good compromise 
# is to start out with a high learning rate and then reduce it over time. 
def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DECAY):
    """Decay learning rate by a factor of DECAY_WEIGHT every lr_decay_epoch epochs."""
    lr = init_lr * (DECAY_WEIGHT**(epoch // lr_decay_epoch))

    if epoch % lr_decay_epoch == 0:
        print('LR is set to {}'.format(lr))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return optimizer

In [6]:
class ThreeLayerModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ThreeLayerModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.fc3(x)
        return torch.softmax(x, dim=1)

model_ft = ThreeLayerModel(1781, 1500, NUM_CLASSES)
print(list(model_ft.parameters()))


criterion = nn.CrossEntropyLoss()

# if use_gpu:
#     criterion.cuda()
#     model_ft.cuda()

optimizer_ft = optim.RMSprop(model_ft.parameters(), lr=0.0001)



[Parameter containing:
tensor([[-0.0211,  0.0113, -0.0087,  ..., -0.0041,  0.0101, -0.0163],
        [-0.0031,  0.0187, -0.0216,  ..., -0.0219, -0.0214, -0.0043],
        [ 0.0068,  0.0094,  0.0024,  ...,  0.0087, -0.0104,  0.0213],
        ...,
        [-0.0213, -0.0044, -0.0145,  ...,  0.0116,  0.0196,  0.0133],
        [ 0.0154, -0.0214,  0.0231,  ...,  0.0066,  0.0153, -0.0227],
        [-0.0142, -0.0119, -0.0191,  ..., -0.0030,  0.0148, -0.0126]],
       requires_grad=True), Parameter containing:
tensor([0.0025, 0.0135, 0.0140,  ..., 0.0119, 0.0168, 0.0087],
       requires_grad=True), Parameter containing:
tensor([[-0.0049,  0.0241, -0.0176,  ...,  0.0102,  0.0023, -0.0180],
        [ 0.0059, -0.0160,  0.0195,  ...,  0.0214,  0.0222,  0.0204],
        [-0.0229,  0.0257,  0.0241,  ..., -0.0069,  0.0181, -0.0242],
        ...,
        [-0.0198,  0.0215,  0.0137,  ..., -0.0233, -0.0047,  0.0165],
        [-0.0248,  0.0013, -0.0081,  ...,  0.0090, -0.0121, -0.0005],
        [-0.0185,

In [21]:
model_ft = ThreeLayerModel(1781, 1520, NUM_CLASSES)
model_ft.load_state_dict(torch.load('small_unif.pt'))
model_ft.eval()

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7a2acb9e50>
Traceback (most recent call last):
  File "/Users/jocelyn/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
  File "/Users/jocelyn/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1436, in _shutdown_workers
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'


: 

In [20]:
# Run the functions and save the best model in the function model_ft.
model_ft, accuracies, losses = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=NUM_EPOCHS)

for split in ['train', 'val']:
    print(split, "accuracies by epoch:", accuracies)
    print(split, "losses by epoch:", losses)

# Save model
torch.save(model_ft.state_dict(), 'med_unif.pt')

----------
Epoch 0/14
----------
LR is set to 0.001
Reached batch iteration 0
training Loss: 0.0124 Acc: 0.0000
training Loss: 0.0249 Acc: 0.0000
training Loss: 0.0373 Acc: 0.0000
training Loss: 0.0497 Acc: 0.0000
training Loss: 0.0621 Acc: 0.0000
training Loss: 0.0746 Acc: 0.0000
training Loss: 0.0870 Acc: 0.0000
training Loss: 0.0994 Acc: 0.0000
training Loss: 0.1119 Acc: 0.0000
training Loss: 0.1243 Acc: 0.0000
Reached batch iteration 10
training Loss: 0.1367 Acc: 0.0000
training Loss: 0.1492 Acc: 0.0000
training Loss: 0.1616 Acc: 0.0000
training Loss: 0.1740 Acc: 0.0000
training Loss: 0.1864 Acc: 0.0000
training Loss: 0.1989 Acc: 0.0000
----------
Epoch 1/14
----------
Reached batch iteration 0
training Loss: 0.0124 Acc: 0.0000
training Loss: 0.0249 Acc: 0.0000
training Loss: 0.0373 Acc: 0.0000
training Loss: 0.0497 Acc: 0.0000
training Loss: 0.0621 Acc: 0.0000
training Loss: 0.0746 Acc: 0.0000
training Loss: 0.0870 Acc: 0.0000
training Loss: 0.0994 Acc: 0.0000
training Loss: 0.111

Small model maxes out at 0.668 accuracy