In [1]:
# TRAINING SETTINGS
NUM_EPOCHS = 15


# LEARNING RATE SETTINGS
BASE_LR = 0.001
DECAY_WEIGHT = 0.1 # factor by which the learning rate is reduced.
EPOCH_DECAY = 30 # number of epochs after which the learning rate is decayed exponentially by DECAY_WEIGHT.


# DATASET INFO
NUM_CLASSES = 500 # set the number of classes in your dataset


# DATALOADER PROPERTIES
BATCH_SIZE = 32


# GPU SETTINGS
CUDA_DEVICE = 0 # Enter device ID of your gpu if you want to run on gpu. Otherwise neglect.
GPU_MODE = 0 # set to 1 if want to run on gpu.


In [2]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, models, transforms
import time
import copy
import os
import pandas as pd
import shutil

import torchvision

Benchmark: 6m45s

In [3]:
from dataset_class import FixedUniformDataset

dsets = {}
# for split in ['train', 'test']:
for split in ['train']:
    dsets[split] = FixedUniformDataset(split+'ing.pkl')

In [4]:
dset_loaders = {}
# for split in ['train', 'test']:
for split in ['train']:
    dset_loaders[split] = torch.utils.data.DataLoader(dsets[split], batch_size=BATCH_SIZE, shuffle=True, num_workers=8)

In [9]:
from torch.profiler import profile, record_function, ProfilerActivity

with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], profile_memory=True, record_shapes=True) as prof:

    # testing
    for data in dset_loaders['train']:
        inputs, labels = data
        break

  warn("CUDA is not available, disabling CUDA profiling")
STAGE:2024-04-02 12:33:31 24432:20534365 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2024-04-02 12:33:31 24432:20534365 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2024-04-02 12:33:31 24432:20534365 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


AttributeError: 'DataLoader' object has no attribute 'getitem'

Testing

In [14]:
inputs, labels = Variable(inputs), Variable(labels)
outputs = model_ft(inputs)
print(outputs[0].shape)
print(labels)
_, preds = torch.max(outputs.data, 1)
criterion = nn.CrossEntropyLoss()
loss = criterion(outputs, labels)
print(preds)
print(loss.item())

torch.Size([500])
tensor([346, 102, 357, 345, 344, 355,  72, 192,  11, 412, 448, 477, 261, 495,
        452, 108, 485, 246, 266, 118, 449, 293, 488, 104, 174, 309,  88, 212,
         68, 362,  39,  79])
tensor([346, 102, 357, 345, 344, 359,  74, 192,  17, 424, 359, 494, 431, 442,
        359, 108, 485, 246, 266, 118, 359, 293, 489, 104, 174, 309,  88, 359,
         74, 362,  42,  74])
5.6702728271484375


In [30]:
# USE_TENSORBOARD = False
use_gpu = GPU_MODE
if use_gpu:
    torch.cuda.set_device(CUDA_DEVICE)

# def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=100):
#     since = time.time()

#     best_model = model
#     best_acc = 0.0

#     accuracies = []
#     losses = []
#     for epoch in range(num_epochs):
#         print('-' * 10)
#         print('Epoch {}/{}'.format(epoch, num_epochs - 1))
#         print('-' * 10)

#         # Each epoch has a training and validation phase
#         optimizer = lr_scheduler(optimizer, epoch)
#         # optimizer = optim.Adam(model.parameters(), lr=0.001)
#         model.train()  # Set model to training mode

#         running_loss = 0.0
#         running_corrects = 0
#         running_total = 0

#         counter=0
#         # Iterate over data, getting one batch of inputs (images) and labels each time.
#         for data in dset_loaders['train']:
#             inputs, labels = data

#             if use_gpu:
#                 try:
#                     inputs, labels = Variable(inputs.float().cuda()), Variable(labels.long().cuda())
#                 except Exception as e:
#                     print("ERROR! here are the inputs and labels before we print the full stack trace:")
#                     print(inputs, labels)
#                     raise e
#             else:
#                 inputs, labels = Variable(inputs), Variable(labels)

#             # Set gradient to zero to delete history of computations in previous epoch. Track operations so that differentiation can be done automatically.
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             _, preds = torch.max(outputs.data, 1)

#             # outputs will be top 1 from softmax, labels will be single int
#             loss = criterion(outputs, labels)

#             # Print a line every 10 batches so you have something to watch and don't feel like the program isn't running.
#             if counter%10==0:
#                 print("Reached batch iteration", counter)

#             counter+=1

#             # backward + optimize only if in training phase
#             loss.backward()
#             optimizer.step()
#             try:
#                 running_loss += loss.item()
#                 running_corrects += torch.sum(preds == labels.data)
#                 running_total += len(labels.data)
#             except:
#                 print('unexpected error, could not calculate loss or do a sum.')

#             epoch_loss = running_loss / 500
#             epoch_acc = running_corrects.item() / running_total
#             # print(running_corrects, running_total)
#             print('{} Loss: {:.4f} Acc: {:.4f}'.format('training', epoch_loss, epoch_acc))
#             accuracies.append(epoch_acc)
#             losses.append(epoch_loss)

#     time_elapsed = time.time() - since
#     print('Training complete in {:.0f}m {:.0f}s'.format(
#         time_elapsed // 60, time_elapsed % 60))
#     print('Best val Acc: {:4f}'.format(best_acc))
#     print('returning and looping back')

#     return best_model, accuracies, losses


# This function changes the learning rate as the model trains.
# If the learning rate is too high, training tends to be unstable and it's harder to converge on an optimal set of weights. 
# But, if learning rate is too low, learning is too slow and you won't converge in a reasonable time frame. A good compromise 
# is to start out with a high learning rate and then reduce it over time. 
def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DECAY):
    """Decay learning rate by a factor of DECAY_WEIGHT every lr_decay_epoch epochs."""
    lr = init_lr * (DECAY_WEIGHT**(epoch // lr_decay_epoch))

    if epoch % lr_decay_epoch == 0:
        print('LR is set to {}'.format(lr))

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return optimizer

def train_model(model, criterion, optimizer, lr_scheduler=None, num_epochs=100):
    since = time.time()
    if use_gpu and torch.cuda.is_available():
        model = model.cuda()
    
    best_model = model
    best_acc = 0.0
    accuracies = []
    losses = []
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))

        # if lr_scheduler is not None:
        #     optimizer = lr_scheduler(optimizer, epoch)

        model.train()
        running_loss = 0.0
        running_corrects = 0
        running_total = 0

        for inputs, labels in dset_loaders['train']:
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)
            running_total += inputs.size(0)

        lr_scheduler.step()
        epoch_loss = running_loss / running_total
        epoch_acc = running_corrects.double() / running_total
        print('{} Loss: {:.4f} Acc: {:.4f}'.format('Training', epoch_loss, epoch_acc))
        accuracies.append(epoch_acc)
        losses.append(epoch_loss)
        best_acc = max(best_acc, epoch_acc)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best Acc: {:4f}'.format(best_acc))

    return best_model, accuracies, losses

In [33]:
from torch.optim.lr_scheduler import ExponentialLR


class ThreeLayerModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ThreeLayerModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.fc3(x)
        return torch.softmax(x, dim=1)

model_ft = ThreeLayerModel(1781, 1500, NUM_CLASSES)
print(list(model_ft.parameters()))


criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.0001)
# optimizer_ft = optim.RMSprop(model_ft.parameters(), lr=0.0001)

scheduler = ExponentialLR(optimizer_ft, gamma=EPOCH_DECAY)

[Parameter containing:
tensor([[-0.0134,  0.0159, -0.0116,  ...,  0.0188,  0.0180,  0.0075],
        [ 0.0071,  0.0203,  0.0173,  ...,  0.0006, -0.0035,  0.0035],
        [ 0.0004,  0.0120, -0.0180,  ...,  0.0098, -0.0098, -0.0211],
        ...,
        [ 0.0198,  0.0042,  0.0006,  ...,  0.0147, -0.0020,  0.0013],
        [ 0.0164, -0.0004, -0.0105,  ...,  0.0050,  0.0175,  0.0063],
        [ 0.0101,  0.0048,  0.0193,  ...,  0.0019, -0.0212,  0.0124]],
       requires_grad=True), Parameter containing:
tensor([ 0.0211, -0.0115, -0.0007,  ..., -0.0037, -0.0208, -0.0005],
       requires_grad=True), Parameter containing:
tensor([[-0.0006,  0.0111,  0.0024,  ..., -0.0185, -0.0194, -0.0177],
        [-0.0242,  0.0236, -0.0098,  ...,  0.0050, -0.0192, -0.0056],
        [ 0.0207,  0.0247, -0.0064,  ...,  0.0068,  0.0027, -0.0163],
        ...,
        [ 0.0099,  0.0238,  0.0191,  ...,  0.0022,  0.0080, -0.0174],
        [ 0.0123, -0.0098,  0.0003,  ...,  0.0235, -0.0002, -0.0093],
        [ 0

In [8]:
model_ft = ThreeLayerModel(1781, 1500, NUM_CLASSES)
model_ft.load_state_dict(torch.load('small_unif.pt'))
model_ft.eval()

ThreeLayerModel(
  (fc1): Linear(in_features=1781, out_features=1500, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=1500, out_features=1500, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=1500, out_features=500, bias=True)
)

In [34]:
# Run the functions and save the best model in the function model_ft.
model_ft, accuracies, losses = train_model(model_ft, criterion, optimizer_ft, scheduler, num_epochs=6)

for split in ['train', 'val']:
    print(split, "accuracies by epoch:", accuracies)
    print(split, "losses by epoch:", losses)

# Save model
torch.save(model_ft.state_dict(), '4_2.pt')

Epoch 0/5
Training Loss: 6.2146 Acc: 0.0000
Epoch 1/5
Training Loss: 6.2144 Acc: 0.0160
Epoch 2/5
Training Loss: 6.2176 Acc: 0.0000
Epoch 3/5


KeyboardInterrupt: 

Small model maxes out at 0.668 accuracy