In [None]:
import glob
import logging
import math
import os

import numpy as np
import pandas as pd
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

from model import LSTMDriver

logging.basicConfig(filename='training.log',level=logging.DEBUG)

In [None]:
PROJECT_ROOT = '../../../..'
LEARNED_DRIVER = 'snakeoil_miner/data'
DIFFICULTY = 'easy'

TARGET_SIZE = 3
INPUT_SIZE = 22
HIDDEN_SIZE = 75

NUM_LAYERS = 2
BATCH_SIZE = 1
NUM_EPOCHS = 100
LEARNING_RATE = 1e-3


TRAINING_FILES = glob.glob('/'.join([PROJECT_ROOT, LEARNED_DRIVER, DIFFICULTY, '*.csv']))
TRAINING_DATA = {}
for FILE in TRAINING_FILES:
    DF = pd.read_csv(FILE, index_col=False)
    TRAINING_DATA[FILE] = DF.values

VALIDATION_FILES = glob.glob('/'.join([PROJECT_ROOT, LEARNED_DRIVER, 'validation', '*.csv']))
VALIDATION_DATA = {}
for FILE in VALIDATION_FILES:
    DF = pd.read_csv(FILE, index_col=False)
    VALIDATION_DATA[FILE] = DF.values

In [None]:
def save_checkpoint(state, is_best, filepath='latest_checkpoint.pth.tar'):
    torch.save(state, 'checkpoints/' + filepath)
    if is_best:
        torch.save(state, 'checkpoints/best_checkpoint.pth.tar')

In [None]:
def train(training_data, model, criterion):
    loss = 0
    for key in training_data:
        logging.info('--- Parsing track {}-{}'.format(key.split('/')[-2], key.split('/')[-1]))
        print('--- Parsing track {}-{}'.format(key.split('/')[-2], key.split('/')[-1]))
        
        model.hidden = model.init_hidden()
        track_sequence = training_data[key]

        targets = track_sequence[:, 0:3]
        inputs = track_sequence[:, 3:]

        targets_variable = autograd.Variable(torch.Tensor(targets))
        inputs_variable = autograd.Variable(torch.Tensor(inputs),  requires_grad=True)

        outputs_variable = model(inputs_variable)

        track_loss = criterion(outputs_variable, targets_variable)

        track_loss.backward()
        optimizer.step()

        loss += track_loss.data[0]
    return loss

In [None]:
def validate(validation_data, model, criterion):
    loss = 0
    for key in validation_data:
        logging.info('--- Parsing track {}-{}'.format(key.split('/')[-2], key.split('/')[-1]))
        print('--- Parsing track {}-{}'.format(key.split('/')[-2], key.split('/')[-1]))
        
        model.hidden = model.init_hidden()
        track_sequence = validation_data[key]

        targets = track_sequence[:, 0:3]
        inputs = track_sequence[:, 3:]

        targets_variable = autograd.Variable(torch.Tensor(targets), volatile=True)
        inputs_variable = autograd.Variable(torch.Tensor(inputs), volatile=True)

        outputs_variable = model(inputs_variable)

        track_loss = criterion(outputs_variable, targets_variable)

        loss += track_loss.data[0]
    return loss

In [None]:
model = LSTMDriver(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, TARGET_SIZE, BATCH_SIZE)
training_criterion = nn.MSELoss()
validation_criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', verbose=True)
min_loss = math.inf
losses = {
  'training': [],
  'validation': []
}

for epoch in np.arange(NUM_EPOCHS):
    logging.info('Epoch [%d/%d]' %(epoch+1, NUM_EPOCHS))
    print('Epoch [%d/%d]' %(epoch+1, NUM_EPOCHS))
    
    is_best = False

    training_loss = train(TRAINING_DATA, model, training_criterion)
    logging.info('--- TRAINING LOSS: %f' % training_loss)
    print('--- TRAINING LOSS: %f' % training_loss)

    validation_loss = validate(VALIDATION_DATA, model, validation_criterion)
    logging.info('--- VALIDATION LOSS: %f' % validation_loss)
    print('--- VALIDATION LOSS: %f' % validation_loss)

    if validation_loss < min_loss:
        logging.info('--- --- best model found so far: %f' % validation_loss)
        print('--- --- best model found so far: %f' % validation_loss)
        min_loss = validation_loss
        is_best = True

    losses['training'].append(training_loss)
    losses['validation'].append(validation_loss)

    save_checkpoint({
          'epoch': epoch + 1,
          'state_dict': model.state_dict(),
          'min_loss': min_loss,
          'optimizer' : optimizer.state_dict(),
      }, is_best)

    scheduler.step(validation_loss)
    logging.info('-------------------------------------------------------')
    print('-------------------------------------------------------')