In [7]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import pandas as pd
import numpy as np
import collections
import json
import glob

In [8]:
training_files = glob.glob('training_data/oval/*.csv')
training_data = {}

DATA = pd.DataFrame()
for training_file in training_files:
    df = pd.read_csv(training_file, index_col=False)
    DATA = pd.concat([DATA, df])
DATA_LENGTH = DATA.shape[0]
for training_file in training_files:
    df = pd.read_csv(training_file, index_col=False)
    df_std = (df - DATA.mean()) / DATA.std()
    for column in df_std:
        df_std[column].fillna(df[column], inplace=True)
    training_data[training_file] = df_std.values


TARGET_SIZE = 3
INPUT_SIZE = 22
HIDDEN_SIZE = 22

NUM_LAYERS = 1
BATCH_SIZE = 1
NUM_EPOCHS = 50
MILESTONES = [20, 30, 40]
GAMMA = 0.1
LEARNING_RATE = 1e-2

In [9]:
def save_checkpoint(state, is_best, filepath):
    torch.save(state, filepath)
    if is_best:
        torch.save(state, 'best_model/best_model_full.pth.tar')
        torch.save(state['state_dict'], 'best_model/best_model_state.pth.tar')

In [10]:
class LSTMDriver(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, batch_size):
        super(LSTMDriver, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.batch_size = batch_size
        
        super(LSTMDriver, self).__init__()

        self.lstm = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers)
        
        self.hidden = self.init_hidden()
        
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def init_hidden(self, hidden_state=None):
        if hidden_state is None:
            return (autograd.Variable(torch.zeros(self.num_layers, self.batch_size, self.hidden_size)),
                    autograd.Variable(torch.zeros(self.num_layers, self.batch_size, self.hidden_size)))
        else:
            return (autograd.Variable(hidden_state[0].data), autograd.Variable(hidden_state[1].data))
    
    def forward(self, x):
        lstm_out, hidden_out = self.lstm(x.view(1, -1), self.hidden)
        linear_out = self.linear(lstm_out)
        self.hidden = (autograd.Variable(hidden_out[0].data), autograd.Variable(hidden_out[1].data))
        
        return linear_out

In [11]:
checkpoint = torch.load('model_checkpoints/checkpoint.pth.tar')

model = LSTMDriver(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, TARGET_SIZE, BATCH_SIZE)
model.load_state_dict(checkpoint['state_dict'])

criterion = nn.MSELoss()

losses = [checkpoint['min_loss']]

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
optimizer.load_state_dict(checkpoint['optimizer'])
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=GAMMA, last_epoch=checkpoint['epoch']-1)

min_loss = checkpoint['min_loss']

for epoch in np.arange(checkpoint['epoch'] - 1, NUM_EPOCHS):
    print('Epoch [%d/%d]' %(epoch+1, NUM_EPOCHS))
    
    current_loss = 0
    is_best = False
    scheduler.step()
    for key in training_data:
        print('--- parsing track: %s/%s' % (key.split('/')[1], key.split('/')[2]))

        model.init_hidden()
        track_data = training_data[key]
        for row in track_data:
            optimizer.zero_grad()
            targets = row[0:3]
            inputs = row[3:]

            targets_variable = autograd.Variable(torch.Tensor(targets))
            inputs_variable = autograd.Variable(torch.Tensor(inputs))

            outputs_variable = model(inputs_variable)
            loss = criterion(outputs_variable, targets_variable)
            loss.backward()
            optimizer.step()

            current_loss += loss.data[0]
    
    current_loss = current_loss / DATA_LENGTH
    if current_loss < min_loss:
        print('---|--- best model so far found: %f' % current_loss)
        min_loss = current_loss
        is_best = True
    else:
        print('---|--- model is worse: %f' % current_loss)
    
    save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'min_loss': min_loss,
            'optimizer' : optimizer.state_dict(),
        }, is_best, filepath='model_checkpoints/checkpoint.pth.tar')
    
    losses.append(current_loss)
    print('-------------------------------------------------------')

Epoch [36/50]
--- parsing track: oval/race_CEYP.csv
--- parsing track: oval/race_XIWR.csv
--- parsing track: oval/race_LNJK.csv
--- parsing track: oval/race_DNTY.csv
--- parsing track: oval/race_XVQL.csv
---|--- model is worse: 0.127350
-------------------------------------------------------
Epoch [37/50]
--- parsing track: oval/race_CEYP.csv
--- parsing track: oval/race_XIWR.csv
--- parsing track: oval/race_LNJK.csv
--- parsing track: oval/race_DNTY.csv
--- parsing track: oval/race_XVQL.csv
---|--- model is worse: 0.101507
-------------------------------------------------------
Epoch [38/50]
--- parsing track: oval/race_CEYP.csv
--- parsing track: oval/race_XIWR.csv
--- parsing track: oval/race_LNJK.csv
--- parsing track: oval/race_DNTY.csv
--- parsing track: oval/race_XVQL.csv
---|--- model is worse: 0.097526
-------------------------------------------------------
Epoch [39/50]
--- parsing track: oval/race_CEYP.csv
--- parsing track: oval/race_XIWR.csv
--- parsing track: oval/race_LN