# LSTM implementation for TORCS driver

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
import glob
from sklearn.preprocessing import Imputer
from collections import defaultdict

## LSTM Network

In [2]:
class RNN(nn.Module):
    """Stacked LSTM followed by a linear read-out layer.

    The module is stateful: the LSTM state is kept in ``self.hidden`` and is
    fed back in on the next ``forward`` call, so consecutive calls continue
    from where the previous one stopped. Assign the result of
    ``init_hidden()`` to ``self.hidden`` to start again from a zero state.

    Args:
        input_size: number of features per time step.
        hidden_size: number of units in each LSTM layer.
        num_layers: number of stacked LSTM layers.
        output_size: number of values produced by the linear layer.
        batch_size: batch size the stored hidden state is allocated for;
            inputs passed to ``forward`` must use this batch size.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, batch_size):
        # Initialise nn.Module first so attribute registration is in place
        # before anything is assigned on the instance.
        super(RNN, self).__init__()

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.batch_size = batch_size

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True
        )
        self.out = nn.Linear(hidden_size, output_size)
        self.hidden = self.init_hidden()

    def init_hidden(self, x=None):
        """Build an ``(h, c)`` state tuple for the LSTM.

        Args:
            x: optional ``(h, c)`` pair from a previous LSTM call. When given,
                its data is re-wrapped so the returned state carries the same
                values but no autograd history. When ``None``, a zero state
                of shape ``(num_layers, batch_size, hidden_size)`` is built.

        Returns:
            A tuple ``(h, c)``. The state is only returned, never stored;
            callers that want to reset the network must assign it to
            ``self.hidden`` themselves.
        """
        if x is None:
            shape = (self.num_layers, self.batch_size, self.hidden_size)
            return (Variable(torch.zeros(*shape)),
                    Variable(torch.zeros(*shape)))
        # Re-wrapping ``.data`` cuts the graph, so backpropagation stops at
        # the boundary between two forward calls.
        return (Variable(x[0].data), Variable(x[1].data))

    def forward(self, x):
        """Run one batch through the LSTM and the linear layer.

        Args:
            x: input laid out batch-first (``batch_first=True``). The
                flattening below yields ``hidden_size`` columns per row only
                when the sequence dimension has length 1.

        Returns:
            The linear layer's output, one row per element of ``x``.
        """
        lstm_out, self.hidden_out = self.lstm(x, self.hidden)
        # Flatten to one row per batch element before the linear layer.
        output = self.out(lstm_out.view(len(x), -1))
        # Keep the new state for the next call, detached from this graph.
        self.hidden = self.init_hidden(self.hidden_out)
        return output

## Train network

In [3]:
# Network dimensions
INPUT_SIZE = 22
HIDDEN_SIZE = 160
NUM_LAYERS = 3

# Optimisation settings
BATCH_SIZE = 100
NUM_EPOCHS = 10
LEARNING_RATE = 0.001

# Model with three outputs, matching the three target columns read from each CSV.
rnn = RNN(
    input_size=INPUT_SIZE,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    output_size=3,
    batch_size=BATCH_SIZE,
)

# Mean-squared-error regression loss, minimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=LEARNING_RATE)

In [4]:
# Collect every CSV from both data directories (train_data first, then basic_data).
training_files = [
    path
    for pattern in ('train_data/*.csv', 'basic_data/*.csv')
    for path in glob.glob(pattern)
]

# One TensorDataset per CSV file, keyed by the file's path.
training_sets = defaultdict(dict)

for f in training_files:
    frame = pd.read_csv(f)

    # The first three columns are the targets; everything after is input.
    y_train = frame.iloc[:, :3].values
    X_train = frame.iloc[:, 3:].values

    # Fill missing inputs with the per-column mean of this file.
    X_train = Imputer(missing_values='NaN', strategy='mean', axis=0).fit_transform(X_train)

    X_train = torch.from_numpy(X_train).float()
    y_train = torch.from_numpy(y_train).float()

    training_sets[f] = TensorDataset(X_train, y_train)

In [5]:
# Print the loss every LOG_EVERY batches (independent of the batch size).
LOG_EVERY = 100

for epoch in range(NUM_EPOCHS):
    print('Epoch [%d/%d]' %(epoch+1, NUM_EPOCHS))

    for f in training_files:
        print('  training set: %s' %(f[f.find('/')+1:]))
        dataset = training_sets[f]
        # Number of batches in this file, used only for the progress message.
        num_steps = len(dataset) // BATCH_SIZE
        # shuffle=False keeps the rows in their recorded order.
        train_loader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=False)

        # Start every file from a zero state. init_hidden() only *returns*
        # the state, so it has to be assigned; otherwise the state left over
        # from the previous file would leak into this one.
        rnn.hidden = rnn.init_hidden()

        for i, (X, y) in enumerate(train_loader):
            # The stored hidden state is sized for BATCH_SIZE, so a short
            # final batch cannot be fed through the network.
            if len(X) != BATCH_SIZE:
                continue

            # Reshape to (batch, sequence length 1, features) for the LSTM.
            data = Variable(X.view(-1, 1, INPUT_SIZE))
            target = Variable(y)

            optimizer.zero_grad()
            prediction = rnn(data)
            loss = criterion(prediction, target)
            loss.backward()
            optimizer.step()

            if (i+1) % LOG_EVERY == 0:
                # loss.item() exists from PyTorch 0.4 on; older releases
                # expose the scalar as loss.data[0].
                loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
                print('    step: [%d/%d], loss: %.4f'
                      %(i+1, num_steps, loss_value))

print('Training done')



Epoch [1/10]
  training set: f-speedway.csv
  training set: alpine-1.csv
    step: [100/170], loss: 0.1285
  training set: aalborg.csv
  training set: alpine_1_2_laps.csv
    step: [100/282], loss: 0.0005
    step: [200/282], loss: 0.0128
  training set: forza_3_laps.csv
    step: [100/378], loss: 0.0000
    step: [200/378], loss: 0.0018
    step: [300/378], loss: 0.0003
  training set: cg_speedway_2_laps.csv
  training set: cg_track2_2_laps.csv
    step: [100/143], loss: 0.0006
  training set: ruudskogen.csv
    step: [100/147], loss: 0.0046
  training set: brodenhach_2_laps.csv
    step: [100/180], loss: 0.0022
  training set: e_road_2_laps.csv
    step: [100/147], loss: 0.0005
Epoch [2/10]
  training set: f-speedway.csv
  training set: alpine-1.csv
    step: [100/170], loss: 0.0814
  training set: aalborg.csv
  training set: alpine_1_2_laps.csv
    step: [100/282], loss: 0.0003
    step: [200/282], loss: 0.0106
  training set: forza_3_laps.csv
    step: [100/378], loss: 0.0000
    s

## Save model parameters

In [6]:
# Save only the model's parameters (its state dict) to 'rnn_params.pt'.
torch.save(rnn.state_dict(), 'rnn_params.pt')

In [7]:
# Save the whole model object, not just its parameters, to 'whole_net.pt'.
# NOTE(review): loading this file presumably needs the RNN class to be
# importable at load time - confirm before relying on it.
torch.save(rnn, 'whole_net.pt')

  "type " + obj.__name__ + ". It won't be checked "


### Backup code

In [None]:
# for file in training_files:
#     # Train the network for each training session
#     print('Training file: %s' %(file))
    
#     train_ds = pd.read_csv(file)
#     X_train = train_ds.iloc[:, 3:].values
#     y_train = train_ds.iloc[:, :3].values
    
#     imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
#     imputer = imputer.fit(X_train)
#     X_train = imputer.transform(X_train)
    
#     X_train = torch.from_numpy(X_train).float()
#     y_train = torch.from_numpy(y_train).float()
    
#     dataset = TensorDataset(X_train, y_train)
    
#     train_loader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=False)
    
#     rnn.init_hidden()
    
#     for epoch in range(NUM_EPOCHS):
#         for i, (X, y) in enumerate(train_loader):
#             if (len(X) != BATCH_SIZE):
#                 continue
            
#             data = Variable(X.view(-1, 1, INPUT_SIZE))
#             target = Variable(y)
            
#             optimizer.zero_grad()
#             prediction = rnn(data)
#             loss = criterion(prediction, target)
#             loss.backward()
#             optimizer.step()
            
#             if (i+1) % 30 == 0:
#                 print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
#                       %(epoch+1, NUM_EPOCHS, i+1, len(X_train)//BATCH_SIZE, loss.data[0]))