In [1]:
# import relevant libraries
import torch
import torch.nn as nn
import numpy as np

In [5]:
class VanillaLSTMNet(nn.Module):
    """Vanilla LSTM network for step-by-step sequence prediction.

    Every input step is embedded by a linear layer (followed by ReLU and
    dropout), passed through a single ``nn.LSTMCell``, and the new hidden
    state is mapped to one output step by a second linear layer.
    """

    def __init__(self, input_size=2, embedding_size=64, rnn_size=64,
                 output_size=2, dropout_prob=0.5):
        """Build the layers of the network.

        args:
            input_size (int) : Number of dimensions of one input step.
            embedding_size (int) : Size of the embedded step fed to the LSTM cell.
            rnn_size (int) : Size of the LSTM hidden state and cell state.
            output_size (int) : Number of dimensions of one output step.
            dropout_prob (float) : Dropout probability applied to the embedding.
        """
        super(VanillaLSTMNet, self).__init__()

        # keep the sizes around: forward() needs them to build the initial
        # states and to validate autoregressive prediction
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.rnn_size = rnn_size
        self.output_size = output_size

        # linear layer to embed the input position
        self.input_embedding_layer = nn.Linear(input_size, embedding_size)

        # LSTMCell maps (input, (h_0, c_0)) -> (h_1, c_1) where input is
        # (batch, embedding_size) and every state is (batch, rnn_size)
        self.lstm_cell = nn.LSTMCell(embedding_size, rnn_size)

        # linear layer to map the hidden state of LSTM to output
        self.output_layer = nn.Linear(rnn_size, output_size)

        # ReLU and dropout unit (applied to the embedded input)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_prob)

    def _embed(self, step):
        """Embed one (batch, input_size) step into (batch, embedding_size)."""
        return self.dropout(self.relu(self.input_embedding_layer(step)))

    def forward(self, observed_seq, pred_len = 0):
        """ This function takes the input sequence and predicts the output sequence.

            args:
                observed_seq (torch.Tensor) : Input sequence with shape <batch size x sequence length x number of dimensions>
                pred_len (int) : Length of the sequence to be predicted.

            returns:
                torch.Tensor with shape <batch size x (sequence length + pred_len) x output size>:
                one output per observed step, followed by pred_len steps
                generated by feeding each output back in as the next input.

            raises:
                ValueError : if observed_seq is not 3-dimensional or has no
                time steps, if pred_len is negative, or if pred_len > 0 while
                the output size differs from the input size.
        """
        if observed_seq.dim() != 3:
            raise ValueError("observed_seq must have shape (batch, sequence length, dimensions)")
        if observed_seq.size(1) == 0:
            raise ValueError("observed_seq must contain at least one time step")
        if pred_len < 0:
            raise ValueError("pred_len must be non-negative")
        if pred_len > 0 and self.output_size != self.input_size:
            # predicted steps are fed back as inputs, so the sizes must agree
            raise ValueError("pred_len > 0 requires output_size == input_size")

        # initialize cell states & hidden states with the dtype and device of
        # the input, so the model works for float/double and CPU/GPU alike
        batch_size = observed_seq.size(0)
        ht = observed_seq.new_zeros(batch_size, self.rnn_size)
        ct = observed_seq.new_zeros(batch_size, self.rnn_size)

        output_seq = []

        # Iterate over the observed sequence, and predict output sequence one step at a time.
        # unbind yields (batch, dimensions) slices, the 2-D shape the layers expect
        for step in observed_seq.unbind(dim=1):
            ht, ct = self.lstm_cell(self._embed(step), (ht, ct))
            out = self.output_layer(ht)
            output_seq.append(out)

        # autoregressive roll-out: the last output becomes the next input
        for _ in range(pred_len):
            ht, ct = self.lstm_cell(self._embed(out), (ht, ct))
            out = self.output_layer(ht)
            output_seq.append(out)

        # convert list of (batch, output size) tensors to one (batch, steps, output size) tensor
        return torch.stack(output_seq, dim=1)

In [10]:
def main(num_epoch=100, pred_freq=1, pred_len=6, learning_rate=0.003):
    """Train the vanilla LSTM network, test it periodically and plot both losses.

    args:
        num_epoch (int) : Number of training epochs.
        pred_freq (int) : The test loss is computed every ``pred_freq`` epochs.
        pred_len (int) : Number of extra steps predicted beyond the test input.
        learning_rate (float) : Learning rate passed to the Adam optimizer.

    raises:
        ValueError : if pred_freq is smaller than 1.

    NOTE(review): ``getData`` and ``plt`` are not defined in the visible
    cells; this function assumes another cell provides them (``plt`` is
    presumably ``matplotlib.pyplot``) - confirm before a Restart & Run All.
    """
    if pred_freq < 1:
        raise ValueError("pred_freq must be a positive integer")

    # get data
    train_input, train_target, test_input, test_target = getData()

    # define the network and criterion
    vanilla_lstm_net = VanillaLSTMNet()
    vanilla_lstm_net.double() # casts the network parameters to double
    criterion = nn.MSELoss() # MSE between predicted and actual coordinate paths

    # define the optimizer (torch.optim is reachable through `import torch`)
    optimizer = torch.optim.Adam(vanilla_lstm_net.parameters(), lr=learning_rate)

    # initialize lists for capturing losses
    train_loss = []   # one (averaged) value per epoch
    test_loss = []    # one value per tested epoch
    test_epochs = []  # 1-based epoch numbers matching test_loss

    # train for 'num_epoch' epochs and test every 'pred_freq' epochs
    for epoch in range(1, num_epoch + 1):
        # epochs are reported 1-based so the log agrees with the plot axes
        print('========== Epoch: {cur_epoch} / {total_epochs} =========='.format(cur_epoch=epoch, total_epochs=num_epoch))

        # losses seen by the closure during this epoch; an optimizer may call
        # the closure more than once per step, so they are averaged below
        closure_losses = []

        def closure():
            optimizer.zero_grad() # zero out gradients
            out = vanilla_lstm_net(train_input) # forward pass of lstm network for training
            loss = criterion(out, train_target) # calculate MSE loss
            print('Current training loss: {}'.format(loss.item())) # print current training loss
            closure_losses.append(loss.item())
            loss.backward() # backward prop
            return loss

        vanilla_lstm_net.train()
        optimizer.step(closure) # update weights
        train_loss.append(sum(closure_losses) / len(closure_losses))

        # test every pred_freq epochs
        if epoch % pred_freq == 0:
            vanilla_lstm_net.eval()
            with torch.no_grad(): # no gradients are needed for evaluation
                pred = vanilla_lstm_net(test_input, pred_len=pred_len) # forward pass of lstm network for testing
                # score only the outputs for the observed steps; an explicit
                # end index also works when pred_len == 0 (":-0" would be empty)
                observed_len = pred.size(1) - pred_len
                cur_test_loss = criterion(pred[:, :observed_len], test_target)
            test_loss.append(cur_test_loss.item())
            test_epochs.append(epoch)
            print('Current test loss: {}'.format(cur_test_loss.item()))
            # plotTestResults(pred, epoch)

    # visualize losses vs. epoch
    # loss is log-scaled for better visualization, since it hits a plateau quickly

    # plot training loss v/s epoch
    plt.figure()
    plt.title("Training loss (log scale) vs. epoch")
    plt.plot(list(range(1, num_epoch + 1)), np.log(np.asarray(train_loss)))

    # plot test loss v/s epoch (only the epochs that were actually tested)
    plt.figure()
    plt.title("Test loss (log scale) vs. epoch")
    plt.plot(test_epochs, np.log(np.asarray(test_loss)))

In [1]:
# Entry point: run the training / evaluation pipeline.
# NOTE: main() must already be defined in the running kernel. The NameError
# recorded as this cell's output shows it was executed (as In [1]) before the
# cell that defines main(); run the cells top to bottom.
if __name__ == '__main__':
    main()

NameError: name 'main' is not defined

In [None]:
import trajectories

# Trajectory file to load.
# NOTE(review): hard-coded absolute path on one machine - consider making it
# relative to a configurable data directory.
path1='/home/roongtaaahsih/ped_traj/self/datasets/eth/test/biwi_eth.txt'

# pass the path defined above (the original referenced an undefined name `pa`)
trajectories.read_file(path1)