In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from data_extraction import *
from model import *
import math
from torch.nn.utils.rnn import pad_packed_sequence
import config

TRAIN_USERS = ['a', 'b', 'c', 'e', 'g', 'h', 'i']
TEST_USERS = ['d', 'f']
PATH_PA = './activity_recognition'

feature_size = 3
hidden_size = 20
num_layers = 3

batch_size = 30
step_size = 10

learning_rate = 0.05

def train(rnn, lin, X, y, lens, percent_train=0.6, num_epochs=10, loss_fn=nn.CrossEntropyLoss(size_average=False), learning_rate=0.05, num_batches=10):
    train_errors = torch.zeros(num_epochs)
    test_errors = torch.zeros(num_epochs)

    # Set initial states - never changes at t=0
    h0 = Variable(torch.zeros(num_layers, batch_size, hidden_size).double(), requires_grad=True)
    # optimizer = torch.optim.Adam([X], lr=1)

    # run a loop for each epoch of training
    for t in range(num_epochs):
        # split into sets for training (60%), cross_validation (40%) - outputs np ndarrays
        X_train, y_train, lens_train, X_test, y_test, lens_test = training_test_split(X, y, lens, percent_train=percent_train)
        # print(X_train.type(), y_train.type(), lens_train.type(), X_test.type(), y_test.type(), lens_test.type() )
        # X_train = T(X_train)
        # X_train = T(X_test)
        # y_train = torch.cat([torch.from_numpy(label) for label in y_train])
        # y_test = torch.cat([torch.from_numpy(label) for label in y_test])
        # print(y_train)
        # lens_train = torch.from_numpy(lens_train)
        # lens_test = torch.from_numpy(lens_test)

        # create training batches (padded sequences) of size batch_size
        batches, batch_labels = make_training_batches(X_train, y_train, lens_train, batch_size, num_batches=num_batches)   
        print('Checkpoint 0!', len(batches), len(batch_labels), len(batch_labels[0]), len(batch_labels[0][0]), 1)

        for batch in batches:
            print('Batches!!!', h0.data.type())
            y_pred, _ = rnn(batch, h0)
            # print('Hidden layer output', test.size())
            y_pred, y_pred_lens = pad_packed_sequence(y_pred, batch_first=True)
            # print('pred size', y_pred.size(), 'Num batch labels', len(batch_labels[0]))
            
            # identify and print the loss from the prediction
            # print('torched true output', y_pred.size(), batch_labels[0])
            for batch_pred in y_pred: 
                # print('Simple Batch: ', batch_pred.size(), len(batch_labels), len(batch_labels[0]))
                # for seq in batch_pred:
                #     # for example in seq:
                #     print('Simple Seq: ',seq)
                pred = lin(batch_pred)
                # print('Prediction:','size', pred.size(), 'label[0]', batch_labels)
                # print('actual', pred[0], 'variable label[0][0]', T(batch_labels[0][0].tolist()))
                print('Some sizes', y_pred.size(), len(batch_labels), len(batch_labels[0]), batch_labels[0], batch_labels[0][0])
                loss = loss_fn(pred[0], Variable(torch.LongTensor(batch_labels[0]), requires_grad=False))
                # Variable(torch.LongTensor(batch_labels[0].tolist()), requires_grad=False)
                print("Loss after {}\t steps is {}".format(t, loss.data[0]))

        # Zero the gradients before running the backward pass.
        lin.zero_grad()
        rnn.zero_grad()

        # Backward pass: compute gradient of the loss with respect to all the learnable
        # parameters of the rnn. Internally, the parameters of each Module are stored
        # in Variables with requires_grad=True, so this call will compute gradients for
        # all learnable parameters in the rnn.
        loss.backward()
        
        # Update the weights using gradient descent. Each parameter is a Variable, so
        # we can access its data and gradients like we did before.
        for param in lin.parameters():
            param.data -= learning_rate * param.grad.data

        for param in rnn.parameters():
            param.data -= learning_rate * param.grad.data

        optimizer.step()

        # record training and test errors
        train_errors[t] = 0
        test_errors[t] = 0

    return rnn, lin, train_errors, test_errors

"""
Identify a training and cross-validation subset of the data given a np X, y 
input.
"""
def training_test_split(X, y, lens, percent_train=0.6):
    num_train_examples = math.ceil(percent_train*X.size(0))
    idx = torch.randperm(X.size(0)) # sorting maintains the order of the lengths of sequences

    train_idx, _ = torch.sort(idx[:num_train_examples], 0)
    test_idx, _  = torch.sort(idx[num_train_examples:], 0)

    X_train = torch.index_select(X, 0, train_idx)
    X_test  = torch.index_select(X, 0, test_idx)

    y_train = torch.index_select(y, 0, train_idx)
    y_test  = torch.index_select(y, 0, test_idx)

    lens_train = torch.index_select(lens, 0, train_idx).double()
    lens_test = torch.index_select(lens, 0, test_idx).double()

    return X_train, y_train, lens_train, X_test, y_test, lens_test

"""
Create batches (of type packed_sequence) of the desired batch size, and annotate
them with sequence labels, lengths.
"""
def make_training_batches(X, y, lens, batch_size, num_batches=10):
    batches = []
    batch_labels = []

    for b in range(num_batches):
        idx = torch.randperm(X.size(0))
        # print('idx sort', idx[:batch_size], torch.sort(idx[:batch_size]))
        batch_idx, _ = torch.sort(idx[:batch_size])
        # batch_idx = sorted(idx[:batch_size])

        seqs = torch.index_select(X, 0, batch_idx)
        labels = torch.index_select(y, 0, batch_idx)
        # convert to list for passing to padding function
        batch_lens = torch.index_select(lens, 0, batch_idx).tolist()

        # convert to Variables, then to batched padded sequences and train the batches
        batch = Variable(seqs, requires_grad=False)
        # print('Batch Info', batch.size())
        batch = pack_padded_sequence(batch, batch_lens, batch_first=True)
        # print('Sequence dims', seqs.data.size())
        # print('batch info')
        batches.append(batch)
        batch_labels.append(labels)
    return (batches, batch_labels)


In [2]:
# define loss function to return 1 if correct, -1 if incorrect
loss_fn = nn.CrossEntropyLoss(size_average=False)
learning_rate = learning_rate

rnn = nn.RNN(feature_size, hidden_size, num_layers)
lin = nn.Linear(hidden_size, 1)
X, y, lens = getTrainingSequences(TRAIN_USERS, step_size, path=PATH_PA)

rnn, lin, train_errors, test_errors = train(rnn, lin, X, y, lens, num_epochs=1000, loss_fn=loss_fn)

  padded_seqs = np.zeros((len(seqs), lens[0], 3))
  padded_labels = np.zeros((len(labels), lens[0]))


Checkpoint 0! 10 10 30 6603 1
Batches!!! torch.DoubleTensor


TypeError: addmm_ received an invalid combination of arguments - got (int, int, torch.DoubleTensor, torch.FloatTensor), but expected one of:
 * (torch.DoubleTensor mat1, torch.DoubleTensor mat2)
 * (torch.SparseDoubleTensor mat1, torch.DoubleTensor mat2)
 * (float beta, torch.DoubleTensor mat1, torch.DoubleTensor mat2)
 * (float alpha, torch.DoubleTensor mat1, torch.DoubleTensor mat2)
 * (float beta, torch.SparseDoubleTensor mat1, torch.DoubleTensor mat2)
 * (float alpha, torch.SparseDoubleTensor mat1, torch.DoubleTensor mat2)
 * (float beta, float alpha, torch.DoubleTensor mat1, torch.DoubleTensor mat2)
 * (float beta, float alpha, torch.SparseDoubleTensor mat1, torch.DoubleTensor mat2)
