In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.nn.utils.rnn import pack_sequence, pad_sequence, pack_padded_sequence, pad_packed_sequence

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
train_data = torch.from_numpy(np.load('./data/train16.npy'))
test_data = torch.from_numpy(np.load('./data/test16.npy'))

In [34]:
class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers = 2, drop_prob=0.2):
        super(GRUNet, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, input, hidden):
        input = pack_sequence(input, False)
        out, hidden = self.gru(input, hidden)
        
        out = self.fc(self.relu(out[:,-1]))
        return out, h
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros((self.n_layers, batch_size, self.hidden_dim)).to(device)
        return hidden

In [35]:
model = GRUNet(276,256,276,2)

In [36]:
model.init_hidden(16).shape

torch.Size([2, 16, 256])

In [3]:
def clip_seq(seq):
    seq_ = []
    for x in seq:
        while len(x) > 500:
            N = len(x)
            if N > 600:
                seq_.append(x[:500])
                x = x[500:]
            else:
                seq_.append(x[:N//2])
                x = x[N//2:]
        seq_.append(x)
    return seq_

train_seq = np.loadtxt('./data/TrainSequences.txt', dtype = np.int32)
test_seq = np.loadtxt('./data/TestSequences.txt', dtype = np.int32)

train_seq = [np.where(train_seq == i)[0] for i in np.unique(train_seq)]
test_seq = [np.where(test_seq == i)[0] for i in np.unique(test_seq)]

train_seq = clip_seq(train_seq)
test_seq = clip_seq(test_seq)

train_sequences = [train_data[i] for i in train_seq]
test_sequences = [test_data[i] for i in test_seq]

In [5]:
def train(train_loader, learn_rate, hidden_dim=256, EPOCHS=5):
    # Setting common hyperparameters
    input_dim = next(iter(train_loader))[0].shape[2]
    output_dim = 1
    n_layers = 2

    model = GRUNet(input_dim, hidden_dim, output_dim, n_layers)
    model.to(device)
    
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
    
    model.train()
    print("Starting Training of {} model".format(model_type))
    epoch_times = []

    for epoch in range(1,EPOCHS+1):
        start_time = time.clock()
        h = model.init_hidden(batch_size)
        avg_loss = 0.
        counter = 0
        for x, label in train_loader:
            counter += 1
            if model_type == "GRU":
                h = h.data
            else:
                h = tuple([e.data for e in h])
            model.zero_grad()
            
            out, h = model(x.to(device).float(), h)
            loss = criterion(out, label.to(device).float())
            loss.backward()
            optimizer.step()
            avg_loss += loss.item()
            if counter%200 == 0:
                print("Epoch {}......Step: {}/{}....... Average Loss for Epoch: {}".format(epoch, counter, len(train_loader), avg_loss/counter))
        current_time = time.clock()
        print("Epoch {}/{} Done, Total Loss: {}".format(epoch, EPOCHS, avg_loss/len(train_loader)))
        print("Total Time Elapsed: {} seconds".format(str(current_time-start_time)))
        epoch_times.append(current_time-start_time)
    print("Total Training Time: {} seconds".format(str(sum(epoch_times))))
    return model

def evaluate(model, test_x, test_y, label_scalers):
    model.eval()
    outputs = []
    targets = []
    start_time = time.clock()
    for i in test_x.keys():
        inp = torch.from_numpy(np.array(test_x[i]))
        labs = torch.from_numpy(np.array(test_y[i]))
        h = model.init_hidden(inp.shape[0])
        out, h = model(inp.to(device).float(), h)
        outputs.append(label_scalers[i].inverse_transform(out.cpu().detach().numpy()).reshape(-1))
        targets.append(label_scalers[i].inverse_transform(labs.numpy()).reshape(-1))
    print("Evaluation Time: {}".format(str(time.clock()-start_time)))
    sMAPE = 0
    for i in range(len(outputs)):
        sMAPE += np.mean(abs(outputs[i]-targets[i])/(targets[i]+outputs[i])/2)/len(outputs)
    print("sMAPE: {}%".format(sMAPE*100))
    return outputs, targets, sMAPE

In [6]:
# train_dataset = TensorDataset(train_data[:,:276].cuda())
# train_loader = DataLoader(train_dataset, 1024, True)

In [7]:
# train_packed = pack_sequence(train_sequences, enforce_sorted = False)
# test_packed = pack_sequence(test_sequences, enforce_sorted = False)

In [13]:
train_padded, lengths = pad_packed_sequence(train_packed)
out = nn.Linear(276,276)(train_padded)

In [14]:
out.shape

torch.Size([385, 16, 276])

In [9]:
train_packed

PackedSequence(data=tensor([[ 0.0000e+00,  1.0479e+00,  0.0000e+00,  ...,  2.7990e-04,
          1.1998e-04, -2.6011e-04],
        [ 0.0000e+00,  1.0479e+00,  0.0000e+00,  ...,  2.0003e-04,
         -1.4000e-03,  3.0994e-04],
        [ 0.0000e+00,  1.0459e+00,  0.0000e+00,  ..., -1.1070e-02,
          1.0857e-02, -1.4198e-02],
        ...,
        [ 0.0000e+00,  1.0508e+00,  0.0000e+00,  ..., -1.4997e-04,
         -2.9981e-05,  4.0007e-04],
        [ 0.0000e+00,  1.0508e+00,  0.0000e+00,  ..., -4.3011e-04,
         -1.1797e-03, -4.5991e-04],
        [ 0.0000e+00,  1.0508e+00,  0.0000e+00,  ..., -8.7976e-04,
         -1.1101e-03,  1.1998e-04]]), batch_sizes=tensor([16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16,

In [8]:
train_bones = [x[:,:276] for x in train_sequences]
train_packed = pack_sequence(train_bones[:16], enforce_sorted = False).float()

In [41]:
order = torch.randperm(len(train_bones))
batch_order = order[:16]
order = order[16:]
batch = [train_bones[x] for x in batch_order]
batch_packed = pack_sequence(batch, enforce_sorted = False).float()

In [12]:
gru = nn.GRU(276,276,2)

In [31]:
out = gru(train_packed, torch.zeros((2,16,276)))
seq, length = pad_packed_sequence(out[0])

In [45]:
nn.Linear(276,276)(batch_packed)

AttributeError: 'PackedSequence' object has no attribute 'dim'

In [35]:
model = GRUNet(276, 276, 276, 2).cuda()

In [36]:
hidden = model.init_hidden(16)
model(train_packed, hidden)

PackedSequence(data=tensor([[ 0.0044,  0.0541,  0.0295,  ...,  0.0005,  0.0049, -0.0282],
        [ 0.0121,  0.0608,  0.0244,  ..., -0.0323,  0.0148, -0.0342],
        [ 0.0077,  0.0697,  0.0109,  ..., -0.0230, -0.0018, -0.0731],
        ...,
        [-0.2398,  0.2326,  0.1549,  ..., -0.1636,  0.1407, -0.1069],
        [-0.1497,  0.2813,  0.1841,  ..., -0.1565,  0.1862, -0.1695],
        [-0.2340,  0.1404,  0.0452,  ..., -0.0936,  0.1298, -0.1311]],
       device='cuda:0', grad_fn=<CudnnRnnBackward>), batch_sizes=tensor([16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
 

TypeError: tuple indices must be integers or slices, not tuple

In [26]:
train_packed

PackedSequence(data=tensor([[ 0.0000e+00,  1.0479e+00,  0.0000e+00,  ...,  2.7990e-04,
          1.1998e-04, -2.6011e-04],
        [ 0.0000e+00,  1.0479e+00,  0.0000e+00,  ...,  2.0003e-04,
         -1.4000e-03,  3.0994e-04],
        [ 0.0000e+00,  1.0459e+00,  0.0000e+00,  ..., -1.1070e-02,
          1.0857e-02, -1.4198e-02],
        ...,
        [ 0.0000e+00,  1.0508e+00,  0.0000e+00,  ..., -1.4997e-04,
         -2.9981e-05,  4.0007e-04],
        [ 0.0000e+00,  1.0508e+00,  0.0000e+00,  ..., -4.3011e-04,
         -1.1797e-03, -4.5991e-04],
        [ 0.0000e+00,  1.0508e+00,  0.0000e+00,  ..., -8.7976e-04,
         -1.1101e-03,  1.1998e-04]], device='cuda:0', dtype=torch.float64), batch_sizes=tensor([16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1

In [None]:
nn.GRU()