In [1]:
import pickle
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

import torch.multiprocessing as multiprocessing


from torch import nn as nn
from torch import optim
from torch import functional as F

In [2]:
input_dir = 'data/pickles/'

In [3]:
label_2_num_mapping = {}
num_2_label_ar= []
data = {"x":[],"y":[]}

label_num = 0

for label in os.listdir(input_dir):
    if label == '.DS_Store':
        continue
    p_file = input_dir + label
    label_2_num_mapping[label]=label_num
    num_2_label_ar.append(label)

    with open(p_file, 'rb') as f:
        label_examples  = pickle.load(f)
        # data dim = num_examples * time * frequency
        label_ar = [np.swapaxes(np.array(example),0,1) for example in label_examples]
        data['x'].extend(label_ar)
        y_labels = [label_num for i in range(0,len(label_examples))]
        data['y'].extend(y_labels)
   
    label_num+=1
          

# Samping function CPU

In [4]:
def sample_batch(n, X,Y):
    """
    takes input and returns padded sample
    n= num_samples
    X = input_featurs_list
    Y = label_list
    """
    label_ids = np.random.randint(low = 0,high = len(X), size=n)
    frequency = len(X[0][0])
    sampled_X = [X[label_id] for label_id in label_ids]
    sampled_y = [Y[label_id] for label_id in label_ids]
    padded_X = []
    
    max_batch_len = max([len(x) for x in sampled_X])
    for x in sampled_X:
        padding_time_count = max_batch_len-len(x)
        if padding_time_count!=0:
            x_padded = np.zeros(shape = (max_batch_len,frequency))
            x_padded[:x.shape[0],:x.shape[1]] = x
            padded_X.append(x_padded)
        else:
            padded_X.append(x)
        
    return np.asarray(padded_X),np.asarray(sampled_y)

# Sampling using GPU

In [5]:
class WavesDatset(Dataset):
    """Loads the wavedataset"""

    def __init__(self, data, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.data = data
        self.transform = transform
        

    def __len__(self):
        return len(self.data['x'])

    def __getitem__(self, idx):
      
        sample = {'x': data['x'][idx], 'label': data['y'][idx]}

        if self.transform:
            sample = self.transform(sample)

        return sample

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        _inputs, label = sample['x'], sample['label']
        return {'x': torch.from_numpy(_inputs).float(),
                'label': torch.from_numpy(np.array(label))
               }


In [6]:
waves_dataset = WavesDatset(data,ToTensor())

In [8]:
# cuda check
for i in range(len(waves_dataset)):
    sample = waves_dataset[i]
    print(i, sample['x'].shape, sample['label'])
    break

0 torch.Size([44, 20]) tensor(0)


In [16]:
wave_dataloader = DataLoader(waves_dataset, batch_size=4,shuffle=True)

# Model

In [17]:
class Word_Predictor_RNN(nn.Module):
    def __init__(self, input_freq=20,hidden_size=256,linear_output_size = 128, n_categories=30):
        super().__init__()
        self.rnn = nn.LSTM(input_freq, hidden_size, batch_first=True, bidirectional=True)
        self.linear1 = nn.Linear(hidden_size, linear_output_size)
        self.linear2 = nn.Linear(linear_output_size, n_categories)

    def forward(self, x, initial_states):
        # input now stores floats and has shape [length, batch_size, embedding_size]
        self.rnn.flatten_parameters()
        x, final_states = self.rnn(x, initial_states)  # TODO
        x = final_states[0][0] * final_states[0][1]
        self.rnn.flatten_parameters()
        x = nn.functional.relu6(self.linear1(x))
        x = self.linear2(x)
        return x



# Model  Hyper Parameters

In [55]:
n_categories = len(label_2_num_mapping)
batch_size = 64
input_freq = 20
hidden_size=256
n_epochs = 1
n_iters = 50


In [56]:
model = Word_Predictor_RNN(input_freq=input_freq,hidden_size=hidden_size,n_categories=n_categories).cuda()
wave_dataloader = DataLoader(waves_dataset, batch_size=batch_size,shuffle=True)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [None]:
print(len(wave_dataloader))

# Model Training

In [59]:
losses = np.zeros(n_epochs) # For plotting

for epoch in range(n_epochs):

    for i_batch, sample_batched in enumerate(wave_dataloader):
        
        _inputs,_labels = sample_batch(50,data['x'],data['y'])
        inputs = torch.autograd.Variable(torch.from_numpy(_inputs).float())
        targets = torch.autograd.Variable(torch.from_numpy(_labels))

        inputs = torch.autograd.Variable(sample_batched['x'].cuda())
        targets = torch.autograd.Variable(sample_batched['label'].cuda())
        outputs = model(inputs, None)

        optimizer.zero_grad()
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        losses[epoch] += loss.item()
        if i_batch%200==0:
            curr_avg_loss = losses[epoch]/(i_batch+1* len(inputs))
            print("Epoch Number : {} Batch_Number: {} Avg Loss: {}".format(epoch,i_batch,curr_avg_loss))
        

    if epoch > 0:
        print(epoch, losses.data[epoch])

    # Use some plotting library
    # if epoch % 10 == 0:
        # show_plot('inputs', _inputs, True)
        # show_plot('outputs', outputs.data.view(-1), True)
        # show_plot('losses', losses[:epoch] / n_iters)

        # Generate a test
        # outputs, hidden = model(inputs, False, 50)
        # show_plot('generated', outputs.data.view(-1), True)



Epoch Number : 0 Batch_Number: 0 Avg Loss: 0.05318392813205719
