# LSTM Baseline - Chromagram, MFCCs, Spectrogram

Code References:
- Data loading: https://discuss.pytorch.org/t/input-numpy-ndarray-instead-of-images-in-a-cnn/18797/3
- Train and test functions adapted from discussion sections code
- LSTM Code adapted from https://discuss.pytorch.org/t/example-of-many-to-one-lstm/1728/2

LSTM has two hidden layers and 5 dimensional hidden features

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import torch.autograd as autograd
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

## LSTM

In [2]:
# from https://discuss.pytorch.org/t/input-numpy-ndarray-instead-of-images-in-a-cnn/18797/3

class MyDataset(Dataset):
    def __init__(self, data, target, transform=None):
        self.data = torch.from_numpy(data).float()
        self.target = torch.from_numpy(target).long()
        self.transform = transform
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]
        
        if self.transform:
            x = self.transform(x)
            
        return x, y
    
    def __len__(self):
        return len(self.data)

## Chromagrams

In [3]:
## Load data
train_chroma = np.load("Data/train_chroma12.npy")
test_chroma = np.load("Data/test_chroma12.npy")
train_labels = np.load("Data/train_labels_chroma12.npy")
test_labels = np.load("Data/test_labels_chroma12.npy")

In [4]:
## Need to split sequences, currently too long
train_chroma.shape


(5366, 12, 431)

In [5]:
trainC = np.concatenate(np.array_split(train_chroma, 12, axis = 2)[:-1])
train_labels = np.tile(train_labels, 11)

testC = np.concatenate(np.array_split(test_chroma, 12, axis = 2)[:-1])
test_labels = np.tile(test_labels, 11)

In [6]:
trainC.shape

(59026, 12, 36)

In [7]:
train_labels

array([2, 4, 1, ..., 2, 4, 1])

In [8]:
bs = 32

train_set = MyDataset(trainC, train_labels)
train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
test_set = MyDataset(testC, test_labels)
test_loader = DataLoader(test_set, batch_size=bs, shuffle=False)

In [9]:
## Code adapted from https://discuss.pytorch.org/t/example-of-many-to-one-lstm/1728/2

classes_no = 5
in_size = 12

model = nn.LSTM(in_size, classes_no, 2).float()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters()) #lr=0.0001, weight_decay=1e-5

In [3]:
def train(model, train_loader, criterion, optimizer, epoch):
    train_loss = 0
    model.train()
    
    for batch_idx, (data, target) in enumerate(train_loader):
        batch_size = data.shape[0]
        time_steps = data.shape[2]
        in_size = data.shape[1]
        
        
        input_seq = data.view(time_steps, batch_size, in_size)
        
        output_seq, _ = model(input_seq.float())
        last_output = output_seq[-1]
        
        model.zero_grad()
        loss = criterion(last_output, target)
        loss.backward()
        
        optimizer.step()
        train_loss += loss.item()
        if batch_idx % (len(train_loader)//2) == 0:
            print('Train({})[{:.0f}%]: Loss: {:.4f}'.format(
                epoch, 100. * batch_idx / len(train_loader), train_loss/(batch_idx+1)))

def test(model, test_loader, criterion, epoch, batch_size = 32):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            batch_size = data.shape[0]
            time_steps = data.shape[2]
            in_size = data.shape[1]
        
        
            input_seq = data.view(time_steps, batch_size, in_size)
        
            output_seq, _ = model(input_seq.float())
            last_output = output_seq[-1]
            
            test_loss += criterion(last_output, target).item() # sum up batch loss
            pred = last_output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            
            correct += pred.eq(target.view_as(pred)).sum().item()
    
    test_loss = (test_loss*batch_size)/len(test_loader.dataset)
    print('Test({}): Loss: {:.4f}, Accuracy: {:.4f}%'.format(
        epoch, test_loss, 100. * correct / len(test_loader.dataset)))

In [11]:
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.6192
Train(1)[50%]: Loss: 1.5803
Train(1)[100%]: Loss: 1.5783
Test(1): Loss: 1.0909, Accuracy: 25.1124%
Train(2)[0%]: Loss: 1.5080
Train(2)[50%]: Loss: 1.5770
Train(2)[100%]: Loss: 1.5756
Test(2): Loss: 1.0895, Accuracy: 25.1195%
Train(3)[0%]: Loss: 1.5614
Train(3)[50%]: Loss: 1.5751
Train(3)[100%]: Loss: 1.5757
Test(3): Loss: 1.0905, Accuracy: 25.1124%
Train(4)[0%]: Loss: 1.4916
Train(4)[50%]: Loss: 1.5764
Train(4)[100%]: Loss: 1.5755
Test(4): Loss: 1.0897, Accuracy: 25.1219%
Train(5)[0%]: Loss: 1.5889
Train(5)[50%]: Loss: 1.5754
Train(5)[100%]: Loss: 1.5755
Test(5): Loss: 1.0903, Accuracy: 25.1337%
Train(6)[0%]: Loss: 1.5464
Train(6)[50%]: Loss: 1.5747
Train(6)[100%]: Loss: 1.5754
Test(6): Loss: 1.0900, Accuracy: 25.1171%
Train(7)[0%]: Loss: 1.5618
Train(7)[50%]: Loss: 1.5761
Train(7)[100%]: Loss: 1.5754
Test(7): Loss: 1.0907, Accuracy: 25.1171%
Train(8)[0%]: Loss: 1.6087
Train(8)[50%]: Loss: 1.5740
Train(8)[100%]: Loss: 1.5754
Test(8): Loss: 1.0899, Accuracy: 2

## MFCC

In [12]:
## Load data
train_mfcc = np.load("Data/train_mfcc13.npy")
test_mfcc = np.load("Data/test_mfcc13.npy")
train_labels = np.load("Data/train_labels_mfcc13.npy")
test_labels = np.load("Data/test_labels_mfcc13.npy")
## Need to split sequences, currently too long
train_mfcc.shape


(5366, 13, 431)

In [13]:
trainM = np.concatenate(np.array_split(train_mfcc, 12, axis = 2)[:-1])
train_labels = np.tile(train_labels, 11)

testM = np.concatenate(np.array_split(test_mfcc, 12, axis = 2)[:-1])
test_labels = np.tile(test_labels, 11)

In [14]:
trainM.shape

(59026, 13, 36)

In [15]:
bs = 32

train_set = MyDataset(trainM, train_labels)
train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
test_set = MyDataset(testM, test_labels)
test_loader = DataLoader(test_set, batch_size=bs, shuffle=False)

In [16]:
classes_no = 5
in_size = 13

model = nn.LSTM(in_size, classes_no, 2).float()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5) #lr=0.0001, weight_decay=1e-5

In [17]:
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.6423
Train(1)[50%]: Loss: 1.6250
Train(1)[100%]: Loss: 1.6147
Test(1): Loss: 1.0978, Accuracy: 25.1195%
Train(2)[0%]: Loss: 1.6001
Train(2)[50%]: Loss: 1.5862
Train(2)[100%]: Loss: 1.5838
Test(2): Loss: 1.0914, Accuracy: 25.1171%
Train(3)[0%]: Loss: 1.5813
Train(3)[50%]: Loss: 1.5788
Train(3)[100%]: Loss: 1.5774
Test(3): Loss: 1.0901, Accuracy: 25.1171%
Train(4)[0%]: Loss: 1.5414
Train(4)[50%]: Loss: 1.5760
Train(4)[100%]: Loss: 1.5756
Test(4): Loss: 1.0901, Accuracy: 25.1171%
Train(5)[0%]: Loss: 1.6211
Train(5)[50%]: Loss: 1.5755
Train(5)[100%]: Loss: 1.5754
Test(5): Loss: 1.0903, Accuracy: 25.1171%
Train(6)[0%]: Loss: 1.6749
Train(6)[50%]: Loss: 1.5751
Train(6)[100%]: Loss: 1.5753
Test(6): Loss: 1.0903, Accuracy: 25.1171%
Train(7)[0%]: Loss: 1.5018
Train(7)[50%]: Loss: 1.5766
Train(7)[100%]: Loss: 1.5753
Test(7): Loss: 1.0904, Accuracy: 25.1171%
Train(8)[0%]: Loss: 1.5397
Train(8)[50%]: Loss: 1.5742
Train(8)[100%]: Loss: 1.5753
Test(8): Loss: 1.0902, Accuracy: 2

## Spectrograms

In [4]:
## Load data

train_stft1 = np.load("Data/train_stft-dB-1.npy", allow_pickle = True)
test_stft1 = np.load("Data/test_stft-dB-1.npy")
train_labels1 = np.load("Data/train_labels_stft-dB-1.npy")
test_labels1 = np.load("Data/test_labels_stft-dB-1.npy")

## Load new data
train_stft2 = np.load("Data/train_stft-dB-2.npy", allow_pickle = True)
test_stft2 = np.load("Data/test_stft-dB-2.npy")
train_labels2 = np.load("Data/train_labels_stft-dB-2.npy")
test_labels2 = np.load("Data/test_labels_stft-dB-2.npy")

train_stft = np.concatenate((train_stft1, train_stft2))
test_stft = np.concatenate((test_stft1, test_stft2))
train_labels = np.concatenate((train_labels1, train_labels2))
test_labels = np.concatenate((test_labels1, test_labels2))

print(train_stft.shape)

(2147, 64, 431)


In [5]:
trainS = np.concatenate(np.array_split(train_stft, 12, axis = 2)[:-1])
train_labels = np.tile(train_labels, 11)

testS = np.concatenate(np.array_split(test_stft, 12, axis = 2)[:-1])
test_labels = np.tile(test_labels, 11)

In [6]:
trainS.shape

(23617, 64, 36)

In [7]:
bs = 32

train_set = MyDataset(trainS, train_labels)
train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
test_set = MyDataset(testS, test_labels)
test_loader = DataLoader(test_set, batch_size=bs, shuffle=False)

In [10]:
classes_no = 5
in_size = 64

model = nn.LSTM(in_size, classes_no, 2).float()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5) 

In [11]:
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.6520
Train(1)[50%]: Loss: 1.5988
Train(1)[100%]: Loss: 1.5941
Test(1): Loss: 1.0977, Accuracy: 23.7262%
Train(2)[0%]: Loss: 1.5751
Train(2)[50%]: Loss: 1.5844
Train(2)[100%]: Loss: 1.5822
Test(2): Loss: 1.0951, Accuracy: 24.3468%
Train(3)[0%]: Loss: 1.5766
Train(3)[50%]: Loss: 1.5806
Train(3)[100%]: Loss: 1.5778
Test(3): Loss: 1.0946, Accuracy: 24.3409%
Train(4)[0%]: Loss: 1.4484
Train(4)[50%]: Loss: 1.5803
Train(4)[100%]: Loss: 1.5768
Test(4): Loss: 1.0944, Accuracy: 24.3646%
Train(5)[0%]: Loss: 1.5410
Train(5)[50%]: Loss: 1.5792
Train(5)[100%]: Loss: 1.5761
Test(5): Loss: 1.0943, Accuracy: 24.3705%
Train(6)[0%]: Loss: 1.5542
Train(6)[50%]: Loss: 1.5747
Train(6)[100%]: Loss: 1.5763
Test(6): Loss: 1.0942, Accuracy: 24.3705%
Train(7)[0%]: Loss: 1.5921
Train(7)[50%]: Loss: 1.5759
Train(7)[100%]: Loss: 1.5752
Test(7): Loss: 1.0941, Accuracy: 24.3587%
Train(8)[0%]: Loss: 1.5238
Train(8)[50%]: Loss: 1.5735
Train(8)[100%]: Loss: 1.5752
Test(8): Loss: 1.0940, Accuracy: 2

In [12]:
## Load data

train_stft1 = np.load("Data/train_stft-dB-3.npy", allow_pickle = True)
test_stft1 = np.load("Data/test_stft-dB-3.npy")
train_labels1 = np.load("Data/train_labels_stft-dB-3.npy")
test_labels1 = np.load("Data/test_labels_stft-dB-3.npy")

## Load new data
train_stft2 = np.load("Data/train_stft-dB-4.npy", allow_pickle = True)
test_stft2 = np.load("Data/test_stft-dB-4.npy")
train_labels2 = np.load("Data/train_labels_stft-dB-4.npy")
test_labels2 = np.load("Data/test_labels_stft-dB-4.npy")

train_stft = np.concatenate((train_stft1, train_stft2))
test_stft = np.concatenate((test_stft1, test_stft2))
train_labels = np.concatenate((train_labels1, train_labels2))
test_labels = np.concatenate((test_labels1, test_labels2))

print(train_stft.shape)

trainS = np.concatenate(np.array_split(train_stft, 12, axis = 2)[:-1])
train_labels = np.tile(train_labels, 11)

testS = np.concatenate(np.array_split(test_stft, 12, axis = 2)[:-1])
test_labels = np.tile(test_labels, 11)

bs = 32

train_set = MyDataset(trainS, train_labels)
train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
test_set = MyDataset(testS, test_labels)
test_loader = DataLoader(test_set, batch_size=bs, shuffle=False)

(2146, 64, 431)


In [13]:
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.5894
Train(1)[50%]: Loss: 1.5757
Test(1): Loss: 1.5871, Accuracy: 25.5741%
Train(2)[0%]: Loss: 1.5440
Train(2)[50%]: Loss: 1.5747
Test(2): Loss: 1.5874, Accuracy: 25.4202%
Train(3)[0%]: Loss: 1.7034
Train(3)[50%]: Loss: 1.5729
Test(3): Loss: 1.5870, Accuracy: 25.3729%
Train(4)[0%]: Loss: 1.5665
Train(4)[50%]: Loss: 1.5739
Test(4): Loss: 1.5870, Accuracy: 25.3137%
Train(5)[0%]: Loss: 1.5195
Train(5)[50%]: Loss: 1.5734
Test(5): Loss: 1.5876, Accuracy: 25.3965%
Train(6)[0%]: Loss: 1.6145
Train(6)[50%]: Loss: 1.5719
Test(6): Loss: 1.5872, Accuracy: 25.3610%
Train(7)[0%]: Loss: 1.5638
Train(7)[50%]: Loss: 1.5712
Test(7): Loss: 1.5869, Accuracy: 25.2545%
Train(8)[0%]: Loss: 1.6528
Train(8)[50%]: Loss: 1.5703
Test(8): Loss: 1.5873, Accuracy: 25.3018%
Train(9)[0%]: Loss: 1.5396
Train(9)[50%]: Loss: 1.5697
Test(9): Loss: 1.5871, Accuracy: 25.3374%
Train(10)[0%]: Loss: 1.5201
Train(10)[50%]: Loss: 1.5731
Test(10): Loss: 1.5875, Accuracy: 25.1361%


In [14]:
## Load data

train_stft = np.load("Data/train_stft-dB-5.npy", allow_pickle = True)
test_stft = np.load("Data/test_stft-dB-5.npy")
train_labels = np.load("Data/train_labels_stft-dB-5.npy")
test_labels = np.load("Data/test_labels_stft-dB-5.npy")

print(train_stft.shape)

trainS = np.concatenate(np.array_split(train_stft, 12, axis = 2)[:-1])
train_labels = np.tile(train_labels, 11)

testS = np.concatenate(np.array_split(test_stft, 12, axis = 2)[:-1])
test_labels = np.tile(test_labels, 11)

bs = 32

train_set = MyDataset(trainS, train_labels)
train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
test_set = MyDataset(testS, test_labels)
test_loader = DataLoader(test_set, batch_size=bs, shuffle=False)

(1073, 64, 431)


In [15]:
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.5610
Train(1)[50%]: Loss: 1.5867
Train(1)[100%]: Loss: 1.5830
Test(1): Loss: 1.5795, Accuracy: 25.2486%
Train(2)[0%]: Loss: 1.5371
Train(2)[50%]: Loss: 1.5864
Train(2)[100%]: Loss: 1.5824
Test(2): Loss: 1.5798, Accuracy: 25.2723%
Train(3)[0%]: Loss: 1.6640
Train(3)[50%]: Loss: 1.5838
Train(3)[100%]: Loss: 1.5817
Test(3): Loss: 1.5802, Accuracy: 25.2131%
Train(4)[0%]: Loss: 1.5385
Train(4)[50%]: Loss: 1.5839
Train(4)[100%]: Loss: 1.5822
Test(4): Loss: 1.5804, Accuracy: 25.2249%
Train(5)[0%]: Loss: 1.5474
Train(5)[50%]: Loss: 1.5849
Train(5)[100%]: Loss: 1.5820
Test(5): Loss: 1.5806, Accuracy: 25.2723%
Train(6)[0%]: Loss: 1.5745
Train(6)[50%]: Loss: 1.5820
Train(6)[100%]: Loss: 1.5822
Test(6): Loss: 1.5809, Accuracy: 25.2249%
Train(7)[0%]: Loss: 1.6644
Train(7)[50%]: Loss: 1.5820
Train(7)[100%]: Loss: 1.5818
Test(7): Loss: 1.5809, Accuracy: 25.2131%
Train(8)[0%]: Loss: 1.6928
Train(8)[50%]: Loss: 1.5825
Train(8)[100%]: Loss: 1.5817
Test(8): Loss: 1.5810, Accuracy: 2