# Baselines - Feed Forward for Different Representations

Code References:
- Data loading: https://discuss.pytorch.org/t/input-numpy-ndarray-instead-of-images-in-a-cnn/18797/3
- Train and test functions adapted from discussion sections code

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset, DataLoader

## Perceptron - 1 Hidden Layer

In [2]:
class perceptron(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(perceptron, self).__init__()
        self.fc1 = nn.Linear(in_features=in_channels, out_features= 1024)
        self.bn1 = nn.BatchNorm1d(num_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features= out_channels)
    
    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.softmax(self.fc2(x), dim = 1)
        return x

### Chromagrams

In [3]:
## Load data
train_chroma = np.load("Data/train_chroma12.npy")
test_chroma = np.load("Data/test_chroma12.npy")
train_labels = np.load("Data/train_labels_chroma12.npy")
test_labels = np.load("Data/test_labels_chroma12.npy")

In [4]:
train_chroma.shape

(5366, 12, 431)

In [5]:
test_chroma.shape

(3842, 12, 431)

In [6]:
print(np.mean(train_chroma), np.std(train_chroma))

0.003052163 0.9994745


In [7]:
# from https://discuss.pytorch.org/t/input-numpy-ndarray-instead-of-images-in-a-cnn/18797/3

class MyDataset(Dataset):
    def __init__(self, data, target, transform=None):
        self.data = torch.from_numpy(data).float()
        self.target = torch.from_numpy(target).long()
        self.transform = transform
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]
        
        if self.transform:
            x = self.transform(x)
            
        return x, y
    
    def __len__(self):
        return len(self.data)

In [8]:
bs = 32

train_set = MyDataset(train_chroma, train_labels)
train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
test_set = MyDataset(test_chroma, test_labels)
test_loader = DataLoader(test_set, batch_size=bs, shuffle=False)

In [9]:
train_chroma.shape

(5366, 12, 431)

In [10]:
def train(model, train_loader, criterion, optimizer, epoch):
    train_loss = 0
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(data.shape[0], data.shape[1]*data.shape[2])
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        if batch_idx % (len(train_loader)//2) == 0:
            print('Train({})[{:.0f}%]: Loss: {:.4f}'.format(
                epoch, 100. * batch_idx / len(train_loader), train_loss/(batch_idx+1)))

def test(model, test_loader, criterion, epoch, batch_size = 32):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.view(data.shape[0], data.shape[1]*data.shape[2])
            output = model(data)
            test_loss += criterion(output, target).item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    
    test_loss = (test_loss*batch_size)/len(test_loader.dataset)
    print('Test({}): Loss: {:.4f}, Accuracy: {:.4f}%'.format(
        epoch, test_loss, 100. * correct / len(test_loader.dataset)))

In [11]:
num_epochs = 10
# device = torch.device(device)
torch.manual_seed(17)

<torch._C.Generator at 0x7f4f38295fb0>

In [12]:
in_channels = train_chroma.shape[1]*train_chroma.shape[2]
out_channels = 5
model = perceptron(in_channels, out_channels) #.to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

In [13]:
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.6113
Train(1)[50%]: Loss: 1.4828
Test(1): Loss: 1.5031, Accuracy: 38.4956%
Train(2)[0%]: Loss: 1.3561
Train(2)[50%]: Loss: 1.3047
Test(2): Loss: 1.5130, Accuracy: 38.4695%
Train(3)[0%]: Loss: 1.3924
Train(3)[50%]: Loss: 1.2124
Test(3): Loss: 1.5128, Accuracy: 37.0901%
Train(4)[0%]: Loss: 1.2954
Train(4)[50%]: Loss: 1.1342
Test(4): Loss: 1.5219, Accuracy: 36.7257%
Train(5)[0%]: Loss: 1.2120
Train(5)[50%]: Loss: 1.0807
Test(5): Loss: 1.5254, Accuracy: 36.2051%
Train(6)[0%]: Loss: 0.9814
Train(6)[50%]: Loss: 1.0395
Test(6): Loss: 1.5366, Accuracy: 35.1119%
Train(7)[0%]: Loss: 1.1278
Train(7)[50%]: Loss: 1.0150
Test(7): Loss: 1.5362, Accuracy: 35.2421%
Train(8)[0%]: Loss: 0.9940
Train(8)[50%]: Loss: 0.9878
Test(8): Loss: 1.5396, Accuracy: 34.8777%
Train(9)[0%]: Loss: 0.9591
Train(9)[50%]: Loss: 0.9760
Test(9): Loss: 1.5387, Accuracy: 34.8256%
Train(10)[0%]: Loss: 1.0769
Train(10)[50%]: Loss: 0.9633
Test(10): Loss: 1.5414, Accuracy: 34.3571%


### MFCC-13

In [14]:
## Load data
train_mfcc = np.load("Data/train_mfcc13.npy")
test_mfcc = np.load("Data/test_mfcc13.npy")
train_labels = np.load("Data/train_labels_mfcc13.npy")
test_labels = np.load("Data/test_labels_mfcc13.npy")

In [15]:
bs = 32

train_set_mfcc = MyDataset(train_mfcc, train_labels)
train_loader_mfcc = DataLoader(train_set_mfcc, batch_size=bs, shuffle=True)
test_set_mfcc = MyDataset(test_mfcc, test_labels)
test_loader_mfcc = DataLoader(test_set_mfcc, batch_size=bs, shuffle=False)

In [16]:
in_channels = train_mfcc.shape[1]*train_mfcc.shape[2]
out_channels = 5
model = perceptron(in_channels, out_channels) #.to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

In [17]:
for epoch in range(1, num_epochs + 1):
    train(model, train_loader_mfcc, criterion, optimizer, epoch)
    test(model, test_loader_mfcc, criterion, epoch)

Train(1)[0%]: Loss: 1.6064
Train(1)[50%]: Loss: 1.4532
Test(1): Loss: 1.4213, Accuracy: 47.1629%
Train(2)[0%]: Loss: 1.2893
Train(2)[50%]: Loss: 1.3162
Test(2): Loss: 1.3757, Accuracy: 55.5440%
Train(3)[0%]: Loss: 1.2204
Train(3)[50%]: Loss: 1.2551
Test(3): Loss: 1.4045, Accuracy: 50.0260%
Train(4)[0%]: Loss: 1.2398
Train(4)[50%]: Loss: 1.2168
Test(4): Loss: 1.3956, Accuracy: 50.1562%
Train(5)[0%]: Loss: 1.2242
Train(5)[50%]: Loss: 1.1859
Test(5): Loss: 1.3698, Accuracy: 54.9714%
Train(6)[0%]: Loss: 1.1897
Train(6)[50%]: Loss: 1.1575
Test(6): Loss: 1.3736, Accuracy: 53.9563%
Train(7)[0%]: Loss: 1.2222
Train(7)[50%]: Loss: 1.1319
Test(7): Loss: 1.3672, Accuracy: 54.1645%
Train(8)[0%]: Loss: 1.2113
Train(8)[50%]: Loss: 1.1038
Test(8): Loss: 1.4226, Accuracy: 47.8917%
Train(9)[0%]: Loss: 1.0940
Train(9)[50%]: Loss: 1.0970
Test(9): Loss: 1.3703, Accuracy: 53.9823%
Train(10)[0%]: Loss: 1.1379
Train(10)[50%]: Loss: 1.0922
Test(10): Loss: 1.3670, Accuracy: 53.9823%


## Feed Forward - Examine Impact of Vertical + Horizontal Structure Loss

- 4 hidden layers
- each has 1024 hidden units
- batch normalization and weight decay
- relu for each activation
- softmax output and cross entropy loss

In [18]:
class feed_forward(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(feed_forward, self).__init__()
        self.fc1 = nn.Linear(in_features=in_channels, out_features= 1024)
        self.bn1 = nn.BatchNorm1d(num_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features= 512)
        self.bn2 = nn.BatchNorm1d(num_features=512)
        self.fc3 = nn.Linear(in_features=512, out_features= 256)
        self.bn3 = nn.BatchNorm1d(num_features=256)
        self.fc4 = nn.Linear(in_features=256, out_features= 128)
        self.bn4 = nn.BatchNorm1d(num_features=128)
        self.fc5 = nn.Linear(in_features=128, out_features= 64)
        self.bn5 = nn.BatchNorm1d(num_features=64)
        self.fc6 = nn.Linear(in_features=64, out_features=out_channels)
    
    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.fc2(x)))
        x = F.relu(self.bn3(self.fc3(x)))
        x = F.relu(self.bn4(self.fc4(x)))
        x = F.relu(self.bn5(self.fc5(x)))
#         x = self.fc4(x)
        x = F.softmax(self.fc6(x), dim = 1)
        return x

## Chromagrams

In [19]:
128/2

64.0

In [20]:
in_channels = train_chroma.shape[1]*train_chroma.shape[2]
out_channels = 5
model = feed_forward(in_channels, out_channels) #.to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

In [21]:
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.6074
Train(1)[50%]: Loss: 1.5573
Test(1): Loss: 1.5372, Accuracy: 39.4066%
Train(2)[0%]: Loss: 1.3934
Train(2)[50%]: Loss: 1.3612
Test(2): Loss: 1.5252, Accuracy: 40.7340%
Train(3)[0%]: Loss: 1.2151
Train(3)[50%]: Loss: 1.2166
Test(3): Loss: 1.5157, Accuracy: 40.5258%
Train(4)[0%]: Loss: 1.1171
Train(4)[50%]: Loss: 1.1223
Test(4): Loss: 1.5171, Accuracy: 38.2613%
Train(5)[0%]: Loss: 1.0248
Train(5)[50%]: Loss: 1.0472
Test(5): Loss: 1.5243, Accuracy: 38.2093%
Train(6)[0%]: Loss: 1.0021
Train(6)[50%]: Loss: 1.0134
Test(6): Loss: 1.5123, Accuracy: 39.1983%
Train(7)[0%]: Loss: 0.9718
Train(7)[50%]: Loss: 0.9779
Test(7): Loss: 1.5103, Accuracy: 38.8600%
Train(8)[0%]: Loss: 0.9529
Train(8)[50%]: Loss: 0.9638
Test(8): Loss: 1.5167, Accuracy: 37.4805%
Train(9)[0%]: Loss: 0.9647
Train(9)[50%]: Loss: 0.9485
Test(9): Loss: 1.5163, Accuracy: 37.6106%
Train(10)[0%]: Loss: 0.9674
Train(10)[50%]: Loss: 0.9507
Test(10): Loss: 1.5053, Accuracy: 38.6257%


## MFCC - 13

In [22]:
in_channels = train_mfcc.shape[1]*train_mfcc.shape[2]
out_channels = 5
model = feed_forward(in_channels, out_channels) #.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

In [23]:
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(model, train_loader_mfcc, criterion, optimizer, epoch)
    test(model, test_loader_mfcc, criterion, epoch)

Train(1)[0%]: Loss: 1.6025
Train(1)[50%]: Loss: 1.5330
Test(1): Loss: 1.4530, Accuracy: 51.0672%
Train(2)[0%]: Loss: 1.4233
Train(2)[50%]: Loss: 1.3732
Test(2): Loss: 1.4282, Accuracy: 54.2166%
Train(3)[0%]: Loss: 1.3413
Train(3)[50%]: Loss: 1.2978
Test(3): Loss: 1.4115, Accuracy: 52.0302%
Train(4)[0%]: Loss: 1.1278
Train(4)[50%]: Loss: 1.2315
Test(4): Loss: 1.3595, Accuracy: 57.4180%
Train(5)[0%]: Loss: 1.1386
Train(5)[50%]: Loss: 1.1937
Test(5): Loss: 1.3981, Accuracy: 52.0042%
Train(6)[0%]: Loss: 1.1493
Train(6)[50%]: Loss: 1.1483
Test(6): Loss: 1.3498, Accuracy: 56.4029%
Train(7)[0%]: Loss: 1.1657
Train(7)[50%]: Loss: 1.1282
Test(7): Loss: 1.3452, Accuracy: 57.0016%
Train(8)[0%]: Loss: 1.1316
Train(8)[50%]: Loss: 1.1012
Test(8): Loss: 1.3770, Accuracy: 52.9932%
Train(9)[0%]: Loss: 1.0526
Train(9)[50%]: Loss: 1.1038
Test(9): Loss: 1.3757, Accuracy: 53.5659%
Train(10)[0%]: Loss: 1.1543
Train(10)[50%]: Loss: 1.0709
Test(10): Loss: 1.3354, Accuracy: 57.8605%


## Spectrograms - Subset

- Train Model on each subset of data at a time

In [24]:
## Load data

train_stft1 = np.load("Data/train_stft-dB-1.npy", allow_pickle = True)
test_stft1 = np.load("Data/test_stft-dB-1.npy")
train_labels1 = np.load("Data/train_labels_stft-dB-1.npy")
test_labels1 = np.load("Data/test_labels_stft-dB-1.npy")

## Load new data
train_stft2 = np.load("Data/train_stft-dB-2.npy", allow_pickle = True)
test_stft2 = np.load("Data/test_stft-dB-2.npy")
train_labels2 = np.load("Data/train_labels_stft-dB-2.npy")
test_labels2 = np.load("Data/test_labels_stft-dB-2.npy")


In [25]:
train_stft = np.concatenate((train_stft1, train_stft2))
test_stft = np.concatenate((test_stft1, test_stft2))
train_labels = np.concatenate((train_labels1, train_labels2))
test_labels = np.concatenate((test_labels1, test_labels2))

In [26]:
train_stft.shape

(2147, 64, 431)

In [27]:
## Setup model

in_channels = train_stft.shape[1]*train_stft.shape[2]
out_channels = 5
model = feed_forward(in_channels, out_channels) #.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

In [28]:
bs = 32

train_set = MyDataset(train_stft, train_labels)
train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
test_set = MyDataset(test_stft, test_labels)
test_loader = DataLoader(test_set, batch_size=bs, shuffle=False)

In [29]:
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.6184
Train(1)[50%]: Loss: 1.5944
Test(1): Loss: 1.5633, Accuracy: 41.7425%
Train(2)[0%]: Loss: 1.5522
Train(2)[50%]: Loss: 1.4922
Test(2): Loss: 1.5189, Accuracy: 45.7737%
Train(3)[0%]: Loss: 1.4186
Train(3)[50%]: Loss: 1.4116
Test(3): Loss: 1.4997, Accuracy: 48.1795%
Train(4)[0%]: Loss: 1.2587
Train(4)[50%]: Loss: 1.3359
Test(4): Loss: 1.4798, Accuracy: 48.5696%
Train(5)[0%]: Loss: 1.2270
Train(5)[50%]: Loss: 1.2904
Test(5): Loss: 1.4846, Accuracy: 48.0494%
Train(6)[0%]: Loss: 1.2158
Train(6)[50%]: Loss: 1.2289
Test(6): Loss: 1.4712, Accuracy: 48.7646%
Train(7)[0%]: Loss: 1.2271
Train(7)[50%]: Loss: 1.1948
Test(7): Loss: 1.4633, Accuracy: 48.5046%
Train(8)[0%]: Loss: 1.2182
Train(8)[50%]: Loss: 1.1543
Test(8): Loss: 1.4723, Accuracy: 46.6840%
Train(9)[0%]: Loss: 1.1040
Train(9)[50%]: Loss: 1.1194
Test(9): Loss: 1.4873, Accuracy: 45.4486%
Train(10)[0%]: Loss: 1.1376
Train(10)[50%]: Loss: 1.1031
Test(10): Loss: 1.4543, Accuracy: 48.6346%


In [30]:
## Load new data
train_stft1 = np.load("Data/train_stft-dB-3.npy", allow_pickle = True)
test_stft1 = np.load("Data/test_stft-dB-3.npy")
train_labels1 = np.load("Data/train_labels_stft-dB-3.npy")
test_labels1 = np.load("Data/test_labels_stft-dB-3.npy")

train_stft2 = np.load("Data/train_stft-dB-4.npy", allow_pickle = True)
test_stft2 = np.load("Data/test_stft-dB-4.npy")
train_labels2 = np.load("Data/train_labels_stft-dB-4.npy")
test_labels2 = np.load("Data/test_labels_stft-dB-4.npy")

train_stft = np.concatenate((train_stft1, train_stft2))
test_stft = np.concatenate((test_stft1, test_stft2))
train_labels = np.concatenate((train_labels1, train_labels2))
test_labels = np.concatenate((test_labels1, test_labels2))

bs = 32

train_set = MyDataset(train_stft, train_labels)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
test_set = MyDataset(test_stft, test_labels)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.2863
Train(1)[50%]: Loss: 1.3045
Test(1): Loss: 1.3974, Accuracy: 51.4974%
Train(2)[0%]: Loss: 1.1357
Train(2)[50%]: Loss: 1.2374
Test(2): Loss: 1.3703, Accuracy: 54.8177%
Train(3)[0%]: Loss: 1.1405
Train(3)[50%]: Loss: 1.1600
Test(3): Loss: 1.3731, Accuracy: 53.1901%
Train(4)[0%]: Loss: 1.2903
Train(4)[50%]: Loss: 1.1382
Test(4): Loss: 1.3779, Accuracy: 52.2786%
Train(5)[0%]: Loss: 1.1144
Train(5)[50%]: Loss: 1.1071
Test(5): Loss: 1.3816, Accuracy: 52.0833%
Train(6)[0%]: Loss: 1.0812
Train(6)[50%]: Loss: 1.0647
Test(6): Loss: 1.3643, Accuracy: 53.7109%
Train(7)[0%]: Loss: 1.0875
Train(7)[50%]: Loss: 1.0561
Test(7): Loss: 1.3882, Accuracy: 51.7578%
Train(8)[0%]: Loss: 1.0247
Train(8)[50%]: Loss: 1.0291
Test(8): Loss: 1.3700, Accuracy: 53.9714%
Train(9)[0%]: Loss: 1.0430
Train(9)[50%]: Loss: 1.0250
Test(9): Loss: 1.3699, Accuracy: 53.5156%
Train(10)[0%]: Loss: 1.0294
Train(10)[50%]: Loss: 1.0247
Test(10): Loss: 1.3955, Accuracy: 50.9766%


In [31]:
## Load new data
train_stft = np.load("Data/train_stft-dB-5.npy", allow_pickle = True)
test_stft = np.load("Data/test_stft-dB-5.npy")
train_labels = np.load("Data/train_labels_stft-dB-5.npy")
test_labels = np.load("Data/test_labels_stft-dB-5.npy")


bs = 32

train_set = MyDataset(train_stft, train_labels)
train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
test_set = MyDataset(test_stft, test_labels)
test_loader = DataLoader(test_set, batch_size=bs, shuffle=False)

num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader, criterion, epoch)

Train(1)[0%]: Loss: 1.1574
Train(1)[50%]: Loss: 1.2358
Test(1): Loss: 1.3981, Accuracy: 51.8229%
Train(2)[0%]: Loss: 1.1395
Train(2)[50%]: Loss: 1.1886
Test(2): Loss: 1.3840, Accuracy: 52.4740%
Train(3)[0%]: Loss: 1.1853
Train(3)[50%]: Loss: 1.1411
Test(3): Loss: 1.3792, Accuracy: 51.3021%
Train(4)[0%]: Loss: 1.0862
Train(4)[50%]: Loss: 1.0964
Test(4): Loss: 1.3720, Accuracy: 54.2969%
Train(5)[0%]: Loss: 1.0977
Train(5)[50%]: Loss: 1.0691
Test(5): Loss: 1.3704, Accuracy: 54.8177%
Train(6)[0%]: Loss: 1.1407
Train(6)[50%]: Loss: 1.0408
Test(6): Loss: 1.3792, Accuracy: 51.6927%
Train(7)[0%]: Loss: 1.0452
Train(7)[50%]: Loss: 1.0188
Test(7): Loss: 1.3969, Accuracy: 49.8698%
Train(8)[0%]: Loss: 1.0318
Train(8)[50%]: Loss: 1.0111
Test(8): Loss: 1.3932, Accuracy: 50.0000%
Train(9)[0%]: Loss: 1.0377
Train(9)[50%]: Loss: 1.0000
Test(9): Loss: 1.3718, Accuracy: 52.9948%
Train(10)[0%]: Loss: 0.9355
Train(10)[50%]: Loss: 0.9885
Test(10): Loss: 1.3850, Accuracy: 52.3438%
