In [28]:
import feature_extraction as fe

import numpy as np

import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [29]:
spectrograms, labels = fe.extract_spectrograms('../datasets/GTZAN.gui')

Currently processing:  100
Currently processing:  200
Currently processing:  300
Currently processing:  400
Currently processing:  500
Currently processing:  600
Currently processing:  700
Currently processing:  800
Currently processing:  900
Currently processing:  1000


In [31]:
x_trainval, x_test, y_trainval, y_test = train_test_split(spectrograms, labels, test_size=0.2)
x_train, x_val, y_train, y_val = train_test_split(x_trainval, y_trainval, test_size=0.25)

In [32]:
train_data = [(x, y) for x, y in zip(x_train, y_train)]
val_data = [(x, y) for x, y in zip(x_val, y_val)]
test_data = [(x, y) for x, y in zip(x_test, y_test)]

print(len(train_data), "training samples")
print(len(val_data), "validation samples")
print(len(test_data), "test samples")

600 training samples
200 validation samples
200 test samples


In [44]:
seed = 1111
torch.manual_seed(seed)

# Hyper-parameters 
input_size = 25
hidden_size = [256, 128, 64]
#hidden_size = [25, 20, 15]
n_features = [256, 128, 64] #Get from xtrain (see BatchNorm1d)
kernel_size = 5

num_classes = 10
num_epochs = 100
batch_size = 32
learning_rate = 0.001

# Fully connected neural network with one hidden layer
class CNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(CNNClassifier, self).__init__()
        self.conv1d1 = nn.Conv1d(input_size, hidden_size[0], kernel_size)
        self.conv1d1_bn = nn.BatchNorm1d(n_features, momentum=0.9)
        self.pool1 = torch.nn.MaxPool1d(kernel_size=2)
        self.dropout1 = nn.Dropout(0.5)
        
        self.conv1d2 = nn.Conv1d(input_size, hidden_size[1], kernel_size)
        self.conv1d2_bn = nn.BatchNorm1d(n_features, momentum=0.9)
        self.pool2 = torch.nn.MaxPool1d(kernel_size=2)
        self.dropout2 = nn.Dropout(0.5)
        
        self.conv1d3 = nn.Conv1d(input_size, hidden_size[2], kernel_size)
        self.conv1d3_bn = nn.BatchNorm1d(n_features, momentum=0.9)
        self.pool3 = torch.nn.MaxPool1d(kernel_size=2)
        self.dropout3 = nn.Dropout(0.5)
        
        self.fc = nn.Linear(hidden_size[0], num_classes)
        
    
    def forward(self, x):
        #order: conv, batchnorm, activation, maxpool, drop
        #Calls:
        #Convolution 1d: out = F.relu(self.conv1d1(x))
        #Batch           normalization: out = self.conv1d1_bn(out)
        #Dropout:        out = self.dropout1(out)
        #Max Pooling:    out = self.pool1(out)
        
        #Layer1
        out = F.relu(self.conv1d1_bn(self.conv1d1(x)))
        out = self.pool1(out)
        out = self.dropout1(out)
        
        #Layer2
        out = F.relu(self.conv1d2_bn(self.conv1d2(x)))
        out = self.pool2(out)
        out = self.dropout2(out)
        
        #Layer3
        out = F.relu(self.conv1d2_bn(self.conv1d2(x)))
        out = self.pool2(out)
        out = self.dropout2(out)
        
        #TimeDistributed
        
        
        #Time_distributed_merge_layer
        
        
        #Softmax
        out = self.softmax(self.fc(out))
        
        return out # we can return here the softmax, but then we should use the nll_loss instead of the cross_entropy
    

model = CNNClassifier(input_size, hidden_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 

In [37]:
def data_chunker(data, batch_size):
    random.shuffle(data)
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]

In [45]:
# Train the model
total_step = len(train_data)
for epoch in range(num_epochs):
    #for i, (features, label) in enumerate(train_data):
    data_chunks = data_chunker(train_data, batch_size)
    for i, chunk in enumerate(data_chunks):
        x, y = list(zip(*chunk))
        
        # Move tensors to the configured device
        features = torch.from_numpy(np.array(x)).float().to(device)
        #label = torch.from_numpy(np.array([label])).to(device)
        lbls = torch.from_numpy(np.array(y)).to(device)

        # clear all the gradients of the optimized tensors
        optimizer.zero_grad()

        # Forward pass
        output = model(spectrograms)
        loss = criterion(output, lbls)

        # Backward and optimize
        loss.backward()
        optimizer.step()

        if (i+1)*batch_size % 200 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, (i+1)*batch_size, total_step, loss.item()))

TypeError: conv1d(): argument 'input' (position 1) must be Tensor, not numpy.ndarray