In [1]:
import feature_extraction as fe

import numpy as np

import os
import random
import pickle
import warnings
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

seed = 3
random.seed(seed)
torch.manual_seed(seed)

warnings.filterwarnings('ignore')

In [2]:
def data_batcher(data, batch_size):
    random.shuffle(data)
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]

In [3]:
dataset = 'GTZAN'
new_feats = False


dataset_filename = f'../datasets/{dataset}.gui'
pickle_filename = f'./feats_spects-pickles/{dataset_filename.split("/")[-1][:-4]}_feats.pkl'
if new_feats or not os.path.isfile(pickle_filename):
    spectrograms, labels = fe.extract_spectrograms(dataset_filename)
    with open(pickle_filename, 'wb') as f:
        pickle.dump([feats, labels], f)
else:
    with open(pickle_filename, 'rb') as f:
        feats, labels = pickle.load(f)
    print(f'{feats.shape[0]} features extracted')

1000 features extracted


In [4]:
scaler = StandardScaler()
scaled_feats = scaler.fit_transform(feats)

x_trainval, x_test, y_trainval, y_test = train_test_split(scaled_feats, labels, test_size=0.2)
x_train, x_val, y_train, y_val = train_test_split(x_trainval, y_trainval, test_size=0.25)

train_data = [(x, y) for x, y in zip(x_train, y_train)]
val_data = [(x, y) for x, y in zip(x_val, y_val)]
test_data = [(x, y) for x, y in zip(x_test, y_test)]

print(len(train_data), "training samples")
print(len(val_data), "validation samples")
print(len(test_data), "test samples")

600 training samples
200 validation samples
200 test samples


In [10]:
# Hyper-parameters 
input_size = 25
hidden_size = [256, 128, 64]
n_features = [256, 128, 64] #Get from xtrain (see BatchNorm1d)
kernel_size = 5

num_classes = 10
num_epochs = 100
batch_size = 32
learning_rate = 0.001

In [10]:
# Hyperparameters

input_size = 25
hidden_size = [256, 128, 64, 32]
num_classes = 10
n_features = [256, 128, 64] #Get from xtrain (see BatchNorm1d)
kernel_size = 5
train_batch_size = 100
valid_batch_size = 100

num_epochs = 1000
learning_rate = 0.0001

In [11]:
class CNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(CNNClassifier, self).__init__()
        self.conv1d1 = nn.Conv1d(input_size, hidden_size[0], kernel_size)
        self.conv1d1_bn = nn.BatchNorm1d(n_features, momentum=0.9)
        self.pool1 = torch.nn.MaxPool1d(kernel_size=2)
        self.dropout1 = nn.Dropout(0.5)
        
        self.conv1d2 = nn.Conv1d(input_size, hidden_size[1], kernel_size)
        self.conv1d2_bn = nn.BatchNorm1d(n_features, momentum=0.9)
        self.pool2 = torch.nn.MaxPool1d(kernel_size=2)
        self.dropout2 = nn.Dropout(0.5)
        
        self.conv1d3 = nn.Conv1d(input_size, hidden_size[2], kernel_size)
        self.conv1d3_bn = nn.BatchNorm1d(n_features, momentum=0.9)
        self.pool3 = torch.nn.MaxPool1d(kernel_size=2)
        self.dropout3 = nn.Dropout(0.5)
        
        self.fc = nn.Linear(hidden_size[0], num_classes)
        
    
    def forward(self, x):
        #order: conv, batchnorm, activation, maxpool, drop
        #Calls:
        #Convolution 1d: out = F.relu(self.conv1d1(x))
        #Batch           normalization: out = self.conv1d1_bn(out)
        #Dropout:        out = self.dropout1(out)
        #Max Pooling:    out = self.pool1(out)
        
        #Layer1
        out = F.relu(self.conv1d1_bn(self.conv1d1(x)))
        out = self.pool1(out)
        out = self.dropout1(out)
        
        #Layer2
        out = F.relu(self.conv1d2_bn(self.conv1d2(x)))
        out = self.pool2(out)
        out = self.dropout2(out)
        
        #Layer3
        out = F.relu(self.conv1d2_bn(self.conv1d2(x)))
        out = self.pool2(out)
        out = self.dropout2(out)
        
        #TimeDistributed
        
        
        #Time_distributed_merge_layer
        
        
        #Softmax
        out = self.softmax(self.fc(out))
        
        return out # we can return here the softmax, but then we should use the nll_loss instead of the cross_entropy
    

model = CNNClassifier(input_size, hidden_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 

In [12]:
def train(log_interval, model, device, data_batches, optimizer, epoch):
    losses = []
    len_glob_data = np.prod(np.array(np.array(data_batches).shape[:-1]))
    
    model.train()
    
    for i, batch in enumerate(data_batches):
        x, y = list(zip(*batch))
        
        # Move tensors to the configured device
        data = torch.from_numpy(np.array(x)).float().to(device)
        target = torch.from_numpy(np.array(y)).to(device)
        
        # clear all the gradients of the optimized tensors
        optimizer.zero_grad()
    
        # Forward pass
        output = model(data)
        loss = criterion(output, target)
        
        # Backward and optimize
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
        
        if (i+1) % log_interval == 0:
            print(f'Train Epoch: {epoch+1} [{(i+1)*len(data)}/{len_glob_data} ({str(int(100. * (i+1)*len(data) / len_glob_data)).zfill(2)}%)]\tLoss: {round(loss.item(),6)}')
    return np.array(losses).mean()

In [13]:
def validate(model, device, data_batches):
    len_glob_data = np.prod(np.array(np.array(data_batches).shape[:-1]))
    
    model.eval()  # let's put the model in evaluation mode

    validation_loss = []
    correct = 0
    with torch.no_grad():  # we don't need gradient computation at all
        for i, batch in enumerate(data_batches):
            x, y = list(zip(*batch))
        
            data = torch.from_numpy(np.array(x)).float().to(device)
            target = torch.from_numpy(np.array(y)).to(device)
            
            output = model(data)
            loss = criterion(output, target)
            
            validation_loss.append(loss.item())
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    validation_loss = np.array(validation_loss).mean()
    accuracy = 100. * correct / len_glob_data

    print(f'\nValidation set: Average loss: {round(validation_loss, 4)}, Accuracy: {correct}/{len_glob_data} ({int(accuracy)}%)\n')
    
    return accuracy, validation_loss

In [16]:
# Hyperparameters

input_size = 25
hidden_size = [256, 128, 64, 32]
num_classes = 10

train_batch_size = 100
valid_batch_size = 100

num_epochs = 1000
learning_rate = 0.0001

In [14]:
train_data_batches = data_batcher(train_data, train_batch_size)
val_data_batches = data_batcher(val_data, valid_batch_size)

model = CNNClassifier(input_size, hidden_size, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_losses = []
valid_losses = []
accuracies = []

for epoch in range(num_epochs):
    
    train_loss = train(2, model, device, train_data_batches, optimizer, epoch)
    train_losses.append(train_loss)

    accuracy, valid_loss = validate(model, device, val_data_batches)
    valid_losses.append(valid_loss)
    accuracies.append(accuracy)

RuntimeError: Expected 3-dimensional input for 3-dimensional weight 256 25, but got 2-dimensional input of size [100, 25] instead

In [None]:
fig = plt.figure()
fig.subplots_adjust(right=2.5)

ax1 = fig.add_subplot(121)
ax1.set_title('Loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.plot(train_losses, 'b-', label='Train')
ax1.plot(valid_losses, 'r-', label='Validation')
ax1.legend(loc='upper right')

ax1 = fig.add_subplot(122)
ax1.set_title('Accuracy')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Accuracy (%)')
ax1.plot(accuracies, 'b-')

plt.show()