In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import os
from IPython.display import Image
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torchvision.transforms as transforms
import torchvision
import torch.nn as nn
import torch
from sklearn.model_selection import train_test_split, StratifiedKFold

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc

# Draw matplotlib figures inline in the notebook output
%matplotlib inline
# Ask the inline backend for 'retina' figure output
%config InlineBackend.figure_format = 'retina'

# Typeset figure text through LaTeX and use a serif font family
# NOTE(review): usetex=True presumably needs a working LaTeX installation on the host - confirm
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# clear gpu memory cache
torch.cuda.empty_cache()

In [None]:
def _load_arr0(path):
    """Return the array stored under the key 'arr_0' of the .npz archive at `path`."""
    return np.load(path)['arr_0']


# Train set: one array per extracted feature type plus the labels
mel_spectrogram_train = _load_arr0('./Extracted_Data/melspectrogram_216.npz')
chromagram_train = _load_arr0('./Extracted_Data/chromagram_216.npz')
mfcc_train = _load_arr0('./Extracted_Data/mfcc_216.npz')
spectrogram_train = _load_arr0('./Extracted_Data/spectrogram_216.npz')
tempogram_train = _load_arr0('./Extracted_Data/tempogram_fourier_216.npz')
labels_train = _load_arr0('./Extracted_Data/labels_216.npz')

# Test set: same feature types, read from the *_test archives
mel_spectrogram_test = _load_arr0('./Extracted_Data/melspectrogram_216_test.npz')
chromagram_test = _load_arr0('./Extracted_Data/chromagram_216_test.npz')
mfcc_test = _load_arr0('./Extracted_Data/mfcc_216_test.npz')
spectrogram_test = _load_arr0('./Extracted_Data/spectrogram_216_test.npz')
tempogram_test = _load_arr0('./Extracted_Data/tempogram_fourier_216_test.npz')
labels_test = _load_arr0('./Extracted_Data/labels_216_test.npz')

# Report array shapes and the label distribution of each split
for split_name, named_arrays, split_labels in (
    ('Train', (('Mel Spectrogram', mel_spectrogram_train),
               ('Chromagram', chromagram_train),
               ('mfcc', mfcc_train),
               ('tempogram', tempogram_train),
               ('spectrogram', spectrogram_train)), labels_train),
    ('Test', (('Mel Spectrogram', mel_spectrogram_test),
              ('Chromagram', chromagram_test),
              ('mfcc', mfcc_test),
              ('tempogram', tempogram_test),
              ('spectrogram', spectrogram_test)), labels_test),
):
    print('----------{} set------------'.format(split_name))
    for title, arr in named_arrays:
        print('{} : {}'.format(title, arr.shape))
    print('labels: {}'.format(split_labels.shape))
    print('Class Distribution: {}'.format(np.unique(split_labels, return_counts=True)))

In [None]:
# Trim a fixed number of trailing samples from each split ("make batch sizes even").
# NOTE(review): presumably 42 / 8 are the remainders of the split sizes modulo the
# batch size - confirm against the data.
# NOTE(review): the cut is relative to the *current* length, so re-running this
# cell removes a further 42 / 8 samples each time.
n_train_keep = len(labels_train) - 42
n_test_keep = len(labels_test) - 8

mel_spectrogram_train = mel_spectrogram_train[:n_train_keep, :, :]
chromagram_train = chromagram_train[:n_train_keep, :, :]
mfcc_train = mfcc_train[:n_train_keep, :, :]
tempogram_train = tempogram_train[:n_train_keep, :, :]
spectrogram_train = spectrogram_train[:n_train_keep, :, :]
labels_train = labels_train[:n_train_keep]

mel_spectrogram_test = mel_spectrogram_test[:n_test_keep, :, :]
chromagram_test = chromagram_test[:n_test_keep, :, :]
mfcc_test = mfcc_test[:n_test_keep, :, :]
tempogram_test = tempogram_test[:n_test_keep, :, :]
spectrogram_test = spectrogram_test[:n_test_keep, :, :]
labels_test = labels_test[:n_test_keep]

# Report array shapes and the label distribution of each split after trimming
for split_name, named_arrays, split_labels in (
    ('Train', (('Mel Spectrogram', mel_spectrogram_train),
               ('Chromagram', chromagram_train),
               ('mfcc', mfcc_train),
               ('tempogram', tempogram_train),
               ('spectrogram', spectrogram_train)), labels_train),
    ('Test', (('Mel Spectrogram', mel_spectrogram_test),
              ('Chromagram', chromagram_test),
              ('mfcc', mfcc_test),
              ('tempogram', tempogram_test),
              ('spectrogram', spectrogram_test)), labels_test),
):
    print('----------{} set------------'.format(split_name))
    for title, arr in named_arrays:
        print('{} : {}'.format(title, arr.shape))
    print('labels: {}'.format(split_labels.shape))
    print('Class Distribution: {}'.format(np.unique(split_labels, return_counts=True)))

# Remember the (trimmed) training shape of every feature type; entries 1 and 2
# are used later as the per-sample height and width
shape_mel, shape_chroma, shape_mfcc, shape_tempo, shape_spec = (
    arr.shape for arr in (mel_spectrogram_train, chromagram_train, mfcc_train,
                          tempogram_train, spectrogram_train)
)

In [None]:
# Shuffle the training samples. A single permutation is applied to every feature
# array and to the labels so that samples stay aligned across arrays.
# The permutation comes from a seeded generator so the sample order (and hence
# the training run) is reproducible after Restart Kernel -> Run All.
SHUFFLE_SEED = 42
temp = np.arange(0, len(labels_train))
shuffle_indices = np.random.default_rng(SHUFFLE_SEED).permutation(temp)
print("shuffled indices: {}".format(shuffle_indices))

mel_spectrogram_train = mel_spectrogram_train[shuffle_indices]
chromagram_train = chromagram_train[shuffle_indices]
mfcc_train = mfcc_train[shuffle_indices]
tempogram_train = tempogram_train[shuffle_indices]
spectrogram_train = spectrogram_train[shuffle_indices]
labels_train = labels_train[shuffle_indices]

# Check shapes (unchanged by the shuffle) and the class distribution
print('----------Train set------------')
print('Mel Spectrogram : {}'.format(mel_spectrogram_train.shape))
print('Chromagram : {}'.format(chromagram_train.shape))
print('mfcc : {}'.format(mfcc_train.shape))
print('tempogram : {}'.format(tempogram_train.shape))
print('spectrogram : {}'.format(spectrogram_train.shape))
print('labels: {}'.format(labels_train.shape))
print('Class Distribution: {}'.format(np.unique(labels_train, return_counts=True)))

## CNN

In [None]:
# Dataset class that pairs two feature arrays with their labels for the PyTorch DataLoader
class MusicDataset(Dataset):
    """Dataset yielding ``(image1, image2, label)`` triples from two feature arrays.

    Args:
        dataset1: first feature array; reshaped to ``(-1, shape1[1], shape1[2], 1)``.
        dataset2: second feature array; reshaped to ``(-1, shape2[1], shape2[2], 1)``.
        labels: label array; stored as int32.
        shape1: shape tuple whose entries 1 and 2 give the per-sample size of ``dataset1``.
        shape2: shape tuple whose entries 1 and 2 give the per-sample size of ``dataset2``.
        transform: optional callable applied to both images on access.
    """

    def __init__(self, dataset1, dataset2, labels, shape1, shape2, transform=None):
        # Keep references to the raw inputs and the optional transform
        self.spec, self.chroma = dataset1, dataset2
        self.transform = transform

        def to_float_images(raw, shape):
            # Add a trailing channel axis and store a float32 copy
            return raw.reshape(-1, shape[1], shape[2], 1).astype(np.float32)

        self.labels = labels.astype(np.int32)
        self.images_spec = to_float_images(self.spec, shape1)
        self.images_chroma = to_float_images(self.chroma, shape2)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image pair and the label at ``idx``."""
        label = self.labels[idx]
        pair = (self.images_spec[idx], self.images_chroma[idx])

        if self.transform is not None:
            pair = tuple(self.transform(img) for img in pair)

        return pair[0], pair[1], label

    def __len__(self):
        """Number of samples, taken from the first feature array."""
        return len(self.images_spec)


In [None]:
# Build the train / test datasets for one pair of feature types.
#   option 1: mel   / chroma
#   option 2: mfcc  / chroma
#   option 3: mel   / spec
#   option 4: spec  / tempo
#   option 5: mel   / tempo
#   any other value (e.g. 6): mfcc / mel

option = 6

# feature key -> (train array, test array, training shape)
_feature_table = {
    'mel': (mel_spectrogram_train, mel_spectrogram_test, shape_mel),
    'chroma': (chromagram_train, chromagram_test, shape_chroma),
    'mfcc': (mfcc_train, mfcc_test, shape_mfcc),
    'spec': (spectrogram_train, spectrogram_test, shape_spec),
    'tempo': (tempogram_train, tempogram_test, shape_tempo),
}

# option -> (first input, second input)
_option_pairs = {
    1: ('mel', 'chroma'),
    2: ('mfcc', 'chroma'),
    3: ('mel', 'spec'),
    4: ('spec', 'tempo'),
    5: ('mel', 'tempo'),
}

_first_key, _second_key = _option_pairs.get(option, ('mfcc', 'mel'))
_first_train, _first_test, shape1 = _feature_table[_first_key]
_second_train, _second_test, shape2 = _feature_table[_second_key]

# The test set is reshaped with the training shapes as well (only entries 1 and 2 are used)
train_set = MusicDataset(_first_train, _second_train, labels_train, shape1, shape2,
                         transform=transforms.Compose([transforms.ToTensor()]))

test_set = MusicDataset(_first_test, _second_test, labels_test, shape1, shape2,
                        transform=transforms.Compose([transforms.ToTensor()]))

# Tag used in the file names of saved models, outputs and plots
choice = '{}_{}'.format(_first_key, _second_key)


In [None]:
# Define the two-input CNN class used for the paired audio features
class MusicCNN(nn.Module):
    """Two-branch CNN producing 2 output scores per sample.

    Each input runs through its own pair of conv blocks; the flattened branch
    outputs are concatenated and fed through three linear layers with dropout
    in between.

    Args:
        option: feature-pair selector. Values 1-5 pick the pairs listed in
            ``_PAIRS``; any other value falls back to the mfcc / mel pair.
            It only determines the input width of the first linear layer.
    """

    # Flattened branch-output size per feature type (values as hard-coded in this notebook)
    _FLAT_SIZE = {
        'mel': 50880,
        'chroma': 6784,
        'mfcc': 13568,
        'spec': 105152,
        'tempo': 88192,
    }

    # option -> (feature of first branch, feature of second branch)
    _PAIRS = {
        1: ('mel', 'chroma'),
        2: ('mfcc', 'chroma'),
        3: ('mel', 'spec'),
        4: ('spec', 'tempo'),
        5: ('mel', 'tempo'),
    }
    _FALLBACK_PAIR = ('mfcc', 'mel')

    @staticmethod
    def _conv_block(in_channels, out_channels, padding):
        """Conv(3x3) -> BatchNorm -> LeakyReLU(0.1) -> MaxPool(2, stride 2)."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(negative_slope=0.1),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

    def __init__(self, option):
        super(MusicCNN, self).__init__()

        # Branch for the first input (layer1 -> layer2)
        self.layer1 = self._conv_block(1, 32, padding=1)
        self.layer2 = self._conv_block(32, 64, padding=0)

        # Branch for the second input (layer3 -> layer4)
        self.layer3 = self._conv_block(1, 32, padding=1)
        self.layer4 = self._conv_block(32, 64, padding=0)

        # Width of the concatenated, flattened branch outputs for the chosen pair
        first, second = self._PAIRS.get(option, self._FALLBACK_PAIR)
        merged_features = self._FLAT_SIZE[first] + self._FLAT_SIZE[second]

        self.fc1 = nn.Sequential(
            nn.Linear(in_features=merged_features, out_features=1200),
            nn.LeakyReLU(negative_slope=0.1)
        )
        self.drop1 = nn.Dropout(0.3)

        self.fc2 = nn.Sequential(
            nn.Linear(in_features=1200, out_features=10),
            nn.LeakyReLU(negative_slope=0.1)
        )
        self.drop2 = nn.Dropout(0.3)

        self.fc3 = nn.Linear(in_features=10, out_features=2)

    def forward(self, x_mel, x_chroma):
        """Run both inputs through their branches and return the 2-score output.

        Despite the parameter names, ``x_mel`` is simply the first input and
        ``x_chroma`` the second input of whichever feature pair was selected.
        """
        # First branch, flattened to (batch, features)
        branch1 = self.layer2(self.layer1(x_mel))
        branch1 = branch1.view(branch1.size(0), -1)

        # Second branch, flattened to (batch, features)
        branch2 = self.layer4(self.layer3(x_chroma))
        branch2 = branch2.view(branch2.size(0), -1)

        # Merge the branches along the feature axis
        merged = torch.cat((branch1, branch2), dim=1)

        # Fully connected head
        hidden = self.drop1(self.fc1(merged))
        hidden = self.drop2(self.fc2(hidden))
        return self.fc3(hidden)


In [None]:
# Hyperparameters. Options 3, 4 and 5 involve the larger spectrogram / tempogram
# inputs, so they get a smaller batch and fewer epochs.
uses_large_inputs = option in (3, 4, 5)
batch, num_epochs = (20, 8) if uses_large_inputs else (100, 10)
learning_rate = 1e-4

# Run on the first CUDA device when one is available, otherwise on the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device", device)

# Fresh model for the selected feature pair.
# (A previously saved model could instead be restored with
#  torch.load('./Models/CNN_{}.pth'.format(choice)).)
model = MusicCNN(option).to(device)

# Loss function and optimizer
error = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Batches are drawn in dataset order (no shuffling inside the loader)
train_loader = DataLoader(train_set, batch_size=batch)

print(model)

In [None]:
torch.set_printoptions(precision=3)

# Per-batch predicted class indices, collected during the final epoch only
Output_CNN_train = []

for epoch in range(num_epochs):
    # ------------------------------------------------------------------
    # Training phase
    # ------------------------------------------------------------------
    # Loss_train accumulates the per-batch losses (a sum, not a mean)
    Loss_train, correct_train = 0.0, 0

    # Training mode: dropout layers are active
    model.train()

    for image1, image2, labels in train_loader:
        # CrossEntropyLoss expects integer (long) class targets
        labels = labels.type(torch.LongTensor)

        # Send data to GPU/CPU
        image1, image2, labels = image1.to(device), image2.to(device), labels.to(device)

        # Shape as (batch, channel, height, width). The batch dimension is
        # inferred (-1) rather than hard-coded to `batch`, so a final batch
        # with fewer samples does not raise.
        image1 = image1.view(-1, 1, shape1[1], shape1[2])
        image2 = image2.view(-1, 1, shape2[1], shape2[2])

        # Forward pass
        outputs = model(image1, image2)
        loss = error(outputs, labels)

        # Reset gradients so they do not accumulate across batches
        optimizer.zero_grad()

        # Back-propagate the error
        loss.backward()

        # Update the parameters
        optimizer.step()

        # Running statistics for this epoch
        Loss_train += loss.item()
        scores, predictions = torch.max(outputs.detach(), 1)
        correct_train += (predictions == labels).sum().item()

        # Keep the predictions of the last epoch for later saving
        if epoch == num_epochs - 1:
            Output_CNN_train.append(predictions.detach().cpu().numpy())

    # ------------------------------------------------------------------
    # Display results: accuracy is the fraction of all training samples
    # ------------------------------------------------------------------
    Acc_train = correct_train/len(train_loader.sampler) * 100

    print("Epoch:{}/{} | Train Loss:{:.2f} | Train Acc:{:.2f}".format(epoch+1,
                                                num_epochs, Loss_train, Acc_train))


In [None]:
# Join the per-batch prediction arrays of the last epoch into one 1-D vector.
# np.concatenate also works when the final batch is shorter than the others,
# where np.array(...).reshape(-1) would not; an empty list yields an empty array.
temp_train = np.concatenate(Output_CNN_train) if Output_CNN_train else np.array([])
print(temp_train.shape)
np.save('./Output_CNN/Ouput_CNN_{}_train'.format(choice),temp_train)

In [None]:
# Evaluate the trained model on the test set

# Setup data loader
batch = 100
test_loader = DataLoader(test_set, batch_size=batch)

# Per-batch predicted class indices for the test set
Output_CNN = []


with torch.no_grad():
    # Loss_test accumulates the per-batch losses (a sum, not a mean)
    Loss_test, correct_test = 0.0, 0

    # Evaluation mode: dropout is disabled
    model.eval()

    for image1, image2, labels in test_loader:
        # CrossEntropyLoss expects integer (long) class targets
        labels = labels.type(torch.LongTensor)

        # Send data to GPU/CPU
        image1, image2, labels = image1.to(device), image2.to(device), labels.to(device)

        # Shape as (batch, channel, height, width). The batch dimension is
        # inferred (-1) rather than hard-coded to `batch`, so a final batch
        # with fewer samples does not raise.
        image1 = image1.view(-1, 1, shape1[1], shape1[2])
        image2 = image2.view(-1, 1, shape2[1], shape2[2])

        # Forward pass
        outputs = model(image1, image2)
        loss = error(outputs, labels)

        # Accumulate loss and number of correct predictions
        Loss_test += loss.item()
        scores, predictions = torch.max(outputs.detach(), 1)
        correct_test += (predictions == labels).sum().item()

        # Save output
        Output_CNN.append(predictions.detach().cpu().numpy())

    # ------------------------------------------------------------------
    # Display results: accuracy is the fraction of all test samples
    # ------------------------------------------------------------------
    Acc_test = correct_test/len(test_loader.sampler) * 100
    print('Test set accuracy: {:.2f}'.format(Acc_test))


In [None]:
# Persist the whole trained model object, named after the selected feature pair
model_path = './Models/CNN_{}.pth'.format(choice)
torch.save(model, model_path)

In [None]:
# Join the per-batch test predictions into one 1-D vector.
# np.concatenate also works when the final batch is shorter than the others,
# where np.array(...).reshape(-1) would not; an empty list yields an empty array.
temp = np.concatenate(Output_CNN) if Output_CNN else np.array([])

# Rows are the true labels, columns the predicted labels
cm = confusion_matrix(labels_test, temp)
print(cm)

In [None]:
# Per-class accuracy: diagonal entry of the confusion matrix divided by its row sum
row_totals = cm.sum(axis=1)
Acc_perclass = cm.diagonal()/row_totals

for class_idx, class_name in enumerate(('Non-Progrock', 'Progrock')):
    print('Accuracy {}: {:.2f}'.format(class_name, Acc_perclass[class_idx]*100))

In [None]:
# Display Confusion Matrix per segment

cm_disp = ConfusionMatrixDisplay(cm, display_labels=['Non Prog', 'Prog'])

cm_disp.plot();
cm_disp.ax_.set(title='Confusion Matrix', xlabel='Predicted', ylabel='Actual');

# Save from the display's own figure and BEFORE plt.show(): with the inline
# backend, calling plt.savefig after plt.show() writes a new, empty figure.
cm_disp.figure_.savefig('./plots/cm_{}'.format(choice),format='png', backend='cairo', bbox_inches='tight');

plt.show();

In [None]:
# Store the flattened test-set predictions for the selected feature pair
test_output_path = './Output_CNN/Ouput_CNN_{}_test'.format(choice)
np.save(test_output_path, temp)