# TONG CODE

# Data Loading

## Imports

In [163]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from joblib import dump, load
import warnings
warnings.filterwarnings('ignore') 

import librosa
from librosa import display

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.utils.multiclass import unique_labels
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import CCA
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

import json

## Load Data

In [2]:
# this is all sorts of messy

In [3]:
all_genres = ['blues','classical','country','disco','hiphop','jazz','metal','pop','reggae','rock']
num_songs = 100
sr = 22050
Y_LIMIT = 660000

In [4]:
class DataLoader():
    def __init__(self, seg=5):
        '''
        Initializes the DataLoader.
        
        Forces deteriminism by setting np.seed=1.
        
        self.SEG is the number of fixed-length segments, and the number of random crops to take
        self.SEG_LENGTH is the length of each fixed-length segment
        self.RANDOM_SEG_LENGTH is the length of each random crop
        
        self.train_idxs is a list of 750 training indices
        self.test_idxs is a list of 250 testing indices
        
        self.train_crop_idxs is a list of 750*self.SEG training indices, corresponding to the same indices above
        self.test_crop_idxs  is a list of 750*self.SEG testing indices,  corresponding to the same indices above
        
        eg. train_idxs      = [1, 2, 4, ...]
            train_crop_idxs = [10 ... 19, 20 ... 29, 40 ... 49, ...]
            
        Cropped X's retain the order of the uncropped X's
        i.e. the first 10 self.mfcc_fixed_crop entries correspond to the first self.mfcc entry.
        '''
        np.random.seed(1)
        self.X_mfcc = None
        self.X_mfcc_random_crop = None
        self.X_mfcc_fixed_crop = None
        
        self.X_chroma = None
        self.X_chroma_random_crop = None
        self.X_chroma_fixed_crop = None
        
        self.Y = None
        self.Y_crop = None
        
        self.SEG = seg   # Must evenly divide 30
        self.SEG_LENGTH = int(1290/self.SEG)
        self.RANDOM_SEG_LENGTH = 1200
        self.PAD = 1290 - self.RANDOM_SEG_LENGTH
        self.RANDOM_STARTS = np.random.randint(low=0, high=1290-self.RANDOM_SEG_LENGTH, size=(1000, self.SEG))
        
        self.train_idxs = np.sort(np.random.choice(np.array([i for i in range(1000)]), size=500, replace=False))
        self.val_idxs = np.sort(np.random.choice(np.array([i for i in range(1000) if i not in self.train_idxs]), size=250, replace=False))
        self.test_idxs = np.array([i for i in range(1000) if i not in self.train_idxs and i not in self.val_idxs])

        self.train_crop_idxs = np.hstack([np.array([i*self.SEG+j for j in range(self.SEG)]) for i in self.train_idxs])
        self.val_crop_idxs = np.hstack([np.array([i*self.SEG+j for j in range(self.SEG)]) for i in self.val_idxs])
        self.test_crop_idxs = np.hstack([np.array([i*self.SEG+j for j in range(self.SEG)]) for i in self.test_idxs])

    def save_mfcc(self, genres=all_genres, songs=num_songs):
        '''
        Saves MFCC Coefficients.
        Produces a 1000 x 16770 array.
        '''
        assert(self.X_mfcc is None)
        X_mfcc = None
        for g_idx, g in enumerate(genres):
            for s_idx in range(songs):
                y, sr = librosa.load(f'genres/{g}/{g}.000{s_idx:02d}.au')
                y = y[:Y_LIMIT]
                mfcc = librosa.feature.mfcc(y, sr=sr, hop_length=512, n_mfcc=13).flatten()
                if X_mfcc is None:
                    X_mfcc = mfcc.reshape(1, mfcc.shape[0])
                else:
                    X_mfcc = np.vstack([X_mfcc, mfcc])
        scaler = StandardScaler()
        self.X_mfcc = scaler.fit_transform(X_mfcc)
        np.savetxt('data/X_mfcc.csv', self.X_mfcc)
    
    def save_mfcc_random_crop(self):
        '''
        Saves self.SEG random crops of MFCC for every original training sample.
        
        Produces a 10000 x 13 x 1200 array, padded with zeros to 10000 x 13 x 1290.
        Reshapes into 10000*16770 for the CSV.
        '''
        assert(self.X_mfcc_random_crop is None and self.X_mfcc is not None)
        X_mfcc_crop = None
        for i, mfcc in enumerate(self.X_mfcc):
            crop = None
            for j in range(self.SEG):
                random_start = self.RANDOM_STARTS[i][j]
                random_seg = np.vstack([mfcc[1290*k+random_start : 1290*k+random_start+self.RANDOM_SEG_LENGTH] for k in range(13)])
                random_seg = np.pad(random_seg, ((0, 0), (0, self.PAD)), 'constant')
                random_seg = random_seg.reshape(1, random_seg.shape[0], random_seg.shape[1])
                if crop is None:
                    crop = random_seg
                else:
                    crop = np.vstack([crop, random_seg])
            if X_mfcc_crop is None:
                X_mfcc_crop = crop
            else:
                X_mfcc_crop = np.vstack([X_mfcc_crop, crop])
        self.X_mfcc_random_crop = X_mfcc_crop
        np.savetxt(f'data/X_mfcc_random_crop_{self.SEG}.csv', X_mfcc_crop.reshape(1000*self.SEG, 13*1290))
    
    def save_mfcc_fixed_crop(self):
        '''
        Saves self.SEG even segments of MFCC for every original training sample.
        
        Produces a 10000 x 13 x 129 array of MFCC coefficients for the segments.
        Reshapes into 10000*1677 for the CSV.
        '''
        assert(self.X_mfcc_fixed_crop is None and self.X_mfcc is not None)
        X_mfcc_crop = None
        for mfcc in self.X_mfcc:
            crop = np.stack([np.vstack([mfcc[1290*j+self.SEG_LENGTH*i : 1290*j+self.SEG_LENGTH*(i+1)] for j in range(13)]) for i in range(self.SEG)], axis=0)
            if X_mfcc_crop is None:
                X_mfcc_crop = crop
            else:
                X_mfcc_crop = np.vstack([X_mfcc_crop, crop])
        self.X_mfcc_fixed_crop = X_mfcc_crop
        np.savetxt(f'data/X_mfcc_fixed_crop_{self.SEG}.csv', X_mfcc_crop.reshape(1000*self.SEG, 13*self.SEG_LENGTH))
    
    def save_chroma(self, genres=all_genres, songs=num_songs):
        '''
        Saves Chromas.
        Produces a 1000 x 15480 array.
        '''
        assert(self.X_chroma is None)
        X_chroma = None
        for g_idx, g in enumerate(genres):
            for s_idx in range(songs):
                y, sr = librosa.load(f'genres/{g}/{g}.000{s_idx:02d}.au')
                y = y[:Y_LIMIT]
                chroma = librosa.feature.chroma_cqt(y, sr=sr, hop_length=512).flatten()
                if X_chroma is None:
                    X_chroma = chroma.reshape(1, chroma.shape[0])
                else:
                    X_chroma = np.vstack([X_chroma, chroma])
        scaler = StandardScaler()
        self.X_chroma = scaler.fit_transform(X_chroma)
        np.savetxt('data/X_chroma.csv', self.X_chroma)

    def save_chroma_random_crop(self):
        '''
        Saves 10 random crops of Chromas for every original training sample.
        
        Produces a 10000 x 12 x 1200 array, padded with zeros to 10000 x 12 x 1290.
        Reshapes into 10000*15480 for the CSV.
        '''
        assert(self.X_chroma_random_crop is None and self.X_chroma is not None)
        X_chroma_crop = None
        for i, chroma in enumerate(self.X_chroma):
            crop = None
            for j in range(self.SEG):
                random_start = self.RANDOM_STARTS[i][j]
                random_seg = np.vstack([chroma[1290*k+random_start : 1290*k+random_start+self.RANDOM_SEG_LENGTH] for k in range(12)])
                random_seg = np.pad(random_seg, ((0, 0), (0, self.PAD)), 'constant')
                random_seg = random_seg.reshape(1, random_seg.shape[0], random_seg.shape[1])
                if crop is None:
                    crop = random_seg
                else:
                    crop = np.vstack([crop, random_seg])
            if X_chroma_crop is None:
                X_chroma_crop = crop
            else:
                X_chroma_crop = np.vstack([X_chroma_crop, crop])
        self.X_chroma_random_crop = X_chroma_crop
        np.savetxt(f'data/X_chroma_random_crop_{self.SEG}.csv', X_chroma_crop.reshape(1000*self.SEG, 12*1290))
        
    def save_chroma_fixed_crop(self):
        '''
        Saves 10 even segments of Chromas for every original training sample.
        
        Produces a 10000 x 12 x 129 array of MFCC coefficients for the segments.
        Reshapes into 10000*1548 for the CSV.
        '''
        assert(self.X_chroma_fixed_crop is None and self.X_chroma is not None)
        X_chroma_crop = None
        for chroma in self.X_chroma:
            crop = np.stack([np.vstack([chroma[1290*j+self.SEG_LENGTH*i : 1290*j+self.SEG_LENGTH*(i+1)] for j in range(12)]) for i in range(self.SEG)], axis=0)
            if X_chroma_crop is None:
                X_chroma_crop = crop
            else:
                X_chroma_crop = np.vstack([X_chroma_crop, crop])
        self.X_chroma_fixed_crop = X_chroma_crop
        np.savetxt(f'data/X_chroma_fixed_crop_{self.SEG}.csv', X_chroma_crop.reshape(1000*self.SEG, 12*self.SEG_LENGTH))
    
    '''
    If X_mfcc has been saved, but we aborted before saving X_mfcc_random_crop (or X_mfcc_fixed_crop), 
    we can call load_mfcc with tensor=False to load the MFCC in 2D and then call dl.save_random_crop().
    Note that all load functions reshape into tensors by default.
    '''  
    
    def load_mfcc(self, tensor=True):
        self.X_mfcc = np.loadtxt('data/X_mfcc.csv')
        if tensor:
            self.X_mfcc = self.X_mfcc.reshape(1000, 13, 1290)
    
    def load_mfcc_random_crop(self):
        self.X_mfcc_random_crop = np.loadtxt(f'data/X_mfcc_random_crop_{self.SEG}.csv').reshape(1000*self.SEG, 13, 1290)
        
    def load_mfcc_fixed_crop(self):
        self.X_mfcc_fixed_crop = np.loadtxt(f'data/X_mfcc_fixed_crop_{self.SEG}.csv').reshape(1000*self.SEG, 13, self.SEG_LENGTH)
        
    def load_chroma(self, tensor=True):
        self.X_chroma = np.loadtxt('data/X_chroma.csv')
        if tensor:
            self.X_chroma = self.X_chroma.reshape(1000, 12, 1290)
    
    def load_chroma_random_crop(self):
        self.X_chroma_random_crop = np.loadtxt(f'data/X_chroma_random_crop_{self.SEG}.csv').reshape(1000*self.SEG, 12, 1290)
        
    def load_chroma_fixed_crop(self):
        self.X_chroma_fixed_crop = np.loadtxt(f'data/X_chroma_fixed_crop_{self.SEG}.csv').reshape(1000*self.SEG, 12, self.SEG_LENGTH)
    
    def load_Y(self):
        self.Y = np.array([int(i/100) for i in range(1000)]).ravel()
        
    def load_Y_crop(self):
        self.Y_crop = np.array([int(i/(100 * self.SEG)) for i in range(1000 * self.SEG)]).ravel()
        
    def train_test_val_split(self, data, is_cropped):
        '''
        Splits an X_data into train, validation, and test sets.
        
        is_cropped=True for splitting random or fixed crops, iscropped=False for splitting original mfcc/chroma
        
        Train, val, and test indices are consistent every time train_test_split is called.
        
        Furthermore, cropped train and test indices are returned so that batches of size self.SEG are together, 
        with each batch corresponding to a single non-cropped index.
        '''
        if is_cropped:
            return np.take(data, self.train_crop_idxs, 0), np.take(data, self.val_crop_idxs, 0), np.take(data, self.test_crop_idxs, 0)
        else:
            return np.take(data, self.train_idxs, 0), np.take(data, self.val_idxs, 0), np.take(data, self.test_idxs, 0)

In [5]:
# %%time
# # # Saves features to data/...  (run once!!)
# dl = DataLoader(5)
# dl.save_mfcc()
# dl.save_mfcc_fixed_crop()
# dl.save_mfcc_random_crop()
# dl.save_chroma()
# dl.save_chroma_fixed_crop()
# dl.save_chroma_random_crop()

In [6]:
%%time
# # Load from CSVs in data/...
dl = DataLoader(5)
dl.load_mfcc()
dl.load_mfcc_fixed_crop()
dl.load_mfcc_random_crop()
dl.load_chroma()
dl.load_chroma_fixed_crop()
dl.load_chroma_random_crop()
dl.load_Y()
dl.load_Y_crop()

CPU times: user 3min 40s, sys: 25.4 s, total: 4min 6s
Wall time: 4min 11s


In [7]:
dl.X_mfcc.shape, dl.X_mfcc_random_crop.shape, dl.X_mfcc_fixed_crop.shape, dl.X_chroma.shape, dl.X_chroma_random_crop.shape, dl.X_chroma_fixed_crop.shape

((1000, 13, 1290),
 (5000, 13, 1290),
 (5000, 13, 258),
 (1000, 12, 1290),
 (5000, 12, 1290),
 (5000, 12, 258))

## Training and Test sets for each part

In [8]:
# Train on X_train (750),
# Test on X_test (250)
print('Train, validate, and test the vanilla model\n')

Xm_train, Xm_val, Xm_test = dl.train_test_val_split(dl.X_mfcc, is_cropped=False)
Xc_train, Xc_val, Xc_test = dl.train_test_val_split(dl.X_chroma, is_cropped=False)

X_train = np.concatenate([Xm_train, Xc_train], axis=1)
X_val = np.concatenate([Xm_val, Xc_val], axis=1)
X_test = np.concatenate([Xm_test, Xc_test], axis=1)

Y_train, Y_val, Y_test = dl.train_test_val_split(dl.Y, is_cropped=False)

print(f'Train size:    {X_train.shape}', f'     Train labels size: {Y_train.shape}')
print(f'Val size:      {X_val.shape}', f'     Val labels size:   {Y_val.shape}')
print(f'Test size:     {X_test.shape}', f'     Test labels size:  {Y_test.shape}')

Train, validate, and test the vanilla model

Train size:    (500, 25, 1290)      Train labels size: (500,)
Val size:      (250, 25, 1290)      Val labels size:   (250,)
Test size:     (250, 25, 1290)      Test labels size:  (250,)


In [9]:
# Train on X_mfcc_fixed_crop_train (7500),
# Test on X_mfcc_fixed_crop_test (2500),
# Aggregate into predictions on X_mfcc_test (250)
print('Train on segments, validate and test by accumulating votes of segments\n')

Xmf_train, Xmf_val, Xmf_test = \
    dl.train_test_val_split(dl.X_mfcc_fixed_crop, is_cropped=True)
Xcf_train, Xcf_val, Xcf_test = \
    dl.train_test_val_split(dl.X_chroma_fixed_crop, is_cropped=True)

Xf_train = np.concatenate([Xmf_train, Xcf_train], axis=1)
Xf_val = np.concatenate([Xmf_val, Xcf_val], axis=1)
Xf_test = np.concatenate([Xmf_test, Xcf_test], axis=1)

Y_crop_train, _, _ = dl.train_test_val_split(dl.Y_crop, is_cropped=True)

print(f'Train size:  {Xf_train.shape}', f'     Train labels size: {Y_crop_train.shape}')
print(f'Val size:    {Xf_val.shape}', f'     Val labels size:   {Y_val.shape}')
print(f'Test size:   {Xf_test.shape}', f'     Test labels size:  {Y_test.shape}')

Train on segments, validate and test by accumulating votes of segments

Train size:  (2500, 25, 258)      Train labels size: (2500,)
Val size:    (1250, 25, 258)      Val labels size:   (250,)
Test size:   (1250, 25, 258)      Test labels size:  (250,)


In [10]:
# Train on X_mfcc_random_crop_train (7500),
# Test on X_mfcc_test (250)
print('Train on padded random crops, validate and test on uncropped test\n')

Xmr_train, _, _ = dl.train_test_val_split(dl.X_mfcc_random_crop, is_cropped=True)
Xcr_train, _, _ = dl.train_test_val_split(dl.X_chroma_random_crop, is_cropped=True)

Xr_train = np.concatenate([Xmr_train, Xcr_train], axis=1)

Y_crop_train, _, _ = dl.train_test_val_split(dl.Y_crop, is_cropped=True)

print(f'Train size:   {Xr_train.shape}', f'    Train labels size: {Y_crop_train.shape}')
print(f'Val size:     {X_val.shape}', f'     Val labels size:   {Y_val.shape}')
print(f'Test size:    {X_test.shape}', f'     Test labels size:  {Y_test.shape}')

Train on padded random crops, validate and test on uncropped test

Train size:   (2500, 25, 1290)     Train labels size: (2500,)
Val size:     (250, 25, 1290)      Val labels size:   (250,)
Test size:    (250, 25, 1290)      Test labels size:  (250,)


## PyTorch DataLoader

In [105]:
# making torch style dataset and dataloader

import torch
from torch.utils import data

class GenreDataset(data.Dataset):

    def __init__(self, X, Y = None):
        self.X = torch.from_numpy(X).unsqueeze(1)
        if type(Y) != type(None):
            self.Y = torch.from_numpy(Y)
        else:
            self.Y = None
            
    def __len__(self):
        if type(self.Y) != type(None):
            assert(self.X.size()[0] == self.Y.size()[0])
        return self.X.size()[0]

    def __getitem__(self, index):
        data = self.X[index]
        if type(self.Y) != type(None):
            label = self.Y[index]
        else:
            label = 0
        return (data, label)

# Models

In [12]:
import torch.nn as nn
import torch.nn.functional as F
import math
from torchsummary import summary

## CNN + RNN

In [233]:
class CNN(nn.Module):
    def __init__(self, params):
        super(CNN, self).__init__()
        
        num_features = params['cnn_in_features']
        num_filters_1 = params['cnn_filters_1']
        ker_1 = params['cnn_kernel_1']
        out_features = params['rnn_in_size']
        
        out_size = num_features - ker_1[0] + 1
        
        self.conv1 = nn.Conv2d(1, num_filters_1, ker_1)
#         self.dropout = nn.Dropout(p=0.5)
#         self.bnorm = nn.BatchNorm2d(num_features = num_filters_1)
        
        self.fc = nn.Linear(num_filters_1*out_size, out_features)
        
    def forward(self, x):
        out = self.conv1(x)
        # out.size() = N x out_channels x out_size x time
        
#         out = self.dropout(out)
        # out.size() = N x out_channels x out_size x time
        
#         out = self.bnorm(out)
        # out.size() = N x out_channels x out_size x time
        
        out = F.relu(out)
        # out.size() = N x out_channels x out_size x time
        
        out = out.permute(0,3,1,2)
        # out.size() = N x time x out_channels x out_size
        
        out = out.view(out.size()[0], out.size()[1], -1)
        # out.size() = N x time x out_channels*out_size
        
        out = self.fc(out)
        # out.size() = N x time x out_features
        
        out = F.relu(out)
        # out.size() = N x time x out_features
        
        return out
    
class RNN(nn.Module):
    def __init__(self, params):
        super(RNN, self).__init__()
        
        in_size = params['rnn_in_size']
        hid_size = params['rnn_hid_size']
        dropout = params['rnn_dropout']
        
        self.lstm = nn.LSTM(in_size, hid_size, batch_first=True, dropout = dropout)
    
    def forward(self, x):
        out, hidden = self.lstm(x)
        # out.size() = N x time x hid_size
        return out

class CNN_RNN(nn.Module):
    def __init__(self, params):
        super(CNN_RNN, self).__init__()
        
        out_size = params['rnn_hid_size']
        
        self.CNN = CNN(params)
        self.RNN = RNN(params)
        
        self.fc = nn.Linear(out_size, 10)
        
    def forward(self, x):
        out = self.CNN(x)
        # out.size() = N x time x out_features
        
        out = self.RNN(out)
        # out.size() = N x time x out_features
        
        out = out[:,-1,:].squeeze()
        # out.size() = N x out_features
        
        out = self.fc(out)
        # out.size() = N x 10
        
        return out

# Training and Testing

In [195]:
# defining training and testing functions

def train(net, criterion, optimizer, num_epochs, trainloader):
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    
    net.train()
    
    for epoch in range(num_epochs):
        
        correct = 0
        total = 0
        
        print("Epoch: " + str(epoch+1))
        running_loss = 0.0
        for data in trainloader:
            
            inputs, labels = data
            
            inputs = inputs.to(device).float()
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = net(inputs)
            
            _ , predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)            
            
            correct += (predicted == labels).sum().item()            
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()            
            running_loss += loss.item()
            
        print("Loss: " + str(running_loss / 750.0) + ' Accuracy: ' + str((100 * correct / total)) + '%')
    print('Finished Training')
    
def predictions(net, loader):
    # return numpy array of model's predictions
    
    separate_predictions = []
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    
    net.eval()
    
    with torch.no_grad():
        for data in loader:
            
            inputs, _ = data
            inputs = inputs.to(device).float()

            outputs = net(inputs)
            _ , predicted = torch.max(outputs.data, 1)
            separate_predictions.append(predicted.numpy())
    
    predictions = np.concatenate(separate_predictions)
    return predictions

def test(net, loader):
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    
    net.eval()
    
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data in loader:
            inputs, labels = data
            inputs = inputs.to(device).float()
            labels = labels.to(device)
            
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    acc = 100*correct/total
    
    print('Test accuracy of the network: ' + str(acc) + '%')

    return acc

In [None]:
def view_progression(net, criterion, optimizer, num_epochs, trainloader, testloader):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    
    net.train()
    
    for epoch in range(num_epochs):
        
        correct = 0
        total = 0
        
        print("Epoch: " + str(epoch+1))
        running_loss = 0.0
        for data in trainloader:
            
            inputs, labels = data
            
            inputs = inputs.to(device).float()
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = net(inputs)
            
            _ , predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)            
            
            correct += (predicted == labels).sum().item()            
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()            
            running_loss += loss.item()
            
        print("Loss: " + str(running_loss / 750.0) + ' Accuracy: ' + str((100 * correct / total)) + '%')
        test(net, testloader)
    print('Finished Training')

# Evaluating Model

In [16]:
print(X_train.shape, X_val.shape, X_test.shape, Y_train.shape)

(500, 25, 1290) (250, 25, 1290) (250, 25, 1290) (500,)


In [144]:
# attempt at regularization

class WeightClipper():

    def __init__(self):
        pass

    def __call__(self, module):
        if hasattr(module, 'weight'):
            w = module.weight.data
            w = w.clamp(-1,1)

clipper = WeightClipper()

In [234]:
def test_cr(params, x_train, y_train, x_test, y_test):
    
    # handling data; creating torch datasets and torch dataloaders
    
    trainset = GenreDataset(x_train, y_train)
    testset = GenreDataset(x_test, y_test)
    
    trainloader = data.DataLoader(trainset, batch_size = 64, shuffle = True)
    testloader = data.DataLoader(testset, batch_size = 64, shuffle = False)
    
    # constructing net and surrounding items
    
    num_features = x_train.shape[1]
    cr_net = CNN_RNN(params)
    cr_net.float()
    cr_net.apply(clipper)
    cr_crit = nn.CrossEntropyLoss()
    cr_opt = torch.optim.Adam(cr_net.parameters(), lr=params['lr'], weight_decay = params['weight_decay'])
    num_epochs = params['epochs']
    
    # training
    
#     train(cr_net, cr_crit, cr_opt, num_epochs, trainloader)
    view_progression(cr_net, cr_crit, cr_opt, num_epochs, trainloader, testloader)
#     return test(cr_net, testloader)

In [235]:
params = {
    'cnn_in_features' : 25,
    'cnn_filters_1' : 32,
    'cnn_kernel_1' : (16, 1),
    'rnn_in_size' : 25,
    'rnn_hid_size' : 50,
    'rnn_dropout' : 0.5,
    'epochs' : 20,
    'lr' : 0.01,
    'weight_decay': 0.01
}

test_cr(params, X_train, Y_train, X_test, Y_test)

Epoch: 1
Loss: 0.02404621378580729 Accuracy: 15.6%
Test accuracy of the network: 17.6%
Epoch: 2
Loss: 0.022675397872924805 Accuracy: 21.6%
Test accuracy of the network: 22.0%
Epoch: 3
Loss: 0.02234017848968506 Accuracy: 23.0%
Test accuracy of the network: 18.8%
Epoch: 4


KeyboardInterrupt: 

In [213]:
# # tuning model
# def xval_cr(X_train, Y_train, X_test, Y_test):    
#     params = {
#         'cnn_in_features' : 25,
#         'cnn_filters_1' : 32,
#         'cnn_kernel_1' : (16, 1),
#         'rnn_in_size' : 20,
#         'rnn_hid_size' : 30,
#         'rnn_dropout' : 0.5,
#         'epochs' : 50,
#         'lr' : 0.01,
#         'weight_decay': 0.05
#     }
    
#     best_result = 0
#     best_params = {}
    
#     for num_filters in [16, 32]:
#         params['cnn_filters_1'] = num_filters
#         for kernel in [8,12]:
#             params['cnn_kernel_1'] = (kernel, 1)
#             for in_size in [25, 50]:
#                 params['rnn_in_size'] = in_size
#                 for hid_size in [25, 50]:
#                     params['rnn_hid_size'] = hid_size
#                     for dropout in [0.25, 0.5]:
#                         params['rnn_dropout'] = dropout
#                         for epochs in [20]:
#                             params['epochs'] = epochs
#                             for decay in [0.001, 0.01, 0.1]:
#                                 params['weight_decay'] = decay
                                
#                                 # finally done
#                                 result = test_cr(params, X_train, Y_train, X_test, Y_test)
#                                 if result > best_result:
#                                     best_params = params.copy()
#                                     best_result = result
    
#     f = open("xval_results.txt", "w")
#     f.write("Best dict\n")
#     f.write(json.dumps(best_params) + '\n')
#     f.write("Val acc\n")
#     f.write(str(best_result))
#     f.close()

In [214]:
# xval_cr(X_train, Y_train, X_test, Y_test)

In [165]:
# # testing stuff
# X = torch.zeros((500,1,25,1290))
# # print(X.size())
# # X is N x 1 x 25 x 1290

# cr_test_params = gen_cr_params(25)
# cr_test = CNN_RNN(cr_test_params)
# summary(cr_test, (1, 25, 1290))

# params = {
#     'cnn_in_features' : 25,
#     'cnn_filters_1' : 32,
#     'cnn_kernel_1' : (16, 1),
#     'rnn_in_size' : 20,
#     'rnn_hid_size' : 30,
#     'rnn_dropout' : 0.5,
#     'epochs' : 50,
#     'lr' : 0.01,
#     'weight_decay': 0.05
# }

In [None]:
def gen_cr_params(num_features):
    
    params = {
        'cnn_in_features' : num_features,
        'cnn_filters_1' : 32,
        'cnn_kernel_1' : (16, 1),
        'rnn_in_size' : 20,
        'rnn_hid_size' : 30,
        'rnn_dropout' : 0.5,
        'epochs' : 1,
        'lr' : 0.01,
        'weight_decay': 0.05
    }
    
    return params

In [110]:
def cr_evaluate(x_train, y_train, x_val, x_test):
    
    # handling data; creating torch datasets and torch dataloaders
    
    trainset = GenreDataset(x_train, y_train)
    valset = GenreDataset(x_val)
    testset = GenreDataset(x_test)
    
    trainloader = data.DataLoader(trainset, batch_size = 64, shuffle = True)
    valloader = data.DataLoader(valset, batch_size = 64, shuffle = False)
    testloader = data.DataLoader(testset, batch_size = 64, shuffle = False)
    
    # constructing net and surrounding items
    
    num_features = x_train.shape[1]
    params = gen_cr_params(num_features)
    
    cr_net = CNN_RNN(params)
    cr_net.float()
    cr_crit = nn.CrossEntropyLoss()
    cr_opt = torch.optim.Adam(cr_net.parameters(), lr=params['lr'])
    num_epochs = params['epochs']
    
    # training
    
    train(cr_net, cr_crit, cr_opt, num_epochs, trainloader)
    
    # generating outputs
    
    val_predictions = predictions(cr_net, valloader)
    test_predictions = predictions(cr_net, testloader)

    return val_predictions, test_predictions

In [111]:
cr_evaluate(X_train, Y_train, X_val, X_test)

Epoch: 1
Loss: 0.024110042254130045 Accuracy: 13.4%
Finished Training
(250,)
(250,)


(array([0, 0, 6, 4, 1, 4, 0, 4, 4, 6, 1, 1, 6, 0, 0, 0, 9, 6, 0, 9, 0, 6,
        0, 6, 0, 6, 6, 4, 1, 1, 9, 0, 6, 1, 1, 1, 1, 4, 6, 1, 1, 1, 4, 7,
        1, 4, 1, 1, 1, 1, 1, 0, 6, 7, 7, 4, 6, 4, 1, 6, 0, 9, 4, 7, 7, 4,
        7, 1, 0, 0, 6, 6, 6, 6, 0, 0, 6, 0, 7, 7, 4, 7, 7, 4, 7, 7, 4, 4,
        7, 1, 7, 4, 6, 0, 0, 0, 0, 9, 9, 0, 6, 0, 0, 6, 0, 6, 0, 6, 0, 7,
        7, 7, 6, 7, 7, 7, 1, 7, 7, 0, 0, 1, 1, 4, 4, 0, 6, 6, 0, 6, 0, 0,
        0, 0, 9, 1, 4, 7, 6, 4, 4, 1, 9, 7, 4, 1, 1, 1, 7, 4, 7, 1, 4, 0,
        6, 6, 6, 0, 6, 6, 6, 6, 0, 0, 6, 6, 6, 0, 0, 0, 6, 0, 0, 6, 6, 0,
        6, 6, 7, 7, 4, 1, 7, 7, 7, 4, 6, 7, 4, 7, 7, 7, 7, 7, 4, 4, 7, 4,
        7, 4, 7, 7, 7, 7, 7, 7, 7, 7, 0, 6, 6, 6, 0, 6, 6, 0, 4, 4, 6, 1,
        4, 1, 4, 7, 7, 4, 4, 7, 7, 1, 7, 4, 4, 6, 6, 6, 0, 6, 4, 0, 7, 1,
        7, 1, 7, 0, 6, 0, 0, 0]),
 array([1, 0, 1, 0, 0, 4, 1, 0, 1, 0, 0, 0, 0, 0, 0, 4, 0, 6, 0, 6, 6, 1,
        4, 7, 0, 1, 0, 4, 0, 1, 0, 4, 1, 1, 1, 6, 4, 1, 6, 4, 1, 4, 4, 1,
    

TODO:
- try different datapreprocessing: change # of channels for 1d stuff so that we get slices through all the different features of the MFCC 
- rewrite code for DCNN generation to support more versitile dimensionalities
- make RNN: LSTM + (D)CNN
- uncomment out bnorm in CNN generation code



Function:
- Input x_train, x_val, x_test, y_train
- Output model.predict(x_val), model.predict(x_test)
- save model and outputs

# OLD

## CNN

In [None]:
# class Conv_Block(nn.Module):
    
#     def __init__(self, in_planes, planes, kernel_size, stride, pool_size):
        
#         super(Conv_Block, self).__init__()
        
#         self.conv = nn.Conv1d(in_channels = in_planes, out_channels = planes, kernel_size = kernel_size, stride=stride)
#         self.dropout = nn.Dropout(p=0.5)
#         self.maxpool = nn.AvgPool1d(kernel_size = pool_size)
#         self.bnorm = nn.BatchNorm1d(num_features = planes)
    
#     def forward(self, x):
#         x = self.conv(x)
#         x = self.dropout(x)
#         x = self.maxpool(x)
#         x = self.bnorm(x)
#         x = F.relu(x)
        
#         return x

# class Conv(nn.Module):
    
#     def __init__(self, params):    
#         super(Conv, self).__init__()
        
#         filters = params['filters']
#         kernel_sizes = params['kernel_sizes']
#         strides = params['strides']
#         avg_pool_sizes = params['max_pool_sizes']
        
#         assert(len(filters) == len(kernel_sizes))
#         assert(len(filters) == len(strides))
#         assert(len(filters) == len(avg_pool_sizes))
        
#         prev_outplanes = 1
#         prev_outsize = params['input_length']
        
#         layers = []
        
#         for i in range(len(filters)):
#             inplanes = prev_outplanes
#             outplanes = inplanes * filters[i]
#             out_size = math.floor((math.floor((prev_outsize - kernel_sizes[i]) / float(strides[i]))+\
#                                 1)/float(avg_pool_sizes[i]))
            
#             prev_outsize = out_size
#             prev_outplanes = outplanes
            
#             new_block = Conv_Block(inplanes, outplanes, kernel_sizes[i], strides[i], avg_pool_sizes[i])
            
#             layers.append(new_block)
            
#         self.convs = nn.Sequential(*layers)
        
#         in_features = prev_outsize * prev_outplanes
        
#         self.fc = nn.Linear(in_features = in_features, out_features = 10)
        
#     def forward(self, x):
#         x = self.convs(x)
#         x = x.view(x.size()[0], -1)
#         x = self.fc(x)
#         return x

In [None]:
# Debugging

# big big convnet
# conv_params = {
#     'input_length' : 16770,
#     'filters' : [8, 8, 8, 8],
#     'kernel_sizes' : [64, 32, 16, 8],
#     'strides' : [8, 4, 2, 1],
#     'max_pool_sizes' : [2, 2, 2, 2]
# }

# smaller convnet
# conv_params = {
#     'input_length' : 16770,
#     'filters' : [8, 16, 64],
#     'kernel_sizes' : [64, 8, 4],
#     'strides' : [8, 4, 2],
#     'max_pool_sizes' : [16, 4, 2]
# }

# smallest convnet
conv_params = {
    'input_length' : 16770,
    'filters' : [8, 64],
    'kernel_sizes' : [64, 4],
    'strides' : [8, 2],
    'max_pool_sizes' : [32, 4]
}

# observation: this net works as long as we don't run out of elements to convolute across (no padding)

conv_test = Conv(conv_params)
# clipper = WeightClipper()
# conv_test.apply(clipper)
summary(conv_test, (1,16770))

## DCNN

In [None]:
# class DConv_Block(nn.Module):
    
#     def __init__(self, in_planes, planes, kernel_size, dialation, pool_size):
        
#         super(DConv_Block, self).__init__()
        
#         padding = int((kernel_size + (kernel_size - 1)*(dialation - 1) - 1)/2)
        
#         self.dconv = nn.Conv1d(in_channels = in_planes, out_channels = planes,
#                                kernel_size = kernel_size, stride=1, padding=padding, dilation=dialation)
#         self.dropout = nn.Dropout(p=0.5)
#         self.avgpool = nn.AvgPool1d(kernel_size = pool_size)
#         self.bnorm = nn.BatchNorm1d(num_features = planes)
    
#     def forward(self, x):
#         x = self.dconv(x)
#         x = self.dropout(x)
#         x = self.avgpool(x)
#         x = self.bnorm(x)
#         x = F.relu(x)
#         return x

# class DConv(nn.Module):
    
#     def __init__(self, params):    
#         super(DConv, self).__init__()
        
#         filters = params['filters']
#         kernel_sizes = params['kernel_sizes']
#         dialations = params['dialations']
#         avg_pool_sizes = params['avg_pool_sizes']
        
#         assert(len(filters) == len(kernel_sizes))
#         assert(len(filters) == len(dialations))
#         assert(len(filters) == len(avg_pool_sizes))
        
#         prev_outplanes = 1
#         prev_outsize = params['input_length']

#         layers = []
        
#         for i in range(len(filters)):
#             inplanes = prev_outplanes
#             outplanes = inplanes * filters[i]
            
#             out_size = math.floor(prev_outsize / float(avg_pool_sizes[i]))
            
#             prev_outsize = out_size
#             prev_outplanes = outplanes
            
#             new_block = DConv_Block(inplanes, outplanes, kernel_sizes[i], dialations[i], avg_pool_sizes[i])
            
#             layers.append(new_block)
            
#         self.dconvs = nn.Sequential(*layers)
        
#         in_features = prev_outsize * prev_outplanes
        
#         self.fc = nn.Linear(in_features = in_features, out_features = 10)
        
#     def forward(self, x):
#         x = self.dconvs(x)
#         x = x.view(x.size()[0], -1)
#         x = self.fc(x)
#         return x


In [None]:
# Debugging
dconv_params = {
    'input_length' : 16770,
    'filters' : [16, 4],
    'kernel_sizes': [64, 16],
    'dialations': [8, 2],
    'avg_pool_sizes': [32, 4]
}

# observation: this net works as long as dialation_2 is even

dconv_test = DConv(dconv_params)
summary(dconv_test, (1,16770))

# Evaluating Models

In [None]:
# from brendan
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.utils.multiclass import unique_labels
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import CCA
from sklearn.utils import shuffle
import seaborn as sns
import matplotlib.pyplot as plt

def plot_confusion_matrix(test_labels, predictions, title):
    ax= plt.subplot()
    cm = confusion_matrix(test_labels, predictions)
    sns.heatmap(cm, annot=True, ax = ax, cmap = sns.cm.rocket_r); #annot=True to annotate cells

    # labels, title and ticks
    ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels'); 
    ax.set_title(f'{title} Confusion Matrix'); 
    ax.set_ylim(top=0, bottom=10)
    ax.xaxis.set_ticklabels(all_genres); ax.yaxis.set_ticklabels(all_genres);
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
                 rotation_mode="anchor")
    plt.setp(ax.get_yticklabels(), rotation=45, ha="right",
                 rotation_mode="anchor")
    plt.show()

In [None]:
# test_labels = Y_test

def net_confusion_matrix(net, testloader, test_labels, title):
    
    separate_predictions = []
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    
    net.eval()
    
    with torch.no_grad():
        for data in testloader:
            
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            separate_predictions.append(predicted.numpy())
    
    predictions = np.concatenate(separate_predictions)
    plot_confusion_matrix(test_labels, predictions, title)            

In [None]:
conv_net = Conv(conv_params)

# regularizing
conv_net.apply(clipper)

conv_crit = nn.CrossEntropyLoss()
conv_opt = torch.optim.Adam(conv_net.parameters(), lr=0.001)

In [None]:
dconv_net = DConv(dconv_params)

# regularizing
dconv_net.apply(clipper)

dconv_crit = nn.CrossEntropyLoss()
dconv_opt = torch.optim.Adam(dconv_net.parameters(), lr=0.001)

In [None]:
# %%time
# # training and evaluating CNN

# train(conv_net, conv_crit, conv_opt, 30, genre_trainloader)
# test(conv_net, genre_testloader)
# net_confusion_matrix(conv_net, genre_testloader, Y_test, "Preliminary CNN")

In [None]:
# %%time
# # training and evaluating DCNN

# train(dconv_net, dconv_crit, dconv_opt, 30, genre_trainloader)
# test(dconv_net, genre_testloader)
# net_confusion_matrix(dconv_net, genre_testloader, Y_test, "Preliminary DCNN")