In [None]:
import time
import os
import ast

import IPython.display as ipd
from tqdm import tqdm_notebook
import numpy as np
import pandas as pd

import keras
from keras.layers import Activation, Dense, Conv1D, Conv2D, MaxPooling1D, Flatten, Reshape, BatchNormalization, Dropout

from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, StandardScaler, LabelBinarizer
from sklearn.linear_model import LogisticRegression

from sklearn import linear_model


import matplotlib.pyplot as plt
import itertools

import utils
from utils import plot_confusion_matrix

### Constants

In [None]:
# Dataset locations, relative to the notebook's working directory.
# Built with os.path.join so the separator matches the host OS instead of
# being hard-coded to the Windows backslash.
AUDIO_DIR = os.path.join("..", "fma_small")
META_DIR = os.path.join("..", "fma_metadata")
# Name of the dataset subset this notebook works on.
SUBSET = 'small'

### Load data

In [None]:
# Load metadata to memory
def load_meta_data():
    """Load the track, feature and echonest metadata tables from META_DIR.

    Each table is read with ``utils.load`` from ``tracks.csv``,
    ``features.csv`` and ``echonest.csv`` respectively.

    Returns:
        tuple: ``(tracks_all, features_all, echonest_all)`` as returned by
        ``utils.load``.

    Raises:
        AssertionError: if the features and tracks tables do not share the
            same index, or if an echonest row has no matching track.
    """
    # os.path.join keeps the paths valid on every OS; on Windows it yields
    # the same backslash-separated path as plain concatenation would.
    tracks_all   = utils.load(os.path.join(META_DIR, 'tracks.csv'))
    features_all = utils.load(os.path.join(META_DIR, 'features.csv'))
    echonest_all = utils.load(os.path.join(META_DIR, 'echonest.csv'))

    #genres = utils.load(os.path.join(META_DIR, 'genres.csv'))

    # Features must line up row-for-row with tracks; echonest only needs to
    # be a subset of the known tracks.
    np.testing.assert_array_equal(features_all.index, tracks_all.index)
    assert echonest_all.index.isin(tracks_all.index).all()

    return tracks_all, features_all, echonest_all

In [None]:
# Choose Subset
def choose_small_subset(tracks_all, features_all, echonest_all):
    """Restrict the track and feature tables to the 'small' subset.

    Rows are kept when their ``('set', 'subset')`` value compares
    ``<= 'small'``.

    Args:
        tracks_all: full tracks table with a ``('set', 'subset')`` column.
        features_all: full features table indexed like ``tracks_all``.
        echonest_all: accepted for signature compatibility; not used here.

    Returns:
        tuple: ``(tracks, features)`` limited to the selected track ids.

    Raises:
        AssertionError: if a selected id is missing from either table.
    """
    in_small = tracks_all['set', 'subset'] <= 'small'
    subset = tracks_all.index[in_small]

    # Every selected id has to exist in both tables before slicing.
    for frame in (tracks_all, features_all):
        assert subset.isin(frame.index).all()

    return tracks_all.loc[subset], features_all.loc[subset]
    

In [None]:
# Read the full metadata tables, then narrow tracks/features to the small subset.
tracks_all, features_all, echonest_all = load_meta_data()
tracks, features = choose_small_subset(
    tracks_all=tracks_all,
    features_all=features_all,
    echonest_all=echonest_all,
)

In [None]:
# Display the (rows, columns) shape of both subset tables.
(tracks.shape, features.shape)

### Split Train Val Test

In [None]:
# Track ids for each partition, taken from the ('set', 'split') column.
train_index = tracks.index[tracks['set', 'split'].eq('training')]
val_index   = tracks.index[tracks['set', 'split'].eq('validation')]
test_index  = tracks.index[tracks['set', 'split'].eq('test')]

# Report how many tracks landed in each partition.
print('{} training examples'.format(train_index.size))
print('{} validation examples'.format(val_index.size))
print('{} testing examples'.format(test_index.size))

In [None]:
# Feature matrix for every track in the subset.
X = features.values
# Top-level genre label of every track in the subset.
Y = tracks['track'].loc[:, 'genre_top']

In [None]:
# Feature rows for each partition.
Xtrain = features.loc[train_index].values
Xval   = features.loc[val_index].values
Xtest  = features.loc[test_index].values

# Matching top-genre labels, selected with a single .loc lookup per partition.
Ytrain = tracks.loc[train_index, ('track', 'genre_top')].values
Yval   = tracks.loc[val_index, ('track', 'genre_top')].values
Ytest  = tracks.loc[test_index, ('track', 'genre_top')].values

In [None]:
# Distinct genre labels in a stable, sorted order. A bare list(set(...)) has
# an arbitrary order that can change between kernel sessions, which would
# make any class-index mapping built from it non-reproducible.
# NOTE(review): assumes Y contains no missing labels — confirm for this subset.
classes = sorted(set(Y))

### Deep Learning Model Using Audio

In [None]:
# Track ids of each partition as plain numpy arrays.
trainIDs = tracks.loc[train_index].index.values
valIDs   = tracks.loc[val_index].index.values
testIDs  = tracks.loc[test_index].index.values

In [None]:
# One-hot encode the single top-level genre of each track.
# LabelBinarizer is the right tool for one label per sample;
# MultiLabelBinarizer expects a collection of labels per sample, so given
# plain strings it binarises the individual characters of each genre name.
labels_onehot = LabelBinarizer().fit_transform(tracks['track', 'genre_top'])
# Re-attach the track ids so rows can be looked up by track.
labels_onehot = pd.DataFrame(labels_onehot, index=tracks.index)

### CNN using the 128x128 patches (resized to 105x105 when loaded)


In [None]:
# Root folder of the spectrogram images. The trailing empty component keeps
# the trailing separator so that `SPEC_DIR + 'train'` still forms a valid
# path, while os.path.join picks the separator that fits the host OS.
SPEC_DIR = os.path.join("..", "spectrogram", "")
# Number of images per batch produced by the data generators.
batch_size = 32

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, GRU
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.layers.normalization import BatchNormalization

def cnn_model(input_shape=(105,105,3), output = 8):
    """Build and compile the convolutional-recurrent classifier.

    Architecture: input batch-norm, three Conv2D/ELU/MaxPooling/Dropout
    stages (32, 32 and 64 filters), a reshape of the final feature map into
    a sequence of 64-dimensional vectors, two stacked GRU layers, and a
    dense head ending in ``output`` class probabilities.

    Args:
        input_shape: shape of one input image, ``(height, width, channels)``.
        output: number of classes, i.e. units in the final layer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the RMSprop optimizer and the accuracy metric.
    """
    model = Sequential()

    # NOTE(review): axis=2 normalises along the second spatial axis of the
    # input, whereas the later BatchNormalization layers use axis=3
    # (channels). Left unchanged because switching axes changes the layer's
    # weight shapes and would invalidate previously saved weights — confirm
    # whether this is intentional.
    model.add(BatchNormalization(axis=2, input_shape=input_shape))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('elu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.1))

    # Second and third convolutional stages share the same layout and differ
    # only in the number of filters.
    for filters in (32, 64):
        model.add(Conv2D(filters, (3, 3)))
        model.add(BatchNormalization(axis=3))
        model.add(Activation('elu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.1))

    # Flatten the spatial grid into a sequence of 64-dimensional vectors for
    # the recurrent layers. Using -1 lets Keras infer the sequence length
    # (11*11 for the default 105x105 input) so other input sizes also work.
    model.add(Reshape((-1, 64)))
    model.add(GRU(32, return_sequences=True, name='gru1'))
    model.add(GRU(32, return_sequences=False, name='gru2'))
    model.add(Dropout(0.25))

    model.add(Dense(64))
    model.add(Activation('elu'))
    model.add(Dropout(0.25))
    model.add(Dense(output))
    # Softmax yields a proper probability distribution over mutually
    # exclusive classes, which is what categorical cross-entropy expects;
    # sigmoid scores each class independently.
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    return model
    
# cnn_model()

In [None]:

from keras.preprocessing.image import ImageDataGenerator

# Both generators only rescale pixel values from [0, 255] to [0, 1];
# no other augmentation is configured.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream training images from the 'train' folder, resized to 105x105.
train_generator = train_datagen.flow_from_directory(
    directory=SPEC_DIR + 'train',
    target_size=(105, 105),
    batch_size=batch_size,
)

# Stream validation images from the 'val' folder with the same settings.
val_generator = test_datagen.flow_from_directory(
    directory=SPEC_DIR + 'val',
    target_size=(105, 105),
    batch_size=batch_size,
)


In [None]:
# Build the network with its default configuration and print the layer table.
model = cnn_model(input_shape=(105, 105, 3), output=8)
model.summary()

In [None]:
# NOTE(review): `init_env_and_tfboard` is neither defined nor imported in the
# visible part of this notebook, so this cell fails on a fresh kernel unless
# the helper comes from elsewhere — confirm where it lives (possibly `utils`).
# Presumably it returns a TensorBoard callback; TFBoard is only referenced by
# the commented-out "callbacks" entry in the training cell.
TFBoard = init_env_and_tfboard("cnn")
# Rebuild the model so training starts from freshly initialised weights.
model = cnn_model()

In [None]:
# Resume from the checkpoint of a previous run when one exists. The file is
# only written by the final save cell, so on a first run (or a fresh
# checkout) it is absent and an unconditional load would raise.
if os.path.exists('crnn_try_1.h5'):
    model.load_weights('crnn_try_1.h5')
else:
    print('crnn_try_1.h5 not found; training from scratch')

In [None]:
# Extra keyword arguments for training; verbose=2 prints one line per epoch.
params = {
#         "callbacks": [TFBoard],
        "verbose": 2
}

# steps_per_epoch must be a whole number of batches: true division gives a
# float (312.5 for a batch size of 32), so use floor division, matching how
# validation_steps is computed.
model.fit_generator(train_generator,
                    steps_per_epoch = 10000 // batch_size,
                    epochs = 30,
                    validation_data=val_generator,
                    validation_steps=800 // batch_size,
                    **params)
             

In [None]:
# Persist the trained weights so a later run can reload them.
model.save_weights(filepath='crnn_try_1.h5')