## Necessary Imports

In [None]:
%load_ext autoreload
%autoreload 2

import math
import os
import json

import tensorflow as tf
import tensorflow.keras as keras

import librosa

import numpy as np
import seaborn as sns
import pandas as pd

from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.manifold import TSNE

import matplotlib.pyplot as plt
%matplotlib inline

# custom helper functions
from utils import *

from xgboost import XGBClassifier

# Single seed shared by every stochastic step in the notebook.
RANDOM_SEED = 0

# Apply the seed to the global RNGs so a fresh "Restart & Run All" reproduces
# the same splits, weight initialisation and batch shuffling.
np.random.seed(RANDOM_SEED)      # NumPy global RNG (scikit-learn falls back to it when random_state is None)
tf.random.set_seed(RANDOM_SEED)  # TensorFlow / Keras global seed

## Load Data & Develop Test/Train Splits

In [None]:
# Root directory holding the datasets and saved models. Set the MUSIC_DATA_DIR
# environment variable to run the notebook on another machine; when it is not
# set, the original hard-coded location is used.
DATA_DIR = os.environ.get("MUSIC_DATA_DIR", "/storage/graham/music")

FMA_JSON_PATH = f"{DATA_DIR}/fma_small_data.json"
GTZAN_JSON_PATH = f"{DATA_DIR}/gtzan_data.json"
MODEL_PATH = f"{DATA_DIR}/models/"  # trailing slash preserved from the original value

GTZAN_SAMPLE_RATE = 22050  # gtzan sample rate
FMA_SAMPLE_RATE = GTZAN_SAMPLE_RATE * 2  # fma sample rate (twice the GTZAN rate)
DURATION = 30  # measured in seconds

In [None]:
%%time
# Load the FMA data from the JSON file at FMA_JSON_PATH; %%time reports how long the load takes.
# NOTE(review): load_mfcc_data is not defined in this notebook — presumably it comes
# from `from utils import *`. Its three return values are unpacked as (genres, X, y);
# later cells index X.shape[0..2], so X is expected to be a 3-D array — confirm against utils.
genres, X, y = load_mfcc_data(FMA_JSON_PATH)

In [None]:
SPLIT = 0.3  # fraction of samples held out as the test set

# create train/test split; random_state pins the shuffle so the partition
# is the same on every run (matches the seeded GTZAN split later on)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=SPLIT, random_state=RANDOM_SEED
)

# reshape MFCC to be a vector: merge axes 1 and 2 so each sample is one flat row
X_train_vec = X_train.reshape(X_train.shape[0], X_train.shape[1] * X_train.shape[2])
X_test_vec = X_test.reshape(X_test.shape[0], X_test.shape[1] * X_test.shape[2])

# same flattening applied to the full, unsplit dataset
X_vec = X.reshape(X.shape[0], X.shape[1] * X.shape[2])

## Transfer Learning to GTZAN

### Load in GTZAN Dataset

In [None]:
%%time
# Load the GTZAN data from the JSON file at GTZAN_JSON_PATH; %%time reports how long the load takes.
# NOTE(review): load_mfcc_data is not defined in this notebook — presumably it comes
# from `from utils import *`. Its three return values are unpacked as
# (genres_gtzan, X_gtzan, y_gtzan); X_gtzan is later reshaped via shape[0..2],
# so it is expected to be a 3-D array — confirm against utils.
genres_gtzan, X_gtzan, y_gtzan = load_mfcc_data(GTZAN_JSON_PATH)

In [None]:
# Three-way partition of GTZAN into train / validation / test subsets.
# RANDOM_SEED is forwarded to the project helper as `rseed`.
(
    X_gtzan_train, X_gtzan_val, X_gtzan_test,
    y_gtzan_train, y_gtzan_val, y_gtzan_test,
) = test_train_val_split(
    X_gtzan,
    y_gtzan,
    test_size=0.3,
    val_size=0.3,
    rseed=RANDOM_SEED,
)

# Merge axes 1 and 2 of the full dataset so every sample becomes one flat row.
X_gtzan_vec = X_gtzan.reshape(
    (X_gtzan.shape[0], X_gtzan.shape[1] * X_gtzan.shape[2])
)

### Load Previous CNN Model

In [None]:
# Load the CNN saved under <MODEL_PATH>/fma_cnn. os.path.join is used instead of
# string concatenation: the original line had a stray quote (SyntaxError) and,
# because MODEL_PATH already ends in "/", produced a doubled path separator.
base_model = keras.models.load_model(os.path.join(MODEL_PATH, "fma_cnn"))

# Mark every layer of the loaded model as non-trainable before it is reused.
for layer in base_model.layers:
    layer.trainable = False
base_model.summary()

### Replace Last Layer to Match GTZAN Dimensionality

In [None]:
# New softmax output layer with one unit per entry in genres_gtzan.
gtzan_predict = keras.layers.Dense(
    units=len(genres_gtzan),
    activation='softmax',
    name='gtzan',
)

# Swap the new layer into the base model using the project helper.
# NOTE(review): 12 is a hard-coded layer index — presumably the base model's
# final layer (per the section title); confirm against base_model.summary().
gtzan_transfer = replace_intermediate_layer_in_keras(base_model, 12, gtzan_predict)

# Rename by assigning the underscore-prefixed `_name` attribute directly.
gtzan_transfer._name = "FMA_Transfer_to_GTZAN"
gtzan_transfer.summary()

### Fine Tune on GTZAN Data

In [None]:
%%time
# compile model
optimiser = keras.optimizers.Adam(learning_rate=0.001)
gtzan_transfer.compile(optimizer=optimiser,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# prep data for CNN
X_gtzan_train_cnn = X_gtzan_train[..., np.newaxis]
X_gtzan_val_cnn = X_gtzan_val[..., np.newaxis]
X_gtzan_test_cnn = X_gtzan_test[..., np.newaxis]

# train model
history = gtzan_transfer.fit(X_gtzan_train_cnn, 
                             y_gtzan_train, 
                             validation_data=(X_gtzan_val_cnn, y_gtzan_val), 
                             batch_size=32, 
                             epochs=30,
                             callbacks=[saveBest, es, lr])

# plot accuracy/error for training and validation
plot_history(history)

# evaluate model on test set
test_loss, test_acc = gtzan_transfer.evaluate(X_gtzan_test_cnn, y_gtzan_test, verbose=2)
print('\nTest accuracy:', test_acc)