In [184]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

from sklearn.model_selection import train_test_split
from sklearn import preprocessing

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
import math, random, pickle, os, operator
from tempfile import TemporaryFile

from scipy.io import wavfile
from python_speech_features import mfcc
%matplotlib inline

In [185]:
## Use GPU support
# needed to prevent error from using too much gpu memory
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

1 Physical GPUs, 1 Logical GPUs


Load Data

In [186]:
# loading data - could try converting to pandas Dataframe instead
dataset = []
train = []
test = []
with open("feat.dat", 'rb') as f:
    while True:
        try:
            dataset.append(pickle.load(f))
        except EOFError:
            f.close()
            break
    


In [187]:
# convert data to Pandas DataFrame
flattened = []
for x in dataset:   
    flattened.append(list(x[0]) + list(x[1].flatten()) + [x[2]])

cols = [['m' +str(i) for i in range(13)]+['c'+str(i) for i in range(169)]+["Genre"]]
df = pd.DataFrame(flattened, columns=cols)
    

In [188]:
df.head()


Unnamed: 0,m0,m1,m2,m3,m4,m5,m6,m7,m8,m9,...,c160,c161,c162,c163,c164,c165,c166,c167,c168,Genre
0,76.502611,-1.961417,-15.774347,3.831419,-10.473326,1.311828,-19.393732,5.28679,-16.631725,5.353444,...,3.786696,-0.021175,-14.523117,-12.995331,-3.843489,-14.831619,2.251195,29.282607,64.650762,1
1,66.93165,0.718853,-3.26783,4.1812,-8.050941,6.448259,-17.768517,14.09181,-18.332536,3.68556,...,-16.026806,-13.013754,18.476615,11.603178,-3.788941,-17.738734,-8.665845,18.410328,90.762876,1
2,78.774103,3.0397,-19.024435,-0.983591,-14.699337,7.586562,-12.823053,1.89292,-14.865326,3.595252,...,-18.08772,-6.627783,-2.016122,-12.890326,-3.702126,-3.862404,0.221521,-9.624113,83.777252,1
3,66.696107,9.211514,-4.532876,5.027148,-8.254011,8.101201,-9.67661,9.824043,-6.735213,10.332956,...,-21.272778,-13.864303,13.76483,4.09159,-5.564044,-11.718776,-10.09775,4.90207,69.758813,1
4,71.972846,0.50641,-23.834838,-2.181944,-29.875498,0.719521,-19.039014,-1.353935,-13.370416,5.069944,...,-15.531564,9.558282,14.068872,-4.271712,-29.419962,13.653176,6.385477,7.264809,102.319498,1


The model now has 183 features and 1000 training examples
- 13 - mean for each cepstral coefficient
- 169 - elements of 13x13 covariance matrix
- 1 - genre label (1-10)

**FEATURE ANALYSIS**

Ideas:
- Split data into training and test data (and randomize)
- normalise values

In [191]:
# split into train and test data with separate labels:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,:-1],df[["Genre"]], test_size=0.33, random_state=420)

In [192]:
# normalisation
# min and max processor
min_max_scaler = preprocessing.MinMaxScaler()
train_scaled = min_max_scaler.fit_transform(X_train)
test_scaled = min_max_scaler.fit_transform(X_test)
# apply normalizer
X_trainn = pd.DataFrame(train_scaled)
X_testn = pd.DataFrame(test_scaled)

X_trainn.columns = X_train.columns
X_testn.columns = X_test.columns


**Define Convolutional Neural Network model**
- Using Conv2D
    - need to have features in a 2D array
        - can append 



In [230]:
# convert features to 2d array 14x13
X_2d_train = np.zeros(shape=(len(X_trainn), 14,13,1))
for i in range(len(X_trainn)):
    X_2d_train[i] = X_trainn.iloc[i].to_numpy().reshape(14,13,1)

In [231]:
X_2d_train.shape

(670, 14, 13, 1)

In [238]:
model = Sequential()

model.add(Conv2D(64, (3,3), input_shape = X_2d_train.shape[1:]))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(64, (3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten()) # as it was 2D, dense needs 1D
model.add(Dense(64))

model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss="binary_crossentropy",
              optimizer="adam",
              metrics=['accuracy'])

model.fit(X_2d_train, y_train, batch_size=100, epochs=10, validation_split=0.1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x206edbdf1c0>

Using covariance matrix and mean matrix in CNN didn't seem to train well at all. Instead, I'll use the Mel spectogram (MFCC before carrying out DCT) as features (seen in CNN with spectrograms Notebook).
Otherwise, I could use a different model for the mel coefficients like logistic regression, multiclass SVMs and K-means clustering:

In [197]:
X_train.shape

(670, 182)

In [204]:
np.array(dataset).shape

  np.array(dataset).shape


(1000, 3)

(array([ 76.50261107,  -1.96141736, -15.7743469 ,   3.8314189 ,
        -10.47332553,   1.31182833, -19.39373183,   5.28678994,
        -16.63172467,   5.3534445 ,  -9.85657026,   6.25007663,
         -5.58749505]),
 array([[ 3.13643266e+01, -1.34683421e+00, -9.48272203e-01,
         -2.05382281e+01, -1.45847814e+01, -1.72347943e+01,
         -9.35145739e+00,  2.72034545e+00, -3.29035455e+00,
          1.04057336e+00,  2.32513168e+00,  6.09119695e+00,
          8.44732834e+00],
        [-1.34683421e+00,  3.64751586e+01, -2.19100677e+01,
         -1.52506981e+01, -7.71247217e+00, -1.42477860e+01,
         -6.48177523e+00,  3.95463925e+00,  1.14509487e+00,
         -4.30576920e+00,  9.05817993e+00,  8.98145706e+00,
          7.68495192e+00],
        [-9.48272203e-01, -2.19100677e+01,  7.63578328e+01,
          1.69425823e+01, -2.67611115e+01, -2.71149039e+00,
          1.99049253e+01,  1.64116110e+01, -1.31355709e+01,
          1.53609455e+01, -1.31293962e+01, -7.47897696e+00,
         -