In [51]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from pathlib import Path
import pydotplus
import seaborn as sns
import sklearn
import sys

from graphviz import Source
from pandarallel import pandarallel
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve
from sklearn.metrics import classification_report, plot_confusion_matrix, mean_squared_error
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.neural_network import MLPClassifier

from os import listdir, path
from os.path import isfile, join

# Python ≥3.5 is required
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
assert sklearn.__version__ >= "0.20"

# To plot pretty figures
%matplotlib inline
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
plt.style.use('fivethirtyeight')

# to make this notebook's output stable across runs
np.random.seed(42)

RESULTS_PATH = os.path.join('./results')

os.environ['LIBROSA_CACHE_DIR'] = '/tmp/librosa_cache'
import librosa

%matplotlib inline
pandarallel.initialize(progress_bar=True)

INFO: Pandarallel will run on 6 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [47]:
genre_folder = '../MLP2/rawdata/genres'
genre_names = [f for f in listdir(
    path=genre_folder) if not isfile(join(genre_folder, f))]

SAMPLE_RATE = 22050

time_series_data, labels = [], []
i = 0

song_paths = []
for g in genre_names:
    p = genre_folder + '/' + g  # path for a genre folder
    for song in sorted(listdir(path=p)):  # song at index 0 for each folder
        song_paths.append(p + '/' + song)
        # time_series, sample_rate = librosa.load(Path(song_path))
        # t = np.array(time_series)
        # time_series_data.append(t)
        labels.append(g)
        i += 0

In [48]:
song_df = pd.DataFrame({'path': song_paths, 'genre': labels})
song_df.head()

Unnamed: 0,path,genre
0,../MLP2/rawdata/genres/pop/pop.00000.wav,pop
1,../MLP2/rawdata/genres/pop/pop.00001.wav,pop
2,../MLP2/rawdata/genres/pop/pop.00002.wav,pop
3,../MLP2/rawdata/genres/pop/pop.00003.wav,pop
4,../MLP2/rawdata/genres/pop/pop.00004.wav,pop


In [73]:
song_df['time_series'] = song_df.path.parallel_apply(
    lambda x: librosa.load(x)[0])


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=167), Label(value='0 / 167'))), HB…

In [85]:
song_df['fourier_tempogram'] = song_df.time_series.parallel_apply(
    lambda x: librosa.feature.fourier_tempogram(y=x)[0])
song_df['mfcc'] = song_df.time_series.parallel_apply(
    lambda x: librosa.feature.mfcc(y=x, sr=SAMPLE_RATE)[0])
song_df['zero_crossing_rate'] = song_df.time_series.parallel_apply(
    lambda x: librosa.feature.zero_crossing_rate(y=x)[0])
song_df['spectral_centroid'] = song_df.time_series.parallel_apply(
    lambda x: librosa.feature.spectral_centroid(y=x)[0])
song_df['spectral_rolloff'] = song_df.time_series.parallel_apply(
    lambda x: librosa.feature.spectral_rolloff(y=x)[0])
song_df['chroma_frequencies'] = song_df.time_series.parallel_apply(
    lambda x: librosa.feature.chroma_stft(y=x)[0])

song_df

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=167), Label(value='0 / 167'))), HB…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=167), Label(value='0 / 167'))), HB…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=167), Label(value='0 / 167'))), HB…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=167), Label(value='0 / 167'))), HB…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=167), Label(value='0 / 167'))), HB…

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=167), Label(value='0 / 167'))), HB…

Unnamed: 0,path,genre,time_series,zero_crossing_rate,spectral_centroid,spectral_rolloff,mfcc,chroma_frequencies,fourier_tempogram
0,../MLP2/rawdata/genres/pop/pop.00000.wav,pop,"[-0.0887146, -0.09524536, -0.102752686, -0.113...","[0.00927734375, 0.0234375, 0.04541015625, 0.05...","[1812.4619528427452, 2913.4550162394453, 2870....","[5415.6005859375, 8247.216796875, 8118.0175781...","[-195.25616, -150.96812, -132.99374, -149.2361...","[1.0, 1.0, 0.5475674, 0.3053872, 0.4002126, 0....","[(147.03117+0j), (148.41435+0j), (149.79546+0j..."
1,../MLP2/rawdata/genres/pop/pop.00001.wav,pop,"[0.7692261, 0.2246399, 0.69036865, 0.7468872, ...","[0.02587890625, 0.03173828125, 0.03662109375, ...","[3696.826349368777, 3781.2397094717344, 3663.4...","[8268.75, 8570.21484375, 8828.61328125, 8021.1...","[8.333065, -5.50078, -77.25962, -142.30276, -2...","[0.79645014, 0.55124533, 0.47261623, 0.3771407...","[(220.49376+0j), (222.84566+0j), (225.19794+0j..."
2,../MLP2/rawdata/genres/pop/pop.00002.wav,pop,"[-0.09274292, -0.11630249, -0.11886597, -0.107...","[0.03076171875, 0.05419921875, 0.0869140625, 0...","[2951.7610867714993, 2998.306335806811, 3043.4...","[8764.013671875, 8925.5126953125, 7913.4521484...","[-171.28282, -156.23244, -45.063564, 26.249937...","[0.29866484, 0.137576, 0.26829377, 0.56323946,...","[(167.81305+0j), (169.5363+0j), (171.25871+0j)..."
3,../MLP2/rawdata/genres/pop/pop.00003.wav,pop,"[-0.024993896, -0.018157959, -0.020233154, 0.0...","[0.06689453125, 0.08251953125, 0.0908203125, 0...","[3016.556497593843, 2733.093472180655, 2394.70...","[8914.74609375, 7934.9853515625, 7418.18847656...","[-151.17372, -115.99337, -137.84445, -183.4657...","[1.0, 1.0, 0.35716882, 0.13711458, 0.09756262,...","[(153.44576+0j), (154.8538+0j), (156.2569+0j),..."
4,../MLP2/rawdata/genres/pop/pop.00004.wav,pop,"[-0.12844849, -0.10952759, -0.06726074, -0.028...","[0.0419921875, 0.0634765625, 0.08447265625, 0....","[1861.496524385576, 1894.3471992720065, 1987.4...","[3348.4130859375, 3348.4130859375, 3359.179687...","[-87.60696, -63.465073, -62.712914, -56.52946,...","[0.31229314, 0.39315093, 0.3674996, 0.27865446...","[(129.28964+0j), (130.727+0j), (132.16612+0j),..."
...,...,...,...,...,...,...,...,...,...
995,../MLP2/rawdata/genres/jazz/jazz.00095.wav,jazz,"[-0.062194824, -0.09188843, -0.072052, -0.0799...","[0.02587890625, 0.03759765625, 0.0556640625, 0...","[1814.6794306123995, 2044.7159649785262, 2234....","[3886.7431640625, 4909.5703125, 5760.131835937...","[-211.30547, -206.57405, -231.43155, -213.8381...","[0.3083562, 0.11353422, 0.09033802, 0.1906156,...","[(103.104195+0j), (104.22215+0j), (105.34045+0..."
996,../MLP2/rawdata/genres/jazz/jazz.00096.wav,jazz,"[0.045013428, 0.07720947, 0.058258057, 0.06036...","[0.044921875, 0.0673828125, 0.0859375, 0.09570...","[1955.5418442933342, 2164.7091771559126, 2369....","[4532.7392578125, 5200.2685546875, 5523.266601...","[-181.1916, -177.69016, -204.2336, -220.05225,...","[0.08535859, 0.028048158, 0.010128682, 0.00710...","[(105.53118+0j), (106.63355+0j), (107.73575+0j..."
997,../MLP2/rawdata/genres/jazz/jazz.00097.wav,jazz,"[0.02279663, 0.03564453, 0.029205322, 0.029632...","[0.017578125, 0.0263671875, 0.03564453125, 0.0...","[1187.8249264990166, 952.6527363855892, 817.10...","[2217.919921875, 1442.724609375, 1184.32617187...","[-354.09113, -373.8292, -403.8538, -416.41177,...","[0.046386052, 0.038805358, 0.03744893, 0.03175...","[(90.99232+0j), (92.01011+0j), (93.02835+0j), ..."
998,../MLP2/rawdata/genres/jazz/jazz.00098.wav,jazz,"[-0.031066895, -0.05078125, -0.04537964, -0.04...","[0.0068359375, 0.01611328125, 0.0205078125, 0....","[1456.6106500541023, 1413.8374732896405, 1448....","[3563.7451171875, 3499.1455078125, 3574.511718...","[-308.07678, -300.8568, -313.62973, -321.6552,...","[0.9682443, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9974777...","[(130.84767+0j), (132.1267+0j), (133.40443+0j)..."


In [83]:
[[len(song_df.iloc[i][c]) for c in song_df.columns] for i in range (10)]

[[40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294],
 [40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294],
 [40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294],
 [40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294],
 [40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294],
 [40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294],
 [40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294],
 [40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294],
 [40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294],
 [40, 3, 661504, 1293, 1293, 1293, 1293, 1293, 1294]]

In [97]:
X = song_df[['zero_crossing_rate', 'spectral_centroid', 'spectral_rolloff', 'mfcc', 'chroma_frequencies', 'fourier_tempogram']]
y = song_df['genre']
X

Unnamed: 0,zero_crossing_rate,spectral_centroid,spectral_rolloff,mfcc,chroma_frequencies,fourier_tempogram
0,"[0.00927734375, 0.0234375, 0.04541015625, 0.05...","[1812.4619528427452, 2913.4550162394453, 2870....","[5415.6005859375, 8247.216796875, 8118.0175781...","[-195.25616, -150.96812, -132.99374, -149.2361...","[1.0, 1.0, 0.5475674, 0.3053872, 0.4002126, 0....","[(147.03117+0j), (148.41435+0j), (149.79546+0j..."
1,"[0.02587890625, 0.03173828125, 0.03662109375, ...","[3696.826349368777, 3781.2397094717344, 3663.4...","[8268.75, 8570.21484375, 8828.61328125, 8021.1...","[8.333065, -5.50078, -77.25962, -142.30276, -2...","[0.79645014, 0.55124533, 0.47261623, 0.3771407...","[(220.49376+0j), (222.84566+0j), (225.19794+0j..."
2,"[0.03076171875, 0.05419921875, 0.0869140625, 0...","[2951.7610867714993, 2998.306335806811, 3043.4...","[8764.013671875, 8925.5126953125, 7913.4521484...","[-171.28282, -156.23244, -45.063564, 26.249937...","[0.29866484, 0.137576, 0.26829377, 0.56323946,...","[(167.81305+0j), (169.5363+0j), (171.25871+0j)..."
3,"[0.06689453125, 0.08251953125, 0.0908203125, 0...","[3016.556497593843, 2733.093472180655, 2394.70...","[8914.74609375, 7934.9853515625, 7418.18847656...","[-151.17372, -115.99337, -137.84445, -183.4657...","[1.0, 1.0, 0.35716882, 0.13711458, 0.09756262,...","[(153.44576+0j), (154.8538+0j), (156.2569+0j),..."
4,"[0.0419921875, 0.0634765625, 0.08447265625, 0....","[1861.496524385576, 1894.3471992720065, 1987.4...","[3348.4130859375, 3348.4130859375, 3359.179687...","[-87.60696, -63.465073, -62.712914, -56.52946,...","[0.31229314, 0.39315093, 0.3674996, 0.27865446...","[(129.28964+0j), (130.727+0j), (132.16612+0j),..."
...,...,...,...,...,...,...
995,"[0.02587890625, 0.03759765625, 0.0556640625, 0...","[1814.6794306123995, 2044.7159649785262, 2234....","[3886.7431640625, 4909.5703125, 5760.131835937...","[-211.30547, -206.57405, -231.43155, -213.8381...","[0.3083562, 0.11353422, 0.09033802, 0.1906156,...","[(103.104195+0j), (104.22215+0j), (105.34045+0..."
996,"[0.044921875, 0.0673828125, 0.0859375, 0.09570...","[1955.5418442933342, 2164.7091771559126, 2369....","[4532.7392578125, 5200.2685546875, 5523.266601...","[-181.1916, -177.69016, -204.2336, -220.05225,...","[0.08535859, 0.028048158, 0.010128682, 0.00710...","[(105.53118+0j), (106.63355+0j), (107.73575+0j..."
997,"[0.017578125, 0.0263671875, 0.03564453125, 0.0...","[1187.8249264990166, 952.6527363855892, 817.10...","[2217.919921875, 1442.724609375, 1184.32617187...","[-354.09113, -373.8292, -403.8538, -416.41177,...","[0.046386052, 0.038805358, 0.03744893, 0.03175...","[(90.99232+0j), (92.01011+0j), (93.02835+0j), ..."
998,"[0.0068359375, 0.01611328125, 0.0205078125, 0....","[1456.6106500541023, 1413.8374732896405, 1448....","[3563.7451171875, 3499.1455078125, 3574.511718...","[-308.07678, -300.8568, -313.62973, -321.6552,...","[0.9682443, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9974777...","[(130.84767+0j), (132.1267+0j), (133.40443+0j)..."


In [94]:
input_shape = (128, 1000, 3)
CNNmodel = models.Sequential()
CNNmodel.add(layers.Conv2D(
    32, (3, 3), activation='relu', input_shape=input_shape))
CNNmodel.add(layers.MaxPooling2D((2, 2)))
CNNmodel.add(layers.Dropout(0.2))
CNNmodel.add(layers.Conv2D(64, (3, 3), activation='relu'))
CNNmodel.add(layers.MaxPooling2D((2, 2)))
CNNmodel.add(layers.Dropout(0.2))
CNNmodel.add(layers.Conv2D(64, (3, 3), activation='relu'))
CNNmodel.add(layers.Flatten())
CNNmodel.add(layers.Dense(64, activation='relu'))
CNNmodel.add(layers.Dropout(0.2))
CNNmodel.add(layers.Dense(32, activation='relu'))
CNNmodel.add(layers.Dense(24, activation='softmax'))

In [96]:
history = CNNmodel.fit(X_train, y_train, epochs=20,
                       validation_data=(X_test, y_test))


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

In [95]:
CNNmodel.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),metrics=['accuracy'])

Creating splits

In [89]:
from sklearn.model_selection import train_test_split, GridSearchCV

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)


Normalizing the data

In [91]:
# X_train = MinMaxScaler().fit_transform(X_train)
# X_test = MinMaxScaler().fit_transform(X_test)

In [93]:
import tensorflow as tf
from tensorflow import keras as k
from keras import models, layers
from keras.models import Sequential
from keras.layers import  Conv1D

In [14]:
X_train.shape

(800, 675808)

In [16]:
def trainModel(model, epochs, optimizer, X_train, y_train, X_test, y_test):
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics='accuracy')
    return model.fit(X_train, y_train, validation_data=(X_test, y_test),
                     epochs=epochs)


In [17]:
def plotValidate(history):
    print("Validation Accuracy: " + max(history.history["val_accuracy"]))
    pd.DataFrame(history.history).plot(figsize=(12, 6))
    plt.show


In [28]:
model = Sequential()
model.add(Conv1D(filters = 512, kernel_size = 3, activation='relu', input_shape=X.shape))


In [29]:
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(k.layers.Dropout(0.5))
model.add(k.layers.MaxPooling1D(pool_size=2))
model.add(k.layers.Flatten())
model.add(k.layers.Dense(100, activation='relu'))
model.add(k.layers.Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])


In [37]:

verbose, epochs, batch_size = 0, 10, 32
n_timesteps, n_features, n_outputs = X_train.shape[1], 1, y_train.shape[1]
# fit network
model.fit(X_train, y_train, epochs=epochs,
            batch_size=batch_size, verbose=verbose)
# evaluate model
_, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
print(accuracy)

ValueError: in user code:

    File "/Users/miarodgers/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "/Users/miarodgers/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/miarodgers/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "/Users/miarodgers/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 889, in train_step
        y_pred = self(x, training=True)
    File "/Users/miarodgers/opt/anaconda3/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/miarodgers/opt/anaconda3/lib/python3.8/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_4" is incompatible with the layer: expected shape=(None, 1000, 675808), found shape=(32, 675808)
