In [1]:
# Imports
# from ape_paths import wav_path
import os
import time

import librosa
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.python.keras import utils
import tensorflow as tf
from keras.utils import to_categorical
from notebook_functions import *

# Root directory that holds the audio samples, one sub-folder per genre.
# Set the AUDIO_SAMPLES_DIR environment variable to run the notebook on another
# machine; the original hard-coded location is kept as the default.
z_path = os.environ.get(
    "AUDIO_SAMPLES_DIR",
    "C:\\Users\\Zack\\Desktop\\work\\OSU\\467_capstone\\data\\audio_samples",
)

In [2]:
def extract_from_genres(root_dir, genre):
    '''
    Collects the mel spectrograms of a single genre folder.
    root_dir : string of directory containing one sub-directory per genre
    genre    : name of the sub-directory (genre) to process
    returns  : (mel_specs, full_labels) -- two parallel lists; both are empty
               when root_dir has no sub-directory named `genre`
    '''
    mel_specs = []
    full_labels = []
    for entry in os.scandir(root_dir):
        if entry.is_dir() and entry.name == genre:
            # progress indicator: extraction of one genre can take a while
            print(entry.name)
            # extract_mel_spectrogram takes the directory only; calling it with
            # a second (label) argument raised TypeError
            spects, _ = extract_mel_spectrogram(entry.path)
            mel_specs += spects
            # every sample found under this folder is labelled with the genre
            full_labels += [entry.name] * len(spects)
    return mel_specs, full_labels
    # # Converting the list or arrays to an array
    # X = np.array(mel_specs)
    
    

In [3]:
def extract_mel_spectrogram(music_dir):
    '''
    Computes a fixed-size mel spectrogram and a genre label for every audio
    sample file found through make_file_list(music_dir).
    music_dir : string of directory location with audio samples
    returns   : (mel_specs, genre_labels) -- two parallel lists.
                mel_specs holds one flattened 128 x 660 spectrogram (in dB)
                per sample; genre_labels holds the name of the folder that
                contains the sample.
    '''
    n_mels = 128     # mel bands (rows of every spectrogram)
    n_frames = 660   # time frames (columns) every spectrogram is fitted to

    # helper function for getting lists of sample paths
    song_paths, _ = make_file_list(music_dir)

    mel_specs = []
    genre_labels = []

    for song in song_paths:

        # load sample data
        samples, sample_rate = librosa.load(song)

        # the genre is the name of the folder that holds the sample
        genre_labels.append(os.path.basename(os.path.dirname(song)))

        # compute mel spectrogram in dB relative to the loudest bin
        # (the audio must be passed as y=; recent librosa rejects it positionally)
        spect = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_fft=2048,
                                               hop_length=1024, n_mels=n_mels)
        spect = librosa.power_to_db(spect, ref=np.max)

        # Fit the time axis to n_frames. ndarray.resize() must not be used here:
        # it re-interprets the flat buffer, so changing the column count shifts
        # every mel row against the others instead of padding each row.
        frames = spect.shape[1]
        if frames < n_frames:
            # pad the end of each row with the quietest value (silence)
            spect = np.pad(spect, ((0, 0), (0, n_frames - frames)),
                           mode='constant', constant_values=spect.min())
        elif frames > n_frames:
            spect = spect[:, :n_frames]

        # flatten (row-major) so each sample is one vector of 128 * 660 values
        mel_specs.append(spect.flatten())
    return mel_specs, genre_labels

In [None]:
# def extract_mel_spectrogram(genre_dir, label):
#     '''
#     This function takes in a directory of audio files in .wav format, computes the
#     mel spectrogram for each audio file, reshapes them so that they are all the 
#     same size, and stores them in a numpy array. 
    
#     It also creates a list of genre labels and maps them to numeric values.
    
#     Parameters:
#     genre_dir (str): path to a directory of audio files in .wav format
#     label (str): genre label assigned to every file in that directory
    
#     Returns:
#     X (array): array of mel spectrogram data from all audio files in the given
#     directory
#     y (array): array of the corresponding genre labels in numeric form
#     '''
    
#     # Creating empty lists for mel spectrograms and labels
#     labels = []
#     mel_specs = []
    
    
#     # Looping through each file in the directory
#     for file in os.scandir(genre_dir):
#         # Don't process if not .wav file
#         # if file.name.endswith('.wav'):  
#         # Loading in the audio file
#         print(str(file))
#         y, sr = librosa.core.load(file)
        
#         # Extracting the label and adding it to the list
#         # label = str(file).split('.')[0][11:]
#         labels.append(label)

#         # Computing the mel spectrograms
#         spect = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024)
#         spect = librosa.power_to_db(spect, ref=np.max)
        
#         # Adjusting the size to be 128 x 660
#         if spect.shape[1] != 660:
#             spect.resize(128,660, refcheck=False)
            
#         # Adding the mel spectrogram to the list
#         mel_specs.append(spect)
#     return mel_specs, labels

In [4]:
# Build the whole dataset in one pass over the root directory:
# X is a list of flattened mel spectrograms, y the matching list of genre names.
X, y = extract_mel_spectrogram(z_path)



In [5]:
# Shallow copies of the raw lists (X and y are rebound to arrays further down);
# presumably kept as a backup so the slow extraction need not be repeated.
mel_spec_list = X[:]
labels_list = y[:]

In [None]:
# Alternative to the single-pass extraction: load the dataset one genre folder
# at a time and concatenate the results.
# The genres are listed in label order, 'Ambient Electronic' included (it was
# missing from the per-genre calls, which left X0 / y0 undefined).
GENRES = [
    'Ambient Electronic',
    'Chiptune',
    'Classical',
    'Country',
    'Electronic',
    'Folk',
    'Hip-Hop',
    'Indie-Rock',
    'Jazz',
    'Metal',
    'Pop',
    'Post-Rock',
    'Psych-Rock',
    'Punk',
    'Reggae',
    'Rock',
    'Techno',
    'Trip-Hop',
]

# Per-genre results are kept so a single genre can be inspected or re-extracted.
specs_by_genre = {}
labels_by_genre = {}
for genre in GENRES:
    specs_by_genre[genre], labels_by_genre[genre] = extract_from_genres(z_path, genre)

# Concatenate in GENRES order so that X[i] and y[i] stay aligned.
X = [spec for genre in GENRES for spec in specs_by_genre[genre]]
y = [label for genre in GENRES for label in labels_by_genre[genre]]

In [22]:
# Stack the list of flattened spectrograms into a single array (one row per sample)
X = np.array(X)

# Sanity check: number of samples loaded
print(len(X))

8346


In [9]:
# Sanity check: there must be exactly one label per spectrogram (same count as X)
print(len(y))

8346


In [24]:
# Converting labels to numeric values
# Genre name -> class index; the indices double as the one-hot positions later on.
label_dict = {
    'Ambient Electronic': 0,
    'Chiptune': 1,
    'Classical': 2,
    'Country': 3,
    'Electronic': 4,
    'Folk': 5,
    'Hip-Hop': 6,
    'Indie-Rock': 7,
    'Jazz': 8,
    'Metal': 9,
    'Pop': 10,
    'Post-Rock': 11,
    'Psych-Rock': 12,
    'Punk': 13,
    'Reggae': 14,
    'Rock': 15,
    'Techno': 16,
    'Trip-Hop': 17
}
full_labels = pd.Series(y)

# Series.map() silently turns every label that is missing from label_dict into
# NaN. That happens for an unexpected folder name and also when this cell is
# re-run after y has already been converted to integers, so fail loudly instead.
unknown_labels = sorted(set(full_labels[~full_labels.isin(list(label_dict))].astype(str)))
if unknown_labels:
    raise ValueError(
        f"labels without an entry in label_dict: {unknown_labels} "
        "(y must hold genre names; re-create it before re-running this cell)"
    )

y = full_labels.map(label_dict).values

Full labels:  0       Ambient Electronic
1       Ambient Electronic
2       Ambient Electronic
3       Ambient Electronic
4       Ambient Electronic
               ...        
8341              Trip-Hop
8342              Trip-Hop
8343              Trip-Hop
8344              Trip-Hop
8345              Trip-Hop
Length: 8346, dtype: object <class 'pandas.core.series.Series'>


In [25]:
# Train test split
# 20% of the samples are held out; stratify=y keeps the genre proportions equal
# in both parts and random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y, test_size=.2)

In [26]:
# Checking the minimum value (the scale ranges from zero to some negative value) to see how we should scale the data
# Only the training set is inspected, so the scaling constant does not depend on test data.
min_val = X_train.min()
print(min_val)

-80.0


In [27]:
# Scaling our data to be between 0 and 1 using the minimum value from above
# (dividing by the negative minimum maps min_val to 1 and 0 dB to 0).
# NOTE(review): the division happens in place, so running this cell a second
# time rescales already-scaled data; re-run the train/test split cell first.
X_train /= min_val
X_test /= min_val

In [28]:
# Reshaping images to be 128 x 660 x 1, where the 1 represents the single color channel
# (each row of X is a flattened 128 x 660 spectrogram, so the reshape restores that layout)
X_train = X_train.reshape(X_train.shape[0], 128, 660, 1)
X_test = X_test.reshape(X_test.shape[0], 128, 660, 1)

In [29]:
# Integer class labels before one-hot encoding
print(y_train)
print(y_test)

[ 6 13 11 ...  5  9  2]
[ 6 11  6 ...  6 10  1]


In [30]:
# One hot encoding our labels
# num_classes is passed explicitly so both arrays get 18 columns regardless of
# which classes happen to be present in each part.
y_train = to_categorical(y_train, num_classes=18)
y_test = to_categorical(y_test, num_classes=18)

In [31]:
# One-hot encoded labels: one row per sample, one column per genre
print(y_train)
print(y_test)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]


In [33]:
# CNN
# Start the wall-clock timer, then fix the NumPy and TensorFlow seeds for replication purposes
start = time.time()
np.random.seed(12345)
tf.random.set_seed(123)

# Two convolution + max-pooling stages followed by a small dense classifier
cnn_model = Sequential(
    [
        Conv2D(filters=32, kernel_size=(5,5), activation='relu', input_shape=(128,660,1)),
        MaxPooling2D(pool_size=(4,4)),
        Conv2D(filters=16, kernel_size=(5,5), activation='relu'),
        MaxPooling2D(pool_size=(4,4)),
        # Flatten the feature maps so they can feed the dense layers
        Flatten(),
        Dense(64, activation='relu'),
        # Dropout for regularization
        Dropout(0.25),
        # One softmax output per genre
        Dense(18, activation='softmax'),
    ],
    name='cnn_1',
)

# Compiling our neural network
cnn_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fitting our neural network; the held-out set is used as validation data
history = cnn_model.fit(
    X_train,
    y_train,
    batch_size=16,
    validation_data=(X_test, y_test),
    epochs=15,
)

# Elapsed training time in seconds
end = time.time()
print(end - start)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
2131.805577278137


In [35]:
# Second CNN variant: smaller 3x3 kernels, 16 then 32 filters, (2,4) pooling,
# trained for a single epoch with batch size 1.
start = time.time()
np.random.seed(12346)
tf.random.set_seed(1234)

cnn_model2 = Sequential(
    [
        Conv2D(filters=16, kernel_size=(3,3), activation='relu', input_shape=(128,660,1)),
        MaxPooling2D(pool_size=(2,4)),
        Conv2D(filters=32, kernel_size=(3,3), activation='relu'),
        MaxPooling2D(pool_size=(2,4)),
        # Flatten the feature maps so they can feed the dense layers
        Flatten(),
        Dense(64, activation='relu'),
        # Dropout for regularization
        Dropout(0.25),
        # One softmax output per genre
        Dense(18, activation='softmax'),
    ],
    name='cnn_2',
)

# Compiling our neural network
cnn_model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fitting our neural network
history2 = cnn_model2.fit(
    X_train,
    y_train,
    batch_size=1,
    validation_data=(X_test, y_test),
    epochs=1,
)

# Elapsed training time in seconds
end = time.time()
print(end - start)

188.083163022995


In [36]:


# Third CNN variant: same architecture as cnn_2 (3x3 kernels, 16 then 32
# filters, (2,4) pooling) but trained for 50 epochs with batch size 1.
start = time.time()
np.random.seed(12444)
tf.random.set_seed(1244)

cnn_model3 = Sequential(
    [
        Conv2D(filters=16, kernel_size=(3,3), activation='relu', input_shape=(128,660,1)),
        MaxPooling2D(pool_size=(2,4)),
        Conv2D(filters=32, kernel_size=(3,3), activation='relu'),
        MaxPooling2D(pool_size=(2,4)),
        # Flatten the feature maps so they can feed the dense layers
        Flatten(),
        Dense(64, activation='relu'),
        # Dropout for regularization
        Dropout(0.25),
        # One softmax output per genre
        Dense(18, activation='softmax'),
    ],
    name='cnn_3',
)

# Compiling our neural network
cnn_model3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fitting our neural network
history3 = cnn_model3.fit(
    X_train,
    y_train,
    batch_size=1,
    validation_data=(X_test, y_test),
    epochs=50,
)

# Elapsed training time in seconds
end = time.time()
print(end - start)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
9178.422546386719


In [46]:
import random
import copy

# Random hyperparameter search: trains five CNNs with randomly drawn layer
# sizes, each on its own stratified split, and saves every model that beats
# the best validation accuracy seen so far.
start = time.time()

models = {}       # model name -> trained model
histories = {}    # model name -> History object returned by fit()
data_splits = {}  # model name -> [X_train, X_test, y_train, y_test]

# Directory that receives the saved models
model_dir = 'C:\\Users\\Zack\\Desktop\\work\\OSU\\467_capstone\\Genre_classification\\Model'

# Scale into a NEW array: X[:] on a NumPy array is only a view, so an in-place
# `/=` on it would also rescale X and make this cell unsafe to run twice.
min_val = X.min()
x_z = (X / min_val).reshape(X.shape[0], 128, 660, 1)
y_z = to_categorical(y, num_classes=18)

# Validation accuracy a model has to beat in order to be saved
top_acc = 0.48

for i in range(5,10):
    model_name = "model_" + str(i)
    print("\n", model_name, ": \n")

    # Seeding with the loop index makes every draw below reproducible; the
    # order of the random.randint calls must therefore stay unchanged.
    random.seed(i)
    r1 = random.randint(12000, 12500)
    print("seed 1: ", r1)
    r2 = random.randint(10000, 11999)
    print("seed 2: ", r2)
    np.random.seed(r1)
    tf.random.set_seed(r2)

    # Every model gets its own stratified 80/20 split. The returned arrays are
    # new objects that are only rebound afterwards, so storing references is
    # enough (a deep copy would duplicate several GB for no benefit).
    X_train, X_test, y_train, y_test = train_test_split(x_z, y_z, random_state=50+i, stratify=y_z, test_size=.2)
    data_splits[model_name] = [X_train, X_test, y_train, y_test]

    # Initiating an empty neural network
    temp_model = Sequential(name=model_name)

    # Adding convolutional layer
    filters = random.randint(16, 32)
    print("filters 1: ", filters)
    temp_model.add(Conv2D(filters=filters,
                        kernel_size=(3,3),
                        activation='relu',
                        input_shape=(128,660,1)))

    # Adding max pooling layer
    pools = random.randint(2, 5)
    print("pooling 1: ", pools)
    temp_model.add(MaxPooling2D(pool_size=(pools,pools)))

    # Adding convolutional layer
    filters = random.randint(16, 32)
    print("filters 2: ", filters)
    temp_model.add(Conv2D(filters=filters,
                        kernel_size=(3,3),
                        activation='relu'))

    # Adding max pooling layer
    pools = random.randint(2, 5)
    print("pooling 2: ", pools)
    temp_model.add(MaxPooling2D(pool_size=(pools,pools)))

    # Adding a flattened layer to input our image data
    temp_model.add(Flatten())

    # Adding a dense layer with a random number of neurons
    neurons = random.randint(60, 128)
    print("neurons: ", neurons)
    temp_model.add(Dense(neurons, activation='relu'))

    # Adding a dropout layer for regularization (rate drawn from 0.20 .. 0.50)
    dropout = random.randint(20, 50)
    dropout /= 100
    print("dropout: ", dropout)
    temp_model.add(Dropout(dropout))

    # Adding an output layer
    temp_model.add(Dense(18, activation='softmax'))

    # Compiling our neural network
    temp_model.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

    # Fitting our neural network
    batch_size = random.randint(1, 18)
    epochs = random.randint(7, 15)
    print("batch size: ", batch_size)
    print("epochs: ", epochs)
    histories[model_name] = temp_model.fit(X_train,
                            y_train,
                            batch_size=batch_size,
                            validation_data=(X_test, y_test),
                            epochs=epochs)
    models[model_name] = temp_model

    # history[...] is a list with one value per epoch, so it cannot be compared
    # to a float directly. The last epoch corresponds to the weights that would
    # be saved, and validation accuracy is used so that the selection rewards
    # generalization rather than fit to the training data.
    final_val_acc = histories[model_name].history['val_accuracy'][-1]
    if final_val_acc > top_acc:
        top_acc = final_val_acc
        file_name = model_name + ".h5"
        save_path = os.path.join(model_dir, file_name)
        temp_model.save(save_path)
        print("saved ", file_name)
        print("accuracy: ", top_acc)


end = time.time()
print(end - start)



In [None]:
# Checking the model summary
# (layers, output shapes and parameter counts of the first CNN, cnn_1)
cnn_model.summary()

In [None]:
# The code in this cell was adapted from a lecture at General Assembly

# Check out our train loss and test loss over epochs.
train_loss = history.history['loss']
test_loss = history.history['val_loss']

# Epoch numbers start at 1. They are derived from the history length so the
# x positions and the tick labels always match the number of epochs trained
# (plotting the bare lists would place epoch 1 at x = 0).
epochs_run = range(1, len(train_loss) + 1)

# Set figure size.
plt.figure(figsize=(12, 8))

# Generate line plot of training, testing loss over epochs.
plt.plot(epochs_run, train_loss, label='Training Loss', color='blue')
plt.plot(epochs_run, test_loss, label='Testing Loss', color='red')

# Set title
plt.title('Training and Testing Loss by Epoch', fontsize = 25)
plt.xlabel('Epoch', fontsize = 18)
plt.ylabel('Categorical Crossentropy', fontsize = 18)
plt.xticks(epochs_run, epochs_run)

plt.legend(fontsize = 18);

In [None]:
# The code in this cell was adapted from a lecture at General Assembly

# Check out our train accuracy and test accuracy over epochs.
train_acc = history.history['accuracy']
test_acc = history.history['val_accuracy']

# Epoch numbers start at 1. They are derived from the history length so the
# x positions and the tick labels always match the number of epochs trained.
epochs_run = range(1, len(train_acc) + 1)

# Set figure size.
plt.figure(figsize=(12, 8))

# Generate line plot of training, testing accuracy over epochs.
plt.plot(epochs_run, train_acc, label='Training Accuracy', color='blue')
plt.plot(epochs_run, test_acc, label='Testing Accuracy', color='red')

# Set title
plt.title('Training and Testing Accuracy by Epoch', fontsize = 25)
plt.xlabel('Epoch', fontsize = 18)
plt.ylabel('Accuracy', fontsize = 18)
plt.xticks(epochs_run, epochs_run)

plt.legend(fontsize = 18);

In [None]:
# Loss and accuracy of the first CNN on the held-out data.
# evaluate() returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy'].
# NOTE(review): when the notebook runs top to bottom, the hyperparameter-search
# loop has rebound X_test / y_test to its last split (random_state=59), whereas
# cnn_model was trained on the random_state=42 split. This "test" set can then
# contain samples cnn_model was trained on -- confirm which split is intended.
test = cnn_model.evaluate(X_test, y_test, verbose=0)
print('Test loss: ', test[0])
print('Test accuracy: ', test[1])

In [None]:
# Making predictions from the cnn model
# The output layer is an 18-way softmax, so each row holds one score per genre.
predictions = cnn_model.predict(X_test, verbose=1)

In [None]:
## CONFUSION MATRIX
# Number of test samples per class: y_test is one-hot encoded, so column i
# equals 1 exactly for the samples whose true class is i
for i in range(18):
    print(f'{i}: {np.count_nonzero(y_test[:, i] == 1)}')

In [None]:
# Number of test samples assigned to each class, taking the highest-scoring
# class of every prediction row as the predicted label
for i in range(18):
    print(f'{i}: {np.count_nonzero(np.argmax(predictions, axis=1) == i)}')

In [None]:
# Calculating the confusion matrix 
# row: actual
# columns: predicted
# np.argmax(..., 1) turns the one-hot targets and the softmax scores back into
# integer class indices, which is the form confusion_matrix expects.
conf_matrix = confusion_matrix(np.argmax(y_test, 1), np.argmax(predictions, 1))
conf_matrix

In [None]:
# Creating a dataframe of the confusion matrix with labels for readability 
# (rows and columns are still numbered by class index at this point)
confusion_df = pd.DataFrame(conf_matrix)
confusion_df

In [None]:
# List of a subset of the genres
labels_dict = {
    0: 'Ambient Electronic',
    1: 'Chiptune',
    2: 'Classical',
    3: 'Country',
    4: 'Electronic',
    5: 'Folk',
    6: 'Hip-Hop',
    7: 'Indie-Rock',
    8: 'Jazz',
    9: 'Metal',
    10: 'Pop',
    11: 'Post-Rock',
    12: 'Psych-Rock',
    13: 'Punk',
    14: 'Reggae',
    15: 'Rock',
    16: 'Techno',
    17: 'Trip-Hop'
}



In [None]:
# Renaming rows and columns with labels
# Columns are renamed from class index to genre name; the index is then set to
# the same names so rows (actual) and columns (predicted) read alike.
confusion_df = confusion_df.rename(columns=labels_dict)
confusion_df.index = confusion_df.columns
confusion_df

In [None]:
# Creating a heatmap for the confusion matrix for display
plt.figure(figsize= (20,12))
# NOTE: sns.set changes the font scale for every later plot in this session
sns.set(font_scale = 2);
# fmt='d' prints the integer counts in full; the default annotation format
# ('.2g') switches to scientific notation once a count reaches three digits
ax = sns.heatmap(confusion_df, annot=True, fmt='d', cmap=sns.cubehelix_palette(50));
ax.set(xlabel='Predicted Values', ylabel='Actual Values');

In [None]:
# from joblib import dump, load

# # switch to model directory
# os.chdir(r"/Volumes/APE_External/Dropbox/_Classes/21_Winter/CS_467/Project_Folder/Music_Genre_Classification/Model")

# # create and save file
# joblib_file = "model_030921.joblib"
# dump(cnn_model, joblib_file)

In [45]:
from keras.models import load_model

# Save one model from the hyperparameter search in HDF5 format.
# NOTE(review): no cell in this notebook stores a 'model_0' entry in `models`
# (the search loop only iterates over model_5 .. model_9), so this lookup
# depends on state left over from an earlier run -- confirm before re-running.
models['model_0'].save('C:\\Users\\Zack\\Desktop\\work\\OSU\\467_capstone\\Genre_classification\\Model\\model_0_310_10am.h5')

In [None]:
# Load a previously saved model by absolute path. Building the path avoids
# os.chdir, which would silently change the working directory (and therefore
# every relative path) for the rest of the session.
# NOTE(review): no visible cell writes 'model_030921.h5'; confirm the file exists.
model_dir = 'C:\\Users\\Zack\\Desktop\\work\\OSU\\467_capstone\\Genre_classification\\Model'
model = load_model(os.path.join(model_dir, 'model_030921.h5'))