# A CNN music genre classification model trained on mel-spectrogram images of 30-second audio clips.

source data: smMELsg_30sec_Train_Test.zip

In [None]:
# Mount Google Drive at /content/drive; the next cell links project folders from it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ln -sf ./drive/MyDrive/projects/FNN_Project/data /content/data
!ln -sf ./drive/MyDrive/projects/FNN_Project/ref /content/ref
!cp ./ref/fma/utils.py ./
!pip install python-dotenv
!pip install scikeras
!pip install pydub

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0
Collecting scikeras
  Downloading scikeras-0.12.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.12.0
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


## Check image dimensions.

In [None]:
%%time
import cv2
import os

content_mel_dir = '/content/fma_small_specgram_img'

# Tally the decoded image shapes per genre so that any training image whose
# dimensions differ from the rest shows up as an extra key in the printed dict.
for genre in ['Pop', 'Rock', 'Electronic']:
  genre_dir = f'{content_mel_dir}/train/{genre}'
  img_shapes = {}
  unreadable = []
  for filename in os.listdir(genre_dir):
    track_path = f'{genre_dir}/{filename}'
    im = cv2.imread(track_path)
    if im is None:
      # cv2.imread returns None (it does not raise) when a file cannot be
      # decoded; record the path instead of crashing on `im.shape`.
      unreadable.append(track_path)
      continue
    img_shapes[im.shape] = img_shapes.get(im.shape, 0) + 1
  print(img_shapes)
  if unreadable:
    print(f'  Skipped {len(unreadable)} unreadable file(s) in {genre}: {unreadable[:5]}')

{(450, 610, 3): 800}
{(450, 610, 3): 799}
{(450, 610, 3): 799}
CPU times: user 25.3 s, sys: 222 ms, total: 25.5 s
Wall time: 25.4 s


## Define CNN model.

In [None]:
from keras import layers
from keras.layers import (Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten,
                          Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D,
                          Dropout)
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.optimizers import Adam

def GenreModel(input_shape = (450,610,3),classes=3):
  """Build the (uncompiled) CNN that classifies spectrogram images by genre.

  The network is five identical stages of
  Conv2D(3x3, stride 1) -> BatchNormalization -> ReLU -> MaxPooling2D(2x2)
  with 8, 16, 32, 64 and 128 filters, followed by Flatten, Dropout(0.3) and a
  softmax Dense output layer.

  Args:
    input_shape: Shape of a single input image, channels last.
    classes: Number of output classes; also used in the name of the output
      layer ('fc<classes>').

  Returns:
    A keras Model named 'GenreModel' mapping an image batch to class
    probabilities.
  """
  X_input = Input(input_shape)

  X = X_input
  for n_filters in (8, 16, 32, 64, 128):
    X = Conv2D(n_filters,kernel_size=(3,3),strides=(1,1))(X)
    # axis=-1 is the channel axis of the 4-D feature map (the same axis the
    # earlier stages addressed as axis=3).
    X = BatchNormalization(axis=-1)(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((2,2))(X)

  X = Flatten()(X)

  X = Dropout(rate=0.3)(X)

  X = Dense(classes, activation='softmax', name='fc' + str(classes))(X)

  model = Model(inputs=X_input,outputs=X,name='GenreModel')

  return model

In [None]:
#model = keras.models.load_model('./CNN_Split_v1.keras')

In [None]:
from keras.preprocessing.image import ImageDataGenerator

def get_data_generator(_source_dir, _shuffle, _target_size=(450,610), _batch_size=128):
  """Create a batch iterator over a directory of class-labelled images.

  Pixel values are rescaled by 1/255 and labels are one-hot encoded
  (class_mode='categorical'), one class per sub-directory of `_source_dir`.

  Args:
    _source_dir: Directory containing one sub-directory per class.
    _shuffle: Whether the iterator shuffles the images.
    _target_size: (height, width) every image is resized to.
    _batch_size: Number of images per batch.

  Returns:
    The iterator returned by ImageDataGenerator.flow_from_directory.
  """
  datagen = ImageDataGenerator(rescale=1./255)
  return datagen.flow_from_directory(_source_dir,target_size=_target_size,
                                     color_mode="rgb",
                                     class_mode='categorical',
                                     batch_size=_batch_size,
                                     shuffle = _shuffle)

In [None]:
# Training images are shuffled; test images are read without shuffling.
train_generator = get_data_generator(f'{content_mel_dir}/train', True)
test_generator = get_data_generator(f'{content_mel_dir}/test', False)

Found 2398 images belonging to 3 classes.
Found 300 images belonging to 3 classes.


## Fit model using all training data and test data as validation.

In [None]:
%%time
model = GenreModel()
opt = Adam(learning_rate=0.0005)
model.compile(optimizer = opt,loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_generator, validation_data=test_generator, epochs=30)
model.save('/content/data/mel_sg/models/CNN_30sec_img_RPE_m1_epoch30.keras')
!cp /content/data/mel_sg/models/CNN_30sec_img_RPE_m1_epoch30.keras ./data/mel_sg/



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
CPU times: user 20min 24s, sys: 5min 14s, total: 25min 38s
Wall time: 23min 6s


<keras.src.callbacks.History at 0x796365265420>

In [None]:
import tensorflow as tf
# Reload the model saved by the training cell and print its layer summary.
loaded_model = tf.keras.models.load_model('/content/data/mel_sg/models/CNN_30sec_img_RPE_m1_epoch30.keras')
loaded_model.summary()

Model: "GenreModel"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 450, 610, 3)]     0         
                                                                 
 conv2d_15 (Conv2D)          (None, 448, 608, 8)       224       
                                                                 
 batch_normalization_15 (Ba  (None, 448, 608, 8)       32        
 tchNormalization)                                               
                                                                 
 activation_15 (Activation)  (None, 448, 608, 8)       0         
                                                                 
 max_pooling2d_15 (MaxPooli  (None, 224, 304, 8)       0         
 ng2D)                                                           
                                                                 
 conv2d_16 (Conv2D)          (None, 222, 302, 16)      1

# Perform 5-fold cross-validation

In [None]:
%%time
import os

def get_file_paths(_split, _genres=('Pop', 'Rock', 'Electronic'), _root_dir=None):
  """List every image file of one dataset split.

  Args:
    _split: Name of the split sub-directory (e.g. 'train' or 'test').
    _genres: Genre sub-directories to scan, in the order they are visited.
    _root_dir: Dataset root containing the split directories. Defaults to the
      notebook-level `content_mel_dir`.

  Returns:
    A list of (path, genre, filename) tuples, grouped by genre in `_genres`
    order and sorted by filename within each genre.
  """
  root_dir = content_mel_dir if _root_dir is None else _root_dir
  file_paths = []
  for genre in _genres:
    genre_dir = f'{root_dir}/{_split}/{genre}'
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # (and any seeded shuffle applied to it) reproducible across runs.
    for filename in sorted(os.listdir(genre_dir)):
      file_paths.append((f'{genre_dir}/{filename}', genre, filename))
  return file_paths

CPU times: user 7 µs, sys: 2 µs, total: 9 µs
Wall time: 12.2 µs


In [None]:
train_file_paths = get_file_paths('train')
train_file_paths[0]

('/content/fma_small_specgram_img/train/Pop/48457.png', 'Pop', '48457.png')

## Shuffle paths to image files.

In [None]:
%%time
import random

# Start from a canonical (sorted) order so the seeded shuffle yields the same
# permutation every time this cell runs; re-shuffling the already-shuffled
# list in place would otherwise change the folds on each re-run.
train_file_paths.sort()
random.seed( 0 )

random.shuffle(train_file_paths)

def split(a, n):
    """Lazily cut sequence `a` into `n` consecutive slices of near-equal size.

    The first len(a) % n slices hold one more element than the remaining ones.
    Returns a generator that yields the slices in order.
    """
    base, extra = divmod(len(a), n)
    # Boundary i is where slice i starts and slice i-1 ends.
    bounds = [i * base + min(i, extra) for i in range(n + 1)]
    return (a[lo:hi] for lo, hi in zip(bounds, bounds[1:]))

n_folds = 5
k_folds = list(split(train_file_paths, n_folds))
fold_sizes = [len(fold) for fold in k_folds]

# Report the number of paths, each fold's size, then the summed sizes and the
# fold count as a sanity check.
print(len(train_file_paths))
print()
for size in fold_sizes:
  print(size)
print()
print(sum(fold_sizes))
print(len(k_folds))

2398

480
480
480
479
479

2398
5
CPU times: user 616 µs, sys: 797 µs, total: 1.41 ms
Wall time: 1.38 ms


## Create 5 folds of image files by copying source images into fold directories.

In [None]:
%%time

#BE SURE TO DELETE THE OLD /content/kfolds directory.

import os
import shutil
import itertools

def copy_fold(_fold, _target_dir):
  """Copy every image of a fold into per-genre sub-directories of `_target_dir`.

  Args:
    _fold: Iterable of (source_path, genre, filename) entries.
    _target_dir: Directory that receives one sub-directory per genre; missing
      directories are created.
  """
  for entry in _fold:
    source_path, genre_name, file_name = entry[0], entry[1], entry[2]
    destination_dir = f'{_target_dir}/{genre_name}'
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copyfile(source_path, f'{destination_dir}/{file_name}')

n_folds = len(k_folds)
#Create combinations of folds, excluding one fold for test.
fold_combos = list(itertools.combinations(list(range(n_folds)), n_folds-1))
#Get test folds: for each combination, the single fold index it leaves out.
test_folds = [idx for fc in fold_combos for idx in range(n_folds) if idx not in fc]
print(f'Train folds: {fold_combos}')
print(f'Test folds: {test_folds}')

for fold_idx, fc in enumerate(fold_combos):
  print(f'Fold: {fold_idx}')
  fold_dir = '/content/kfolds/fold' + str(fold_idx)
  # Clear the output of any earlier run: copy_fold only adds files, so stale
  # copies from a different shuffle could otherwise leave the same image in
  # both the train and the test directory of a fold.
  shutil.rmtree(fold_dir, ignore_errors=True)
  train_target_dir = fold_dir +'/train'
  print(f'  Train folds: {fc}')
  sub_folds = [k_folds[x] for x in fc]
  for fold in sub_folds:
    copy_fold(fold, train_target_dir)
  test_target_dir = fold_dir +'/test'
  print(f'  Test fold: {test_folds[fold_idx]}')
  copy_fold(k_folds[test_folds[fold_idx]], test_target_dir)

Train folds: [(0, 1, 2, 3), (0, 1, 2, 4), (0, 1, 3, 4), (0, 2, 3, 4), (1, 2, 3, 4)]
Test folds: [4, 3, 2, 1, 0]
Fold: 0
  Train folds: (0, 1, 2, 3)
  Test fold: 4
Fold: 1
  Train folds: (0, 1, 2, 4)
  Test fold: 3
Fold: 2
  Train folds: (0, 1, 3, 4)
  Test fold: 2
Fold: 3
  Train folds: (0, 2, 3, 4)
  Test fold: 1
Fold: 4
  Train folds: (1, 2, 3, 4)
  Test fold: 0
CPU times: user 1.07 s, sys: 3.71 s, total: 4.77 s
Wall time: 10.8 s


In [None]:
# List the fold directories, then fold0's splits and the genre folders in each split.
!ls /content/kfolds/
!ls /content/kfolds/fold0/
!ls /content/kfolds/fold0/test
!ls /content/kfolds/fold0/train

fold0  fold1  fold2  fold3  fold4
test  train
Electronic  Pop  Rock
Electronic  Pop  Rock


In [None]:
# Count the files in fold0: overall, train split, test split.
!find /content/kfolds/fold0/ -type f | wc -l
!find /content/kfolds/fold0/train/ -type f | wc -l
!find /content/kfolds/fold0/test -type f | wc -l

2398
1919
479


In [None]:
# Per-genre file counts in fold0's train split.
!find /content/kfolds/fold0/train/Pop -type f | wc -l
!find /content/kfolds/fold0/train/Electronic -type f | wc -l
!find /content/kfolds/fold0/train/Rock -type f | wc -l

633
639
647


In [None]:
# Per-genre file counts in fold0's test split.
!find /content/kfolds/fold0/test/Pop -type f | wc -l
!find /content/kfolds/fold0/test/Electronic -type f | wc -l
!find /content/kfolds/fold0/test/Rock -type f | wc -l

167
160
152


## Fit and collect performance statistics. 4 folds are used for training; the held-out test fold is used for validation.

In [None]:
%%time
train_generator = get_data_generator('/content/kfolds/fold0/train', True)
test_generator = get_data_generator('/content/kfolds/fold0/test', False)
model = GenreModel()
opt = Adam(learning_rate=0.0005)
model.compile(optimizer = opt,loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_generator, validation_data=test_generator, epochs=30)
model.save('/content/CNN_30sec_img_5Fold0_m1_epoch30.keras')
!cp /content/CNN_5_Fold0_m1_epoch20.keras ./data/mel_sg/

Found 1919 images belonging to 3 classes.
Found 479 images belonging to 3 classes.




Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
cp: cannot stat '/content/CNN_5_Fold0_m1_epoch20.keras': No such file or directory
CPU times: user 18min 4s, sys: 4min 42s, total: 22min 47s
Wall time: 20min 39s


In [None]:
%%time
# Fold 1: train a fresh model on the fold's train split, validate on its test split.
train_generator = get_data_generator('/content/kfolds/fold1/train', True)
test_generator = get_data_generator('/content/kfolds/fold1/test', False)
model = GenreModel()
opt = Adam(learning_rate=0.0005)
model.compile(optimizer = opt,loss='categorical_crossentropy', metrics=['accuracy'])
# Model.fit accepts generators directly; fit_generator is deprecated.
model.fit(train_generator, validation_data=test_generator, epochs=30)
#model.save('/content/CNN_30sec_img_5Fold0_m1_epoch30.keras')
#!cp /content/CNN_5_Fold0_m1_epoch20.keras ./data/mel_sg/

Found 1919 images belonging to 3 classes.
Found 479 images belonging to 3 classes.




Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
CPU times: user 18min 31s, sys: 4min 46s, total: 23min 18s
Wall time: 21min 11s


<keras.src.callbacks.History at 0x796364254d60>

In [None]:
%%time
# Fold 2: train a fresh model on the fold's train split, validate on its test split.
train_generator = get_data_generator('/content/kfolds/fold2/train', True)
test_generator = get_data_generator('/content/kfolds/fold2/test', False)
model = GenreModel()
opt = Adam(learning_rate=0.0005)
model.compile(optimizer = opt,loss='categorical_crossentropy', metrics=['accuracy'])
# Model.fit accepts generators directly; fit_generator is deprecated.
model.fit(train_generator, validation_data=test_generator, epochs=30)

Found 1918 images belonging to 3 classes.
Found 480 images belonging to 3 classes.




Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
CPU times: user 18min 35s, sys: 4min 46s, total: 23min 22s
Wall time: 21min 13s


<keras.src.callbacks.History at 0x79636412c4c0>

In [None]:
%%time
# Fold 3: train a fresh model on the fold's train split, validate on its test split.
train_generator = get_data_generator('/content/kfolds/fold3/train', True)
test_generator = get_data_generator('/content/kfolds/fold3/test', False)
model = GenreModel()
opt = Adam(learning_rate=0.0005)
model.compile(optimizer = opt,loss='categorical_crossentropy', metrics=['accuracy'])
# Model.fit accepts generators directly; fit_generator is deprecated.
model.fit(train_generator, validation_data=test_generator, epochs=30)

Found 1918 images belonging to 3 classes.
Found 480 images belonging to 3 classes.




Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
CPU times: user 18min 33s, sys: 4min 48s, total: 23min 21s
Wall time: 21min 13s


<keras.src.callbacks.History at 0x796346c55780>

In [None]:
%%time
# Fold 4: train a fresh model on the fold's train split, validate on its test split.
train_generator = get_data_generator('/content/kfolds/fold4/train', True)
test_generator = get_data_generator('/content/kfolds/fold4/test', False)
model = GenreModel()
opt = Adam(learning_rate=0.0005)
model.compile(optimizer = opt,loss='categorical_crossentropy', metrics=['accuracy'])
# Model.fit accepts generators directly; fit_generator is deprecated.
model.fit(train_generator, validation_data=test_generator, epochs=30)

Found 1918 images belonging to 3 classes.
Found 480 images belonging to 3 classes.




Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
CPU times: user 18min 34s, sys: 4min 48s, total: 23min 23s
Wall time: 21min 15s


<keras.src.callbacks.History at 0x7963640ef100>