In [26]:
#!pip install opencv-python

In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow import keras
from tensorflow.keras import layers
import os
import glob
import cv2

In [2]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Splitting dataset (splitting paths leading to images)

## getting dataset from kaggle

In [3]:
! mkdir -p ~/.kaggle;
! cp kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json

In [30]:
! kaggle datasets download -d andradaolteanu/gtzan-dataset-music-genre-classification

gtzan-dataset-music-genre-classification.zip: Skipping, found more recently modified local copy (use --force to force download)


In [31]:
! unzip gtzan-dataset-music-genre-classification.zip

Archive:  gtzan-dataset-music-genre-classification.zip
replace Data/features_30_sec.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace Data/features_3_sec.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace Data/genres_original/blues/blues.00000.wav? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace Data/genres_original/blues/blues.00001.wav? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

## creating dataset with paths to spectograms

In [3]:
# specify path to spectogram images
PATH = r"/content/Data/images_original/"

In [4]:
genres = os.listdir(PATH)

In [5]:
# getting path of all file names in directories
files = {}
for genre in genres:
    files[genre] = glob.glob(f"{PATH}/{genre}/*png")

In [6]:
# creating dataframe wit paths
df = pd.DataFrame(columns = ['path', 'rgb_data', 'label'])
for genre in genres:
    df_aux = pd.DataFrame(files[genre], columns = ['path'])
    df_aux['label'] = genre
    df = df.append(df_aux, ignore_index=True, sort=False)

# verifying number of rows
df['label'].value_counts()

metal        100
reggae       100
country      100
rock         100
blues        100
disco        100
pop          100
classical    100
hiphop       100
jazz          99
Name: label, dtype: int64

In [7]:
#getting rgb data for all images
for index in df.index:
    img = cv2.imread(df['path'].loc[index])
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    #df['rgb_data'].loc[index] = np.array(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), dtype=np.uint8)
    df['rgb_data'].loc[index] = np.asarray(img).astype('float32')

In [8]:
df.head()

Unnamed: 0,path,rgb_data,label
0,/content/Data/images_original//classical/class...,"[[[255.0, 255.0, 255.0], [255.0, 255.0, 255.0]...",classical
1,/content/Data/images_original//classical/class...,"[[[255.0, 255.0, 255.0], [255.0, 255.0, 255.0]...",classical
2,/content/Data/images_original//classical/class...,"[[[255.0, 255.0, 255.0], [255.0, 255.0, 255.0]...",classical
3,/content/Data/images_original//classical/class...,"[[[255.0, 255.0, 255.0], [255.0, 255.0, 255.0]...",classical
4,/content/Data/images_original//classical/class...,"[[[255.0, 255.0, 255.0], [255.0, 255.0, 255.0]...",classical


In [9]:
from sklearn.preprocessing import LabelEncoder

In [10]:
# encoding label
le = LabelEncoder()
le.fit(genres)
df['label_encoded'] = le.transform(df['label'])

In [11]:
df['label_encoded']

0      1
1      1
2      1
3      1
4      1
      ..
994    4
995    4
996    4
997    4
998    4
Name: label_encoded, Length: 999, dtype: int64

## splitting dataset

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
#defining X and y
X = [data for data in df['rgb_data']]
X = np.array(X)
y = np.array(df['label_encoded'])

#splitting data and stratifying by y
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Modelling

In [14]:
shape_of_input = df['rgb_data'][0].shape
shape_of_input

(288, 432, 3)

In [15]:
model = keras.Sequential([
    layers.Rescaling(1 / 255, input_shape=shape_of_input),
    
    layers.Conv2D(8, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    layers.BatchNormalization(axis=3),
    layers.MaxPooling2D((2,2)),
    
    layers.Conv2D(16, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    layers.BatchNormalization(axis=3),
    layers.MaxPooling2D((2,2)),
    
    layers.Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    layers.BatchNormalization(axis=3),
    layers.MaxPooling2D((2,2)),
    
    layers.Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    layers.BatchNormalization(axis=-1),
    layers.MaxPooling2D((2,2)),
    
    layers.Conv2D(128, kernel_size=(3, 3), strides=(1, 1), activation='relu'),
    layers.BatchNormalization(axis=-1),
    layers.MaxPooling2D((2,2)),

    layers.Flatten(),
    
    layers.Dropout(rate=0.3),
    
    layers.Dense(len(genres), activation='softmax')
])

In [16]:
print(f'Model input shape: {model.input_shape}, Model output shape: {model.output_shape}')

Model input shape: (None, 288, 432, 3), Model output shape: (None, 10)


In [17]:
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling (Rescaling)        (None, 288, 432, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 286, 430, 8)       224       
_________________________________________________________________
batch_normalization (BatchNo (None, 286, 430, 8)       32        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 143, 215, 8)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 141, 213, 16)      1168      
_________________________________________________________________
batch_normalization_1 (Batch (None, 141, 213, 16)      64        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 70, 106, 16)       0

In [18]:
from keras.callbacks import EarlyStopping
# defining call back
callback = EarlyStopping(monitor="val_loss", patience=150, verbose=1, restore_best_weights=True)

In [19]:
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam', metrics = 'accuracy')
model.fit(X_train, y_train, validation_split=0.1, batch_size=16, epochs=1000, callbacks=[callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f1220d0d850>

In [22]:
evaluation = model.evaluate(X_test, y_test)
print(f'Accuracy achieved on test set: {evaluation[1]:.4f}')

Accuracy achieved on test set: 0.5650
