# Music Genre Classification

## Data Pre-Processing for Recurrent Neural Network

Fabrizio Niro - Jacopo Signò

GTZAN Dataset - Music Genre Classification 

https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification

In [12]:
import librosa as lr
import pandas as pd
import numpy as np
import IPython.display as ipyd
import math
import os
import pickle
import torch
import sklearn
import torch.nn.functional as tf
from sklearn import preprocessing

The function "tracksplit" splits an audio track, loaded into an array, into n subtracks of equal length

In [106]:
def tracksplit(track, nsplit=10):
    """Split a track into ``nsplit`` consecutive subtracks of equal length.

    Parameters:
        track: sliceable sequence of samples (anything supporting ``len``
            and slicing, e.g. a list or a 1-D array).
        nsplit: number of subtracks to produce (positive integer).

    Returns:
        A list of ``nsplit`` slices of ``track``, each
        ``len(track) // nsplit`` samples long. Samples left over at the end
        (when the length is not a multiple of ``nsplit``) are dropped.
    """
    # Use the ``track`` argument itself; the previous body read a global
    # named ``y`` and therefore ignored whatever was passed in.
    length = len(track) // nsplit

    return [track[i * length:(i + 1) * length] for i in range(nsplit)]

We iterate through the dataset directory structure and split every audio track into 10 subtracks of 3 seconds each

In [None]:
sec3_subtracks = []

# Walk Data/genres_original/<directory>/<file>, load each file with librosa
# and collect the pieces returned by tracksplit (default number of splits).
genres_root = 'Data/genres_original'

for directory in os.listdir(genres_root):

    genre_dir = os.path.join(genres_root, directory)

    for file in os.listdir(genre_dir):

        try:
            # lr.load returns two values; the second one is not needed here.
            subtrack, _ = lr.load(os.path.join(genre_dir, file))

        except Exception:
            # Fallback when the file cannot be loaded: drop the last five
            # characters of the file name and append '0.wav', then load that
            # file instead.
            # NOTE(review): presumably this substitutes a sibling track for an
            # unreadable one - confirm that duplicating a track is intended.
            # `except Exception` (not a bare `except`) keeps Ctrl-C working.
            subtrack, _ = lr.load(os.path.join(genre_dir, file[:-5] + '0.wav'))

        sec3_subtracks.extend(tracksplit(subtrack))

In [None]:
# Cache the list of subtracks on disk (pickle file 'sec3_subtracks' in the
# working directory) so the audio does not have to be reloaded.
with open('sec3_subtracks', mode='wb') as subtracks_file:
    pickle.dump(sec3_subtracks, subtracks_file)

In [5]:
# Restore the list of subtracks from the pickle file 'sec3_subtracks'.
with open('sec3_subtracks', mode='rb') as subtracks_file:
    sec3_subtracks = pickle.load(subtracks_file)

The pandas dataframe "data" stores, for each row, the subtrack id, the genre label and the sample array of the split track

In [32]:
# Number of ids/labels generated per genre directory.
# NOTE(review): presumably 100 tracks x 10 subtracks per genre - confirm
# against the actual number of files in each directory.
SUBTRACKS_PER_GENRE = 1000

sec3_track_id = []
label = []

# One '<genre>_<i>' id and one '<genre>' label per subtrack. The loop variable
# is named `genre` so that the builtin `dir` is not shadowed.
for genre in os.listdir('Data/genres_original'):

    for i in range(SUBTRACKS_PER_GENRE):

        sec3_track_id.append(f'{genre}_{i}')
        label.append(genre)

In [37]:
# Build the dataframe one column at a time, in the order id, label, samples.
data = pd.DataFrame()
for column_name, column_values in (
    ('sec3_track_id', sec3_track_id),
    ('label', label),
    ('sec3_subtracks', sec3_subtracks),
):
    data[column_name] = column_values

In [65]:
# Save the dataframe to the pickle file 'data_id_lab_subt'.
with open('data_id_lab_subt', mode='wb') as data_file:
    pickle.dump(data, data_file)

In [4]:
# Reload the dataframe from the pickle file 'data_id_lab_subt'.
with open('data_id_lab_subt', mode='rb') as data_file:
    data = pickle.load(data_file)

In [5]:
# Show the dataframe as the cell output (columns: sec3_track_id, label,
# sec3_subtracks).
data

Unnamed: 0,sec3_track_id,label,sec3_subtracks
0,blues_0,blues,"[0.0073242188, 0.016601562, 0.0076293945, -0.0..."
1,blues_1,blues,"[-0.072753906, -0.055389404, -0.036102295, -0...."
2,blues_2,blues,"[0.06997681, 0.14709473, 0.2263794, 0.28271484..."
3,blues_3,blues,"[-0.31854248, -0.2897339, -0.25097656, -0.2348..."
4,blues_4,blues,"[0.19113159, 0.12878418, 0.06561279, -0.004669..."
...,...,...,...
9995,rock_995,rock,"[0.21447754, 0.15588379, 0.11029053, 0.0905761..."
9996,rock_996,rock,"[-0.018249512, -0.04788208, -0.003479004, 0.05..."
9997,rock_997,rock,"[-0.011993408, -0.01550293, -0.0074157715, -0...."
9998,rock_998,rock,"[0.0814209, -0.0018310547, -0.0687561, -0.1164..."


We compute the Mel-Frequency Cepstral Coefficients (MFCCs) for each split track. They are the features that will be fed to the network

In [None]:
# Compute one MFCC matrix per subtrack; only `y` is passed, so every other
# librosa parameter keeps its default.
# Iterating over the column values directly (instead of indexing with
# range(len(...))) does not depend on the dataframe having a 0..n-1 index.
sec3_mfcc = [lr.feature.mfcc(y=subtrack) for subtrack in data['sec3_subtracks']]

In [57]:
# Save the list of MFCC matrices to the pickle file 'sec3_mfcc'.
with open('sec3_mfcc', mode='wb') as mfcc_file:
    pickle.dump(sec3_mfcc, mfcc_file)

In [26]:
# Reload the list of MFCC matrices from the pickle file 'sec3_mfcc'.
with open('sec3_mfcc', mode='rb') as mfcc_file:
    sec3_mfcc = pickle.load(mfcc_file)

We store the MFCC matrices of all the split tracks in a single torch tensor

In [29]:
# Convert the list of MFCC matrices into one NumPy array (rebinding the name
# `sec3_mfcc`), then build a torch tensor from that array.
# NOTE(review): assumes every MFCC matrix has the same shape so that the
# result is a regular 3-D array - confirm.
sec3_mfcc = np.array(sec3_mfcc)
sec3_mfcc_tens = torch.tensor(sec3_mfcc)

In [30]:
# open(..., 'wb') does not create missing parent directories, so make sure the
# output folder exists before writing the pickled MFCC tensor into it.
os.makedirs('Processed_data/RNN', exist_ok=True)
with open('Processed_data/RNN/sec3_mfcc_tens', 'wb') as f:
    pickle.dump(sec3_mfcc_tens, f)

In [31]:
# Reload the MFCC tensor from 'Processed_data/RNN/sec3_mfcc_tens'.
with open('Processed_data/RNN/sec3_mfcc_tens', mode='rb') as tensor_file:
    sec3_mfcc_tens = pickle.load(tensor_file)

In [32]:
# Inspect the tensor dimensions (recorded output: torch.Size([10000, 20, 130])).
sec3_mfcc_tens.shape

torch.Size([10000, 20, 130])

We create the target array by one-hot encoding the labels

In [33]:
# Encode the genre names as integer class indices, then expand them to
# one-hot rows.
labels = np.array(data['label'])
le = preprocessing.LabelEncoder()
target = le.fit_transform(labels)
target = torch.tensor(target)
# one_hot needs int64 indices. The number of classes comes from the fitted
# encoder instead of a hard-coded 10, so the encoding follows the labels
# actually present in `data`.
# The result is stored as an attribute on the `target` tensor because the
# following cells read and write `target.one_hot`.
target.one_hot = tf.one_hot(target.to(torch.int64), len(le.classes_))

"labels = np.array(data['label'])\nle = preprocessing.LabelEncoder()\ntarget = le.fit_transform(labels)\ntarget = torch.tensor(target)\ntarget.one_hot = tf.one_hot(target.to(torch.int64), 10)"

In [34]:
# Show the one-hot encoded targets as the cell output.
target.one_hot

tensor([[1, 0, 0,  ..., 0, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 1],
        [0, 0, 0,  ..., 0, 0, 1],
        [0, 0, 0,  ..., 0, 0, 1]])

In [35]:
# open(..., 'wb') does not create missing parent directories, so make sure the
# output folder exists before writing the pickled one-hot targets into it.
os.makedirs('Processed_data/RNN', exist_ok=True)
with open('Processed_data/RNN/target.one_hot', 'wb') as f:
    pickle.dump(target.one_hot, f)

"with open('Processed_data/RNN/target.one_hot', 'wb') as f: \n    pickle.dump(target.one_hot, f)"

In [36]:
# Reload the one-hot targets from 'Processed_data/RNN/target.one_hot'.
# `target` must already exist: the loaded object is attached to it as the
# attribute `one_hot`.
with open('Processed_data/RNN/target.one_hot', mode='rb') as one_hot_file:
    target.one_hot = pickle.load(one_hot_file)