In [3]:
import os
import h5py
import librosa
import itertools
from copy import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import OrderedDict
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm

import tensorflow as tf
from keras.utils import to_categorical
from keras.models import Model, Sequential, load_model
from keras.layers import Input
from keras.layers import Add, Dense, Activation, PReLU, Conv2D
from keras.layers import MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.layers import Dropout, Flatten, BatchNormalization
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.applications.resnet50 import preprocess_input, ResNet50

In [None]:
# For reproducibility purposes
np.random.seed(42)

In [4]:
"""
@description: Method to split a song into multiple songs using overlapping windows
"""
def splitsongs(X, y, window = 0.05, overlap = 0.5):
    # Empty lists to hold our results
    temp_X = []
    temp_y = []

    # Get the input song array size
    xshape = X.shape[0]
    chunk = int(xshape*window)
    offset = int(chunk*(1.-overlap))
    
    # Split the song and create new ones on windows
    spsong = [X[i:i+chunk] for i in range(0, xshape - chunk + offset, offset)]
    for s in spsong:
        if s.shape[0] != chunk:
            continue

        temp_X.append(s)
        temp_y.append(y)

    return np.array(temp_X), np.array(temp_y)
    


In [5]:
"""
@description: Method to convert a list of songs to a np array of melspectrograms
"""
def to_melspectrogram(songs, n_fft=1024, hop_length=256):
    # Transformation function
    melspec = lambda x: librosa.feature.melspectrogram(x, n_fft=n_fft,
        hop_length=hop_length, n_mels=128)[:,:,np.newaxis] #keep n_mels=128. other values are for experimenting

    # map transformation of input songs to melspectrogram using log-scale
    tsongs = map(melspec, songs)
    # np.array([librosa.power_to_db(s, ref=np.max) for s in list(tsongs)])
    return np.array(list(tsongs))


In [6]:
def split_convert(X, y):
    arr_spec, arr_genre = [], []
    
    # Convert to spectrograms and split into small windows
    for fn, genre in tqdm(zip(X, y),total=len(y),desc='Processing Audio Files'):
        signal, sr = librosa.load(fn)
        signal = signal[:song_samples]

        # Convert to dataset of spectograms/melspectograms
        signals, y = splitsongs(signal, genre, window=0.05) #keep window=0.05. Other values are for experimenting. 

        # Convert to "spec" representation
        specs = to_melspectrogram(signals)

        # Save files
        arr_genre.extend(y)
        arr_spec.extend(specs)
    
    return np.array(arr_spec), to_categorical(arr_genre)
    


In [13]:
def read_data(src_dir, genres, song_samples, get_data='train'):    
    # Empty array of dicts with the processed features from all files
    arr_fn = []
    arr_genres = []

    # Get file list from the folders
    if get_data=='train':
        for x,_ in genres.items():
            print('x= ', x)
            folder = src_dir+'/'+x#'train'+'/' + x
            for root, subdirs, files in os.walk(folder):
                i = 0
                for file in files:
                    file_name = folder + "/" + file

                    # Save the file name and the genre
                    arr_fn.append(file_name)
                    arr_genres.append(genres[x])
                    
                    i += 1

                    # Get only first 80% of the files from each folder
                    if i >= round(len(files)*0.8//10):
                        break
        
        # Split into small segments and convert to spectrogram
        X_train, y_train = split_convert(arr_fn, arr_genres)
        return X_train, y_train
    
    elif get_data=='test':
        folder = src_dir+'/'+x#'test'
        for root, subdirs, files in os.walk(folder):
            for idx, file in enumerate(files):
                file_name = folder + "/" + file
                # Save the file name and the genre
                arr_fn.append(file_name)
                arr_genres.append(idx//10)
    
        X_test, y_test = split_convert(arr_fn, arr_genres)
        return X_test, y_test
    else:
        #print('Specify "test" or "train"')
        return None, None

In [14]:
# Parameters
gtzan_dir = 'music_genre_classification/Data/genres_original'
song_samples = 660000
genres = {'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 
          'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}

# Read the data
X_train, y_train = read_data(gtzan_dir, genres, song_samples, get_data='train')
X_test, y_test = read_data(gtzan_dir, genres, song_samples, get_data='test')

x=  blues
x=  classical
x=  country
x=  disco
x=  hiphop
x=  jazz
x=  metal
x=  pop
x=  reggae
x=  rock


  melspec = lambda x: librosa.feature.melspectrogram(x, n_fft=n_fft,
  0.03085327] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  melspec = lambda x: librosa.feature.melspectrogram(x, n_fft=n_fft,
  0.01605225] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  melspec = lambda x: librosa.feature.melspectrogram(x, n_fft=n_fft,
 -0.09613037] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  melspec = lambda x: librosa.feature.melspectrogram(x, n_fft=n_fft,
  melspec = lambda x: librosa.feature.melspectrogram(x, n_fft=n_fft,
  0.00115967] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  melspec = lambda x: librosa.feature.melspectrogram(x, n_fft=n_fft,
  melspec = lambda x: librosa.feature.melspectrogram(x, n_fft=n_fft,
  0.05090332] as keyword args. From version 0.10 passing these as positio

KeyboardInterrupt: 

In [None]:
# Histogram for train and test 
values, count = np.unique(np.argmax(y_train, axis=1), return_counts=True)
plt.bar(values, count)

values, count = np.unique(np.argmax(y_test, axis=1), return_counts=True)
plt.bar(values, count)
plt.show()