In [None]:
'''
This notebook is oriented to the creation of an pandas dataframe builded from GTZAN Music Genre Dataset:
This features are: cqt, rmse, energy, mfccs, chromagram and spectral contrast
This notebook is currently in development phase, so contains a considerably quantity of test code
'''

In [54]:
import pandas as pd
from pathlib import Path
import librosa, librosa.display
import matplotlib.pyplot as plt
import numpy as np
import gc
import os
import pydub


In [1]:

"""
This block is aim to read original GTZAN Dataset
"""

# Reading Audio Data from GTZAN

print('iniciando...')

audios_blues = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/blues/blues.000*.au')
]

audios_classical = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/classical/classical.000*.au')
]

audios_country = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/country/country.000*.au')
]

audios_disco = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/disco/disco.000*.au')
]

audios_hiphop = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/hiphop/hiphop.000*.au')
]

audios_jazz = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/jazz/jazz.000*.au')
]

audios_metal = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/metal/metal.000*.au')
]

audios_pop = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/pop/pop.000*.au')
]

audios_reggae = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/reggae/reggae.000*.au')
]

audios_rock = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/genres/rock/rock.000*.au')
]


print('Ha salio bien')

iniciando...
Ha salio bien


In [16]:

"""
This block is aim to read slice in 10 GTZAN Dataset
"""

# Reading Audio Data

print('iniciando...')

audios_blues = [
        librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/blues/blues0*')
]

audios_classical = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/classical/classical0*')
]

audios_country = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/country/country0*')
]

audios_disco = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/disco/disco0*')
]

audios_hiphop = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/hiphop/hiphop0*')
]

audios_jazz = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/jazz/jazz0*')
]

audios_metal = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/metal/metal0*')
]

audios_pop = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/pop/pop0*')
]

audios_reggae = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/reggae/reggae0*')
]

audios_rock = [
    librosa.load(p)[0] for p in Path().glob('Datasets/GTZAN_GENRE_COLLECTION/audio3sec/rock/rock0*')
]

print('Ha salio bien')


iniciando...
Ha salio bien


In [None]:
"""
This block is aim to read original ISMIR04 Dataset
"""

# Reading Audio Data from ISMIR04

print('iniciando...')

audios_classical = [
    librosa.load(p)[0] for p in Path().glob('Datasets/ismir04_genre/audio/evaluation/classical/classical0*')
]

audios_electronic = [
    librosa.load(p)[0] for p in Path().glob('Datasets/ismir04_genre/audio/evaluation/electronic/electronic0*')
]

audios_jazz = [
    librosa.load(p)[0] for p in Path().glob('Datasets/ismir04_genre/audio/evaluation/jazz/jazz0*')
]

audios_metal = [
    librosa.load(p)[0] for p in Path().glob('Datasets/ismir04_genre/audio/evaluation/metal/metal0*')
]

audios_pop = [
    librosa.load(p)[0] for p in Path().glob('Datasets/ismir04_genre/audio/evaluation/pop/pop0*')
]

audios_punk = [
    librosa.load(p)[0] for p in Path().glob('Datasets/ismir04_genre/audio/evaluation/punk/punk0*')
]

audios_rock = [
    librosa.load(p)[0] for p in Path().glob('Datasets/ismir04_genre/audio/evaluation/rock/rock0*')
]

audios_world = [
    librosa.load(p)[0] for p in Path().glob('Datasets/ismir04_genre/audio/evaluation/world/world0*')
]

print('Ha salio bien')

iniciando...
Ha salio bien


In [48]:
# Funtions to extracting features 

import sklearn
import librosa

class FeaturesExtractor():
    """
    Class that contains several methods to extract means audio features from an audioArray
    If user dont use specific parameters class use default values
    This class make use of sklean and librosa packages
    
    feature_to_extract = possible values = "mean" or "var"
    """
    
    def __init__(self, hop_length=1024, frame_length=2048, sr=22050, fmin=36, n_bins= 72, lag_seconds=4, feature_to_extract="mean"):
        self.hop_length = hop_length
        self.frame_length = frame_length
        self.sr = sr
        self.fmin = fmin
        self.n_bins = n_bins
        self.lag_seconds = lag_seconds
        self.feature_to_extract = feature_to_extract
    
    def get_short_time_energy(self, audio_array, sr=None, hop_length=None, frame_length=None):
        if sr is None: sr=self.sr
        if hop_length is None: hop_length=self.hop_length
        if frame_length is None: frame_length=self.frame_length
        
        librosa.get_duration(audio_array, sr)
        energy = np.array([
            sum(abs(audio_array[i:i+frame_length]**2))
            for i in range(0, len(audio_array), hop_length)
        ])
        
        if self.feature_to_extract is "mean": return energy.mean(axis=0)
        elif self.feature_to_extract is "var": return energy.var()
        else: return energy

    def get_rmse(self, audio_array, sr=None, hop_length=None, frame_length=None):
        if sr is None: sr=self.sr
        if hop_length is None: hop_length=self.hop_length
        if frame_length is None: frame_length=self.frame_length
        
        rmse = librosa.feature.rms(audio_array, frame_length=frame_length, hop_length=hop_length, center=True)
        rmse = rmse[0]
        if self.feature_to_extract is "mean": return rmse.mean(axis=0)
        elif self.feature_to_extract is "var": return rmse.var()
        else: return rmse


    def get_zcr(self, audio_array):
#       we add a little constant for avoid silence near 0
        zero_crossings = librosa.zero_crossings(audio_array + 0.0001)
        zero_crossings = sum(zero_crossings)
        return zero_crossings
#         return zero_crossings.mean()

    def get_mfccs_means(self, audio_array, sr=None, number_mfccs=13):
        if sr is None: sr=self.sr
            
        mfccs = librosa.feature.mfcc(audio_array, sr, n_mfcc=number_mfccs)
        mfccs_means = np.arange(number_mfccs)
        for i in  range(0, len(mfccs)):
            if self.feature_to_extract is "mean": mfccs_means[i] = mfccs[i].mean(axis=0).mean()
            elif self.feature_to_extract is "var": mfccs_means[i] = mfccs[i].var()
            else: mfccs_means[i] = mfccs[i].mean(axis=0).mean()
            
        
        return mfccs_means
    
#         mfccs = sklearn.preprocessing.scale(mfccs, axis=1)
#         print()
#         print (mfccs.mean(axis=1))
#         print (mfccs.var(axis=1))
#         librosa.display.specshow(mfccs, sr=sr, x_axis='time')
    
    def get_cqt(self, audio_array, sr=None, hop_length=None, fmin=None, n_bins=None):
        if sr is None: sr=self.sr
        if hop_length is None: hop_length=self.hop_length
        if fmin is None: fmin=self.fmin
        if n_bins is None: n_bins=self.n_bins
        
        C = librosa.cqt(audio_array, sr=sr, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
#         return C
        if self.feature_to_extract is "mean": feature = C.mean(axis=0).mean(axis=0)
        elif self.feature_to_extract is "var": feature = C.var()
        else: feature = C.mean(axis=0).mean(axis=0)
        return abs(feature)
        
    def get_chromagram(self, audio_array, sr=None, hop_length=None):
        if sr is None: sr=self.sr
        if hop_length is None: hop_length=self.hop_length
            
        chromagram = librosa.feature.chroma_cens(audio_array, sr=sr, hop_length=hop_length)
#         return chromagram
        if self.feature_to_extract is "mean": feature = chromagram.mean(axis=0).mean(axis=0)
        elif self.feature_to_extract is "var": feature = chromagram.var()
        else: feature = chromagram.mean(axis=0).mean(axis=0)
        return feature

    def get_spectral_contrast(self, audio_array, sr=None):
        if sr is None: sr=self.sr
            
        spectral_contrast = librosa.feature.spectral_contrast(audio_array, sr=self.sr)
#         return spectral_contrast
        if self.feature_to_extract is "mean": feature = spectral_contrast.mean(axis=0).mean(axis=0)
        elif self.feature_to_extract is "var": feature = spectral_contrast.var()
        else: feature = spectral_contrast.mean(axis=0).mean(axis=0)
        return feature

    def get_autocorrelation(self, audio_array, sr=None, hop_length=None, lag_seconds=None):
        if sr is None: sr=self.sr
        if hop_length is None: hop_length=self.hop_length
        if lag_seconds is None: lag_seconds=self.lag_seconds
        
        max_size = lag_seconds * sr / hop_length
        autocorrelation = librosa.autocorrelate(audio_array, max_size)
        
        if self.feature_to_extract is "mean": feature = autocorrelation.mean(axis=0)
        elif self.feature_to_extract is "var": feature = autocorrelation.var()
        else: feature = autocorrelation.mean(axis=0)
        return feature

    # Function that obtain Short Time Fourier Transform from an audio samples array
    def get_stft(self, audio_array, hop_length=None, frame_length=None):
        if hop_length is None: hop_length=self.hop_length
        if frame_length is None: frame_length=self.frame_length
            
        stft = librosa.stft(audio_array, n_fft=frame_length, hop_length=hop_length)
        
        if self.feature_to_extract is "mean": feature = stft.mean(axis=0).mean(axis=0)
        elif self.feature_to_extract is "var": feature = stft.var()
        else: feature = stft.mean(axis=0).mean(axis=0)
        return abs(feature)
    
    def get_rolloff(self, audio_array, sr=None):
        if sr is None: sr=self.sr
        
        spectral_rolloff = librosa.feature.spectral_rolloff(audio_array+0.01, sr=sr)
        
        if self.feature_to_extract is "mean": feature = spectral_rolloff.mean(axis=0).mean(axis=0)
        elif self.feature_to_extract is "var": feature = spectral_rolloff.var()
        else: feature = spectral_rolloff.mean(axis=0).mean(axis=0)
        return feature

    def get_spectral_centroid(self, audio_array, sr=None):
        if sr is None: sr=self.sr
        
        spectral_centroids = librosa.feature.spectral_centroid(audio_array, sr=sr)
        
        if self.feature_to_extract is "mean": feature = spectral_centroids.mean(axis=0).mean(axis=0)
        elif self.feature_to_extract is "var": feature = spectral_centroids.var()
        else: feature = spectral_centroids.mean(axis=0).mean(axis=0)
        return feature
    
    # Codigo de prueba 
    def GET_SHOW_spectogram(self, stft, sr=None, hop_length=None):
        if sr is None: sr=self.sr
        if hop_length is None: hop_length=self.hop_length
            
        S = librosa.amplitude_to_db(abs(stft))
        librosa.display.specshow(S, sr=sr, hop_length=hop_length, x_axis='time', y_axis='linear')


In [49]:
# Creating a pandas data frame with features

class MusicFeaturesDictCreator:
    """
    Class aim to create a dictionary with music features, concatenate music feature dictionaries 
    and export a final dictionary to a pandas dataFrame
    """
    
    def __init__(self, features_dictionary=None , number_mfccs=None):
        if features_dictionary is not None:
            self.features_dictionary = features_dictionary
            self.features_dictionary_keys = list(self.features_dictionary.keys())
        else:
            self.features_dictionary = {
                    'Genre': [],
                    'Energy': [],
                    'RMSE': [],
                    'ZCR': [],
                    'MFCCS': number_mfccs,
                    'CQT': [],
                    'Spectral_Contrast': [],
                    'Chromagram':[],
                    'Tempo':[],
                    'Autocorrelation':[],
                    'STFT':[],
                    'Rolloff':[],
                    'Spectral_Centroid':[],
            }
            self.features_dictionary_keys = list(self.features_dictionary.keys())
            

    def ampliate_features_dictionary(self, audios_list, label_name, features_extractor:FeaturesExtractor):
        """
        Make an addition of features extracted from an audio list to the instance dictionary created in the intantiation of the class
        """
        
        for audio_array in audios_list:
                
            self.features_dictionary['Genre'].append(label_name)
                
            if len(self.features_dictionary_keys) > 1:
                short_time_energy = features_extractor.get_short_time_energy(audio_array)
                self.features_dictionary[self.features_dictionary_keys[1]].append(short_time_energy)
                
            if len(self.features_dictionary_keys) > 2:
                rmse = features_extractor.get_rmse(audio_array)
                self.features_dictionary[self.features_dictionary_keys[2]].append(rmse)
                
            if len(self.features_dictionary_keys) > 3:
                zcr = features_extractor.get_zcr(audio_array)
                self.features_dictionary[self.features_dictionary_keys[3]].append(zcr)
            
            if len(self.features_dictionary_keys) > 4:
                number_mfccs = self.features_dictionary['MFCCS']
                mfccs = features_extractor.get_mfccs_means(audio_array, number_mfccs)
                print(mfccs)
                print(mfccs.shape)
                
               # Initializion of mfccs keys if yet it not exist
                if not 'MFCCS0' in self.features_dictionary:
                    for i in range(0, len(mfccs)):
                        self.features_dictionary['MFCCS'+str(i)] = []
                        
                for i in range(0, len(mfccs)):
                    self.features_dictionary['MFCCS'+str(i)].append(mfccs[i])
            
            if len(self.features_dictionary_keys) > 5:
                cqt = features_extractor.get_cqt(audio_array)
                self.features_dictionary[self.features_dictionary_keys[5]].append(cqt)
            
            if len(self.features_dictionary_keys) > 6:
                spectral_contrast = features_extractor.get_spectral_contrast(audio_array)
                self.features_dictionary[self.features_dictionary_keys[6]].append(spectral_contrast)
            
            if len(self.features_dictionary_keys) > 7:
                chromagram = features_extractor.get_chromagram(audio_array)
                self.features_dictionary[self.features_dictionary_keys[7]].append(chromagram)
            
            if len(self.features_dictionary_keys) > 8:
                tempo = librosa.beat.tempo(audio_array, sr=22050)[0]
                self.features_dictionary[self.features_dictionary_keys[8]].append(tempo)
            
            if len(self.features_dictionary_keys) > 9:
                autocorrelation = features_extractor.get_autocorrelation(audio_array)
                self.features_dictionary[self.features_dictionary_keys[9]].append(autocorrelation)
            
            if len(self.features_dictionary_keys) > 10:
                stft = features_extractor.get_stft(audio_array)
                self.features_dictionary[self.features_dictionary_keys[10]].append(stft)
            
            if len(self.features_dictionary_keys) > 11:
                rolloff = features_extractor.get_rolloff(audio_array)
                self.features_dictionary[self.features_dictionary_keys[11]].append(rolloff)
            
            if len(self.features_dictionary_keys) > 12:
                spectral_centroid = features_extractor.get_spectral_centroid(audio_array)
                self.features_dictionary[self.features_dictionary_keys[12]].append(spectral_centroid)
                
        return self.features_dictionary
    
    def concatenate_mfccs_tags(tags_array, number_mfccs=13):
        """
        Method that concatenate mffcs tags in a tags array
        """
        tags_mfccs_columns = ["" for x in range(number_mfccs)]
        mfccs_literal = 'MFCCS'
        for i in range(0, number_mfccs):
            tags_mfccs_columns[i] = 'MFCCS'+str(i)

        tags_array = [*tags_array, *tags_mfccs_columns]
        return tags_array
    
    def concatenate_features_arrays_dictionaries(self, dict_one, dict_two):
    
        dict_two_size = len(dict_two['Energy'])

        for i in range(0, dict_two_size):
            if 'Energy' in self.features_dictionary:
                dict_one['Energy'].append(dict_two['Energy'][i])
            
            if 'RMSE' in self.features_dictionary:
                dict_one['RMSE'].append(dict_two['RMSE'][i])
            
            if 'ZCR' in self.features_dictionary:
                dict_one['ZCR'].append(dict_two['ZCR'][i])
            
            if 'MFCCS' in self.features_dictionary:
                dict_one['MFCCS'].append(dict_two['MFCCS'][i])
            
            if 'CQT' in self.features_dictionary:
                dict_one['CQT'].append(dict_two['CQT'][i])
            
            if 'Spectral_Contrast' in self.features_dictionary:
                dict_one['Spectral_Contrast'].append(dict_two['Spectral_Contrast'][i])
            
            if 'Chromagram' in self.features_dictionary:
                dict_one['Chromagram'].append(dict_two['Chromagram'][i])
            
            if 'Tempo' in self.features_dictionary:
                dict_one['Tempo'].append(dict_two['Tempo'][i])
            
            if 'Autocorrelation' in self.features_dictionary:
                dict_one['Autocorrelation'].append(dict_two['Autocorrelation'][i])
                
            if 'STFT' in self.features_dictionary:
                dict_one['STFT'].append(dict_two['STFT'][i])
            
            if 'Rolloff' in self.features_dictionary:
                dict_one['Rolloff'].append(dict_two['Rolloff'][i])
            
            if 'Spectral_Centroid' in self.features_dictionary:
                dict_one['Spectral_Centroid'].append(dict_two['Spectral_Centroid'][i])
            
        return dict_one
    
    


In [50]:
def multiprocess_feature_dict_creation():
    
    mean_features_extractor = FeaturesExtractor(feature_to_extract="mean")
    var_features_extractor = FeaturesExtractor(feature_to_extract="var")
    
    
    
    mean_features_dictionary = {
                    'Genre': [],
                    'Energy_Mean': [],
                    'RMSE_Mean': [],
                    'ZCR_Mean': [],
                    'MFCCS': 13,
                    'CQT_Mean': [],
                    'Spectral_Contrast_Mean': [],
                    'Chromagram_Mean':[],
                    'Tempo_Mean':[],
                    'Autocorrelation_Mean':[],
                    'STFT_Mean':[],
                    'Rolloff_Mean':[],
                    'Spectral_Centroid_Mean':[],
    }
    
    var_features_dictionary = {
                    'Genre': [],
                    'Energy_Var': [],
                    'RMSE_Var': [],
                    'ZCR_Var': [],
                    'MFCCS': 13,
                    'CQT_Var': [],
                    'Spectral_Contrast_Var': [],
                    'Chromagram_Var':[],
                    'Tempo_Var':[],
                    'Autocorrelation_Var':[],
                    'STFT_Var':[],
                    'Rolloff_Var':[],
                    'Spectral_Centroid_Var':[],
    }
    
    mean_feature_dict_creator = MusicFeaturesDictCreator(mean_features_dictionary)
    var_feature_dict_creator = MusicFeaturesDictCreator(var_features_dictionary)
    
#     LINES USED FOR TESTING RUNNING EXCEPTIONS USING LESS DATA
    
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_blues[:2], 'Blues', mean_features_extractor) # Initializing
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_classical[:2], 'Classical', mean_features_extractor)
    
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_blues[:2], 'Blues', var_features_extractor) # Initializing
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_classical[:2], 'Classical', var_features_extractor)
    
#     features_array_dict = {**mean_features_array_dict, **var_features_array_dict}
    
    
#     GTZAN
    
    
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_blues, 'Blues', mean_features_extractor) # Initializing
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_classical, 'Classical', mean_features_extractor)
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_country, 'Country', mean_features_extractor)
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_disco, 'Disco', mean_features_extractor)
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_hiphop, 'Hiphop', mean_features_extractor)
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_jazz, 'Jazz', mean_features_extractor)
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_metal, 'Metal', mean_features_extractor)
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_pop, 'Pop', mean_features_extractor)
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_reggae, 'Reggae', mean_features_extractor)
#     mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_rock, 'Rock', mean_features_extractor)
    
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_blues, 'Blues', var_features_extractor) # Initializing
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_classical, 'Classical', var_features_extractor)
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_country, 'Country', var_features_extractor)
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_disco, 'Disco', var_features_extractor)
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_hiphop, 'Hiphop', var_features_extractor)
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_jazz, 'Jazz', var_features_extractor)
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_metal, 'Metal', var_features_extractor)
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_pop, 'Pop', var_features_extractor)
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_reggae, 'Reggae', var_features_extractor)
#     var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_rock, 'Rock', var_features_extractor)


# ISMIR04

    mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_classical, 'Classical', mean_features_extractor)
    mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_electronic, 'Electronic', mean_features_extractor)
    mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_jazz, 'Jazz', mean_features_extractor)
    mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_metal, 'Metal', mean_features_extractor)
    mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_pop, 'Pop', mean_features_extractor)
    mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_punk, 'Punk', mean_features_extractor)
    mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_rock, 'Rock', mean_features_extractor)
    mean_features_array_dict = mean_feature_dict_creator.ampliate_features_dictionary(audios_world, 'World', mean_features_extractor)

    var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_classical, 'Classical', var_features_extractor)
    var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_electronic, 'Electronic', var_features_extractor)
    var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_jazz, 'Jazz', var_features_extractor)
    var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_metal, 'Metal', var_features_extractor)
    var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_pop, 'Pop', var_features_extractor)
    var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_punk, 'Punk', var_features_extractor)
    var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_rock, 'Rock', var_features_extractor)
    var_features_array_dict = var_feature_dict_creator.ampliate_features_dictionary(audios_world, 'World', var_features_extractor)


    features_array_dict = {**mean_features_array_dict, **var_features_array_dict}
#     features_array_dict = mean_features_array_dict
#     features_array_dict = var_features_array_dict

    return features_array_dict

def df_extraction():
    """
    Method that convert a features dictionary in a pandas dataFrame
    """
    
    features_array_dict = multiprocess_feature_dict_creation()
    
    
#     tags_dataframe_columns = ['Genre', 'Energy_Mean', 'RMSE_Mean', 'ZCR_Mean', 'CQT_Mean', 'Spectral_Contrast_Mean', 'Chromagram_Mean', 'Tempo_Mean', 'STFT_Mean', 
#                               'Autocorrelation_Mean', 'STFT_Mean', 'Spectral_Centroid_Mean', 'Rolloff_Mean']
    
    tags_dataframe_columns = ['Genre', 'Energy_Mean', 'RMSE_Mean', 'ZCR_Mean', 'CQT_Mean', 'Spectral_Contrast_Mean', 'Chromagram_Mean', 'Tempo_Mean', 'STFT_Mean', 
                              'Autocorrelation_Mean', 'STFT_Mean', 'Spectral_Centroid_Mean', 'Rolloff_Mean',
                              'Energy_Var', 'RMSE_Var', 'ZCR_Var', 'CQT_Var', 'Spectral_Contrast_Var', 'Chromagram_Var', 'Tempo_Var', 'STFT_Var',
                              'Autocorrelation_Var', 'STFT_Var', 'Spectral_Centroid_Var', 'Rolloff_Var']
    
    tags_dataframe_columns = MusicFeaturesDictCreator.concatenate_mfccs_tags(tags_dataframe_columns)
    print(tags_dataframe_columns)
#     tags_dataframe_columns = ['Genre', 'Energy', 'RMSE', 'ZCR', 'CQT', 'Spectral_Contrast', 'Chromagram', 'Tempo', 'MFCCS']
    
    print(features_array_dict)
    
    df = pd.DataFrame(features_array_dict, columns = tags_dataframe_columns)
    
    return df

# Export pandas dataFrame in a csv

df = df_extraction()

df.to_csv('ismir04_100.csv')


print(df.head(5))
print('impresito')

NameError: name 'audios_classical' is not defined

In [5]:
 feature_dict_creator = MusicFeaturesDictCreator()
    
print(feature_dict_creator.features_dictionary)

{'Genre': [], 'Energy': [], 'RMSE': [], 'ZCR': [], 'MFCCS': None, 'CQT': [], 'Spectral_Contrast': [], 'Chromagram': [], 'Tempo': [], 'Autocorrelation': [], 'STFT': [], 'Rolloff': [], 'Spectral_Centroid': []}


In [2]:
from Datasets.fma.fma import utils
tracks = utils.load('Datasets/fma/fma/data/fma_metadata/tracks.csv')
features = utils.load('Datasets/fma/fma/data/fma_metadata/features.csv')

In [94]:
# FMA dataframe extractor

fma_features_dictionary = {
    'Genre': [],
    'Energy_Mean': [],
    'RMSE_Mean': [],
    'ZCR_Mean': [],
    'MFCCS': 13,
    'CQT_Mean': [],
    'Spectral_Contrast_Mean': [],
    'Chromagram_Mean':[],
    'Tempo_Mean':[],
    'Autocorrelation_Mean':[],
    'STFT_Mean':[],
    'Rolloff_Mean':[],
    'Spectral_Centroid_Mean':[],
}

fma_features_dictionary = {
    'Genre': [],
    'ZCR_Mean': [],
    'RMSE_Mean': [],
}

# FMA feature extraction

small = tracks['set', 'subset'] <= 'small'

# instrumental = tracks['track', 'genre_top'] == 'Instrumental'
# rock = tracks['track', 'genre_top'] == 'Rock'
# classical = tracks['track', 'genre_top'] == 'classical'
# country = tracks['track', 'genre_top'] == 'country'

def fma_mfccs_means_to_my_dict_converter(features_dict, mfccs_means):
    number_mfccs = 13
    for i in range(1, number_mfccs):
        features_dict['MFCCS'+str(i)] = []

    for j in range(1, number_mfccs):
        if j < 10:
            features_dict['MFCCS'+str(j)] = mfccs_means['0'+str(j)].to_list()
        else:
            features_dict['MFCCS'+str(j)] = mfccs_means[str(j)].to_list()
    return features_dict

mfccs_means  = features.loc[small, 'mfcc']['mean']
fma_mfccs_means_to_my_dict_converter(fma_features_dictionary, mfccs_means)

zcr_means  = features.loc[small, 'zcr']['mean']['01'].to_list()
fma_features_dictionary['ZCR_Mean'] = zcr_means

rmse_means  = features.loc[small, 'rmse']['mean']['01'].to_list()
fma_features_dictionary['RMSE_Mean'] = rmse_means

genres = tracks.loc[small, ('track', 'genre_top')].to_list()
fma_features_dictionary['Genre'] = genres

tags_dataframe_columns = ['Genre', 'ZCR_Mean', 'RMSE_Mean']

def simple_df_creator(fma_features_dictionary, tags_dataframe_columns):
    tags_dataframe_columns = MusicFeaturesDictCreator.concatenate_mfccs_tags(tags_dataframe_columns)
    
    print(tags_dataframe_columns)
    
    df = pd.DataFrame(fma_features_dictionary, columns = tags_dataframe_columns)
    
    return df
    
df = simple_df_creator(fma_features_dictionary, tags_dataframe_columns)

df.to_csv('rmse.csv')

df.head(5)

# genres = utils.load('Datasets/fma/fma/data/fma_metadata/genres.csv')
# print('{} top-level genres'.format(len(genres['top_level'].unique())))
# genres.loc[genres['top_level'].unique()].sort_values('#tracks', ascending=False)

# train = tracks['set', 'split'] == 'training'
# val = tracks['set', 'split'] == 'validation'
# test = tracks['set', 'split'] == 'test'

# X_train = features.loc[small & train, 'zcr']
# Y_train = tracks.loc[small & train, ('track', 'genre_top')]



['Genre', 'ZCR_Mean', 'RMSE_Mean', 'MFCCS0', 'MFCCS1', 'MFCCS2', 'MFCCS3', 'MFCCS4', 'MFCCS5', 'MFCCS6', 'MFCCS7', 'MFCCS8', 'MFCCS9', 'MFCCS10', 'MFCCS11', 'MFCCS12']


Unnamed: 0,Genre,ZCR_Mean,RMSE_Mean,MFCCS0,MFCCS1,MFCCS2,MFCCS3,MFCCS4,MFCCS5,MFCCS6,MFCCS7,MFCCS8,MFCCS9,MFCCS10,MFCCS11,MFCCS12
0,Hip-Hop,0.085629,3.188761,,-163.772964,116.696678,-41.753826,29.144329,-15.050158,18.879372,-8.918165,12.002118,-4.253151,1.359791,-2.683,-0.794632
1,Hip-Hop,0.053114,3.251386,,-205.440491,132.215073,-16.085823,41.514759,-7.642954,16.942802,-5.651261,9.569445,0.503157,8.673513,-8.271377,0.594473
2,Pop,0.077515,3.89381,,-135.864822,157.040085,-53.453247,17.198896,6.868035,13.934344,-11.749298,8.360711,-5.130381,0.233845,-5.421206,1.679479
3,Folk,0.052379,2.953848,,-225.713318,139.332825,-13.097699,44.533356,2.4684,28.328743,-9.931481,10.810857,3.002879,-0.937692,7.138268,-6.62526
4,Folk,0.040267,2.576761,,-253.143906,155.716324,-16.636627,23.683815,6.045957,11.692952,-9.947761,6.887814,-3.273322,-6.340906,7.602782,-5.851329
