In [None]:
'''
This notebook builds a pandas DataFrame of audio features extracted from the GTZAN Music Genre Dataset.
The features are: CQT, RMSE, energy, zero crossings, MFCCs, chromagram and spectral contrast.
This notebook is still under development, so it contains a considerable amount of test code.
'''

In [2]:
import tensorflow as tf
import pandas as pd
from pathlib import Path
import librosa, librosa.display
import matplotlib.pyplot as plt
import numpy as np
import gc

# Reading Audio Data

print('iniciando...')


def load_genre_audios(genre):
    """Load the clips of one GTZAN genre as a list of sample arrays.

    Files are looked up relative to the current working directory with the
    pattern ``Datasets/GTZAN_GENRE_COLLECTION/genres/<genre>/<genre>.000*.au``.
    The matches are sorted by path because ``Path.glob`` yields them in an
    arbitrary, filesystem-dependent order; sorting keeps the order of the
    loaded clips (and of every row derived from them) reproducible.

    Parameters
    ----------
    genre : str
        Name of the genre sub-directory, which is also the file-name prefix.

    Returns
    -------
    list
        One entry per matching file: the first element of what
        ``librosa.load`` returns for it (the decoded samples).
    """
    pattern = 'Datasets/GTZAN_GENRE_COLLECTION/genres/' + genre + '/' + genre + '.000*.au'
    return [librosa.load(clip_path)[0] for clip_path in sorted(Path().glob(pattern))]


audios_blues = load_genre_audios('blues')
audios_classical = load_genre_audios('classical')
audios_country = load_genre_audios('country')
audios_disco = load_genre_audios('disco')
audios_hiphop = load_genre_audios('hiphop')
audios_jazz = load_genre_audios('jazz')
audios_metal = load_genre_audios('metal')
audios_pop = load_genre_audios('pop')
audios_reggae = load_genre_audios('reggae')
audios_rock = load_genre_audios('rock')


print('Ha salio bien')

iniciando...
Ha salio bien


In [71]:
# Feature-extraction functions and the audio-analysis parameters they share

# NOTE(review): sklearn is only referenced from commented-out code in the cells visible here.
import sklearn

hop_length = 256  # passed as hop_length to the energy, RMSE, CQT and chromagram helpers
frame_length = 512  # passed as frame_length to the energy and RMSE helpers
sr = 22050  # sampling rate handed to the feature helpers; presumably the rate the clips were loaded at -- TODO confirm
# First loaded blues clip. The helpers below take their own audio_array
# parameter, so this module-level value looks like interactive-testing
# residue -- TODO confirm nothing else relies on it before removing.
audio_array = audios_blues[0]
# frames = range(len(rmse))
fmin = 36  # passed as fmin to librosa.cqt
n_bins = 72  # passed as n_bins to librosa.cqt
# t = librosa.frames_to_time(frames, sr=sr, hop_length=hop_length)

def get_short_time_energy(audio_array, sr, hop_length, frame_length):
    """Return the mean short-time energy of a signal.

    The signal is cut into windows of ``frame_length`` samples starting every
    ``hop_length`` samples. Windows near the end are not padded, so they are
    shorter. The energy of a window is the sum of its squared samples; the
    function returns the average of those window energies.

    Parameters
    ----------
    audio_array : array-like
        Audio samples.
    sr : int
        Not used by the computation; kept so existing callers keep working.
    hop_length : int
        Distance, in samples, between the starts of consecutive windows.
    frame_length : int
        Maximum number of samples per window.

    Returns
    -------
    numpy.float64
        Mean window energy (NaN, with a NumPy warning, for an empty signal).
    """
    audio_array = np.asarray(audio_array)
    # The earlier librosa.get_duration(...) call was dropped: its result was
    # discarded, so it only added a dependency on librosa's call signature.
    energy = np.array([
        # Vectorised sum; float64 accumulation keeps the precision of the
        # element-by-element Python sum this replaces.
        np.sum(np.abs(audio_array[start:start + frame_length] ** 2), axis=0, dtype=np.float64)
        for start in range(0, len(audio_array), hop_length)
    ])
    return energy.mean(axis=0)

def get_rmse(audio_array, sr, hop_length, frame_length):
    """Return the mean of the frame-wise RMS values of a signal.

    Parameters
    ----------
    audio_array : np.ndarray
        Audio samples, forwarded to librosa as ``y``.
    sr : int
        Not used by the computation; kept so existing callers keep working.
    hop_length : int
        Forwarded to librosa as ``hop_length``.
    frame_length : int
        Forwarded to librosa as ``frame_length``.

    Returns
    -------
    The mean over the first row of the array librosa returns.
    """
    # librosa renamed ``feature.rmse`` to ``feature.rms`` and later removed the
    # old name, so use whichever one the installed release provides.
    rms_function = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
    # The signal is passed by keyword because newer librosa releases no longer
    # accept it positionally; older releases accept the keyword as well.
    rmse = rms_function(y=audio_array, frame_length=frame_length, hop_length=hop_length, center=True)
    return rmse[0].mean(axis=0)


def get_zcr(audio_array):
    """Return the number of zero crossings in a signal, as a float.

    Note that this is a total count over the whole clip, not a rate per frame
    or per second.

    Parameters
    ----------
    audio_array : np.ndarray
        Audio samples, forwarded to ``librosa.zero_crossings``.

    Returns
    -------
    numpy.float64
        Count of truthy entries in the mask librosa returns (0.0 for an empty
        mask).
    """
    crossing_mask = librosa.zero_crossings(audio_array)
    # Vectorised count instead of the builtin sum() over every sample. The
    # previous ``.mean()`` on the scalar total only converted it to float64,
    # so that conversion is kept explicitly.
    return np.float64(np.count_nonzero(crossing_mask))

def get_mfccs(audio_array, sr):
    """Return the per-coefficient mean of a signal's MFCCs.

    Two defects of the previous implementation are fixed here:

    * the means were written into ``np.arange(20)``, an integer array, so
      every value was truncated to an integer;
    * the loop stopped one coefficient early, so the last entry kept its
      placeholder value (19) instead of a real mean.

    Parameters
    ----------
    audio_array : np.ndarray
        Audio samples, forwarded to librosa as ``y``.
    sr : int
        Sampling rate, forwarded to librosa as ``sr``.

    Returns
    -------
    np.ndarray
        One mean per row of the matrix returned by ``librosa.feature.mfcc``
        (20 values with librosa's default number of coefficients -- the
        default is not set in this notebook).
    """
    # Keyword arguments: newer librosa releases reject positional arguments
    # here, and older releases accept the keywords too.
    mfccs = librosa.feature.mfcc(y=audio_array, sr=sr)
    # Average each coefficient (row) over its second axis.
    return mfccs.mean(axis=1)

def get_cqt(audio_array, sr, fmin, n_bins, hop_length):
    """Return the mean magnitude of a signal's constant-Q transform.

    ``librosa.cqt`` returns complex coefficients. Averaging them directly, as
    this function used to, produces a complex number in which opposite phases
    cancel out; the magnitude is taken first so the feature is a real scalar.

    Parameters
    ----------
    audio_array : np.ndarray
        Audio samples, forwarded to librosa as ``y``.
    sr : int
        Forwarded to librosa as ``sr``.
    fmin : float
        Forwarded to librosa as ``fmin``.
    n_bins : int
        Forwarded to librosa as ``n_bins``.
    hop_length : int
        Forwarded to librosa as ``hop_length``.

    Returns
    -------
    Real scalar: the mean, over both axes, of the absolute CQT values.
    """
    C = librosa.cqt(y=audio_array, sr=sr, fmin=fmin, n_bins=n_bins, hop_length=hop_length)
    magnitude = np.abs(C)
    return magnitude.mean(axis=0).mean(axis=0)

def get_chromagram(audio_array, sr, hop_length):
    """Return the overall mean of a signal's chromagram.

    The chromagram comes from ``librosa.feature.chroma_cens``; it is averaged
    along its first axis and the resulting vector is averaged again, leaving
    a single scalar.

    Parameters
    ----------
    audio_array : np.ndarray
        Audio samples, forwarded to librosa as ``y``.
    sr : int
        Forwarded to librosa as ``sr``.
    hop_length : int
        Forwarded to librosa as ``hop_length``.

    Returns
    -------
    Scalar mean of the chromagram.
    """
    # The signal is passed by keyword: newer librosa releases reject it as a
    # positional argument, while older releases accept the keyword as well.
    chromagram = librosa.feature.chroma_cens(y=audio_array, sr=sr, hop_length=hop_length)
    return chromagram.mean(axis=0).mean(axis=0)

def get_spectral_contrast(audio_array, sr):
    """Return the overall mean of a signal's spectral contrast.

    The matrix returned by ``librosa.feature.spectral_contrast`` is averaged
    along its first axis and the resulting vector is averaged again, leaving
    a single scalar.

    Parameters
    ----------
    audio_array : np.ndarray
        Audio samples, forwarded to librosa as ``y``.
    sr : int
        Forwarded to librosa as ``sr``.

    Returns
    -------
    Scalar mean of the spectral-contrast matrix.
    """
    # The signal is passed by keyword: newer librosa releases reject it as a
    # positional argument, while older releases accept the keyword as well.
    spectral_contrast = librosa.feature.spectral_contrast(y=audio_array, sr=sr)
    return spectral_contrast.mean(axis=0).mean(axis=0)

In [87]:
# Creating a pandas data frame with features
def create_features_dictionary(audios_list, label_name):
    """Extract the features of every clip of one genre into a dict of columns.

    Each key of the returned dictionary is a column name and each value is a
    list with one entry per clip in ``audios_list``. The extraction parameters
    (``sr``, ``hop_length``, ``frame_length``, ``fmin``, ``n_bins``) are read
    from the notebook's module-level variables.

    Parameters
    ----------
    audios_list : iterable
        Audio sample arrays, one per clip.
    label_name : str
        Genre label stored in the ``'Genre'`` column for every clip.

    Returns
    -------
    dict
        Keys, in order: ``'Genre'``, ``'Energy'``, ``'RMSE'``, ``'ZCR'``,
        ``'MFCCS0'`` .. ``'MFCCS19'``, ``'CQT'``, ``'Spectral_Contrast'``,
        ``'Chromagram'``.
    """
    # Generating the keys avoids hand-typing twenty MFCC entries (the literal
    # used before listed 'MFCCS9' twice).
    column_names = (
        ['Genre', 'Energy', 'RMSE', 'ZCR']
        + ['MFCCS' + str(i) for i in range(20)]
        + ['CQT', 'Spectral_Contrast', 'Chromagram']
    )
    features_array_dict = {name: [] for name in column_names}

    for audio_array in audios_list:
        short_time_energy = get_short_time_energy(audio_array, sr, hop_length, frame_length)
        rmse = get_rmse(audio_array, sr, hop_length, frame_length)
        zcr = get_zcr(audio_array)
        mfccs = get_mfccs(audio_array, sr)
        cqt = get_cqt(audio_array, sr, fmin, n_bins, hop_length)
        spectral_contrast = get_spectral_contrast(audio_array, sr)
        chromagram = get_chromagram(audio_array, sr, hop_length)

        features_array_dict['Genre'].append(label_name)
        features_array_dict['Energy'].append(short_time_energy)
        features_array_dict['RMSE'].append(rmse)
        features_array_dict['ZCR'].append(zcr)

        # NOTE(review): the last coefficient is deliberately still skipped, so
        # 'MFCCS19' stays empty. The data frame built from these dictionaries
        # keeps only MFCCS0..MFCCS18, and filling the extra column here alone
        # would leave it out of step with the merge step. Confirm whether 20
        # coefficients were intended.
        for i in range(len(mfccs) - 1):
            features_array_dict['MFCCS' + str(i)].append(mfccs[i])

        features_array_dict['CQT'].append(cqt)
        features_array_dict['Spectral_Contrast'].append(spectral_contrast)
        features_array_dict['Chromagram'].append(chromagram)

    return features_array_dict

def concatenate_features_arrays_dictionaries(dict_one, dict_two):
    """Append the rows of ``dict_two`` to ``dict_one`` and return ``dict_one``.

    Both arguments map a column name to a list of values. Every column found
    in ``dict_two`` is appended to the column of the same name in
    ``dict_one``; nothing is hard-coded about which columns exist or how many
    MFCC columns there are. A column that is empty in ``dict_two`` contributes
    nothing, and a column missing from ``dict_one`` is created.

    Parameters
    ----------
    dict_one : dict
        Destination; modified in place.
    dict_two : dict
        Source; left unchanged.

    Returns
    -------
    dict
        ``dict_one``, the same object that was passed in.
    """
    for column, values in dict_two.items():
        # list(values) takes a snapshot, which keeps the call well defined
        # even if both arguments are the same dictionary.
        dict_one.setdefault(column, []).extend(list(values))
    return dict_one

def multiprocess_extraction_df():
    """Extract the features of all ten genres and return them as a DataFrame.

    Despite its name, the function runs sequentially: it builds one feature
    dictionary per genre from the module-level ``audios_<genre>`` lists,
    merges them in the order listed below, and builds the frame from the
    merged dictionary.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``Genre``, ``Energy``, ``RMSE``, ``ZCR``, ``CQT``,
        ``Spectral_Contrast``, ``Chromagram``, ``MFCCS0`` .. ``MFCCS18``.
    """
    genre_sources = [
        (audios_blues, 'Blues'),
        (audios_classical, 'Classical'),
        (audios_country, 'Country'),
        (audios_disco, 'Disco'),
        (audios_hiphop, 'Hiphop'),
        (audios_jazz, 'Jazz'),
        (audios_metal, 'Metal'),
        (audios_pop, 'Pop'),
        (audios_reggae, 'Reggae'),
        (audios_rock, 'Rock'),
    ]

    # Extract every genre first, then fold the results into the first one.
    per_genre_dicts = [
        create_features_dictionary(audios, label) for audios, label in genre_sources
    ]
    features_array_dict = per_genre_dicts[0]
    for genre_dict in per_genre_dicts[1:]:
        features_array_dict = concatenate_features_arrays_dictionaries(features_array_dict, genre_dict)

    # Only the first 19 MFCC columns are kept in the frame; any other key of
    # the merged dictionary is ignored by the ``columns`` selection.
    tags_mfccs_columns = ['MFCCS' + str(i) for i in range(19)]
    tags_dataframe_columns = [
        'Genre', 'Energy', 'RMSE', 'ZCR', 'CQT', 'Spectral_Contrast', 'Chromagram',
        *tags_mfccs_columns,
    ]

    return pd.DataFrame(features_array_dict, columns=tags_dataframe_columns)


# Build the feature table covering every genre.
df = multiprocess_extraction_df()

# Save the table as CSV in the working directory (to_csv is called with its
# defaults); the file name matches the 19 MFCC columns kept in the frame.
df.to_csv('19_MFCCS_DATAFRAME.csv')


# Show the first five rows as a quick sanity check, then a completion marker.
print(df.head(5))
print('impresito')

{'Genre': ['Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Classical', 'Classical', 'Classical', 'Classical', 'Classical', 'Classical', 'Classical'