In [1]:
## extracts features from audio files and converts into numpy
import librosa, pickle
import numpy as np
import os, re, csv, sys
from datetime import datetime
from random import shuffle

# Hop length (in samples) between successive MFCC frames.
# At librosa's default sampling rate of 22050 Hz, 128 samples ~= 5.8 ms
# (librosa's default hop of 512 samples would be ~= 23 ms).
#
# NOTE: `mfcc_len` (the padded MFCC length) is read as a module-level name by
# getPaddedMFCC(..., train=False); assign it before making that call.
hop_length = 128


In [2]:
## return a (flattened) one-D array of mfcc of an audio file
def getFlattenMFCC(audio_file):
    """Load ``audio_file`` and return its MFCC features as a 1-D array.

    Parameters
    ----------
    audio_file : str
        Path passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The 13-coefficient MFCC matrix, computed with the module-level
        ``hop_length``, flattened to one dimension.

    Raises
    ------
    FileNotFoundError
        If ``librosa.load`` cannot find ``audio_file``. The message is still
        printed, but the error is re-raised instead of falling through to
        code that would fail on the unbound ``y``/``sr``.
    """
    try:
        y, sr = librosa.load(audio_file)
    except FileNotFoundError:
        print('No such file or directory')
        raise

    # Compute MFCC features from the raw signal
    print(audio_file)
    return librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13).flatten()

# def getFlattenChroma(audio_file):
#     #print ('audio_file in getFlattenChroma: ', audio_file)
    
#     try: 
#         y, sr = librosa.load(audio_file)
#     except FileNotFoundError:
#         print('No such file or directory')
#     #print('print audio_file inside getFlattenChroma: ', audio_file)
#     y_harmonic, y_percussive = librosa.effects.hpss(y)
#     x = librosa.feature.chroma_cqt(y=y_harmonic,sr=sr)
#     print('shape: ', x.shape)
#     #return librosa.feature.chroma_cqt(y=y_harmonic,sr=sr).flatten()
#     return x.flatten()

def getChroma(audio_file):
    """Load ``audio_file`` and return the chromagram of its harmonic part.

    The signal is split with ``librosa.effects.hpss`` and only the harmonic
    component is passed to ``librosa.feature.chroma_cqt``.

    Parameters
    ----------
    audio_file : str
        Path passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Whatever ``librosa.feature.chroma_cqt`` returns (a 2-D array of
        chroma bins by frames, per the librosa documentation).

    Raises
    ------
    FileNotFoundError
        If ``librosa.load`` cannot find ``audio_file``. The message is still
        printed, but the error is re-raised instead of falling through to
        code that would fail on the unbound ``y``/``sr``.
    """
    try:
        y, sr = librosa.load(audio_file)
    except FileNotFoundError:
        print('No such file or directory')
        raise

    # Only the harmonic component is used; the percussive one is discarded.
    y_harmonic, _ = librosa.effects.hpss(y)
    return librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)


## return a list of 2-d chromagrams, each zero-padded to the same number of columns
def getPaddedChroma(chroma):
    """Right-pad every chromagram with zeros to the widest one's column count.

    Parameters
    ----------
    chroma : sequence of numpy.ndarray
        2-D arrays; only the second axis (columns) is padded, rows are left
        untouched.

    Returns
    -------
    list of numpy.ndarray
        One padded array per input, in the same order, each with
        ``max(x.shape[1] for x in chroma)`` columns. An empty input yields
        an empty list.
    """
    chroma = list(chroma)
    if not chroma:
        # max() of an empty sequence would raise; there is nothing to pad.
        return []

    max_col = max(x.shape[1] for x in chroma)
    # Every array is padded to exactly max_col columns, so the result is
    # uniform by construction and needs no post-hoc length check.
    return [
        np.pad(x, [(0, 0), (0, max_col - x.shape[1])], mode='constant')
        for x in chroma
    ]

In [3]:
## return a list of 1-d MFCC arrays, zero-padded to a common length, for ALL audio files
def getPaddedMFCC(audio_files, train = True, target_len = None):
    """Extract flattened MFCCs for each file and bring them to one length.

    Parameters
    ----------
    audio_files : iterable of str
        Paths passed one by one to ``getFlattenMFCC``.
    train : bool, default True
        When True, the common length is the longest vector in this batch.
        When False, the length comes from ``target_len`` or, if that is
        None, from the module-level ``mfcc_len``.
    target_len : int, optional
        Explicit common length for ``train=False``; ignored when ``train``
        is True.

    Returns
    -------
    (list of numpy.ndarray, int)
        The padded vectors, in input order, and the common length used.

    Raises
    ------
    NameError
        If ``train`` is False, ``target_len`` is None and no module-level
        ``mfcc_len`` has been assigned.

    Notes
    -----
    With ``train=False`` a vector longer than the common length is
    truncated to it, so every returned vector has exactly that length.
    """
    result = [getFlattenMFCC(f) for f in audio_files]

    if train:
        # default=0 keeps an empty batch from crashing inside max()
        max_len = max((len(x) for x in result), default=0)
    elif target_len is not None:
        max_len = target_len
    else:
        # Falls back to the module-level value (expected to be set by the caller)
        max_len = mfcc_len

    padded = []
    for x in result:
        # Truncate first so the pad width can never be negative
        x = x[:max_len]
        padded.append(np.pad(x, (0, max_len - len(x)), mode='constant'))

    return padded, max_len

In [4]:
def process_train_audio(file_list):
    """Build training attributes (padded chromagrams) and targets from files.

    Parameters
    ----------
    file_list : list of str
        Audio file paths. The target label is parsed from each file's base
        name: either a single leading vowel, or the leading run of
        consonants that is immediately followed by a vowel.

    Returns
    -------
    (attr_input, target_input)
        ``attr_input`` is the result of ``getPaddedChroma`` over the
        chromagrams of all files; ``target_input`` is the list of parsed
        labels, in the same order as ``file_list``.

    Raises
    ------
    ValueError
        If a file's base name does not match the label pattern (for example
        a stray ``.DS_Store``). Names are checked before any audio is
        loaded so a bad entry fails fast.
    """
    print('Processing ', len(file_list), ' files')

    ##detect targets from sound names
    # NOTE(review): the leading-vowel class omits 'i' and the consonant class
    # omits 'v' -- confirm this matches the naming scheme of the samples.
    p = re.compile('^[aeou]|[bcdfghjklmnpqrstwxyz]+(?=[aeiou])')
    target_input = []
    for f in file_list:
        base_name = os.path.basename(f)
        found = p.match(base_name)
        if found is None:
            raise ValueError(
                'Cannot derive a target label from file name: ' + repr(base_name)
            )
        target_input.append(found.group())

    chroma = [getChroma(f) for f in file_list]
    # Skip padding for an empty batch; there is no maximum width to compute.
    attr_input = getPaddedChroma(chroma) if chroma else []

    print('Processing finished')
    return attr_input, target_input

##to process audio file for both validation and testing
def process_validate_audio(file_list):
    """Build attributes (MFCC + chroma, concatenated) and targets from files.

    For each file the zero-padded flattened MFCC vector is concatenated with
    the flattened zero-padded chromagram.

    Parameters
    ----------
    file_list : list of str
        Audio file paths. The target label is parsed from each file's base
        name with the same pattern as ``process_train_audio``.

    Returns
    -------
    (attr_input, target_input)
        ``attr_input`` is a list of 1-D arrays (one per file);
        ``target_input`` is the list of parsed labels, in the same order.

    Raises
    ------
    ValueError
        If a file's base name does not match the label pattern.
    AssertionError
        If either padding helper reports a non-uniform result.

    Notes
    -----
    NOTE(review): ``process_train_audio`` currently returns chroma-only
    features, while this function returns MFCC + chroma -- confirm which
    layout the downstream model expects. Both feature sets are padded to
    the longest clip in ``file_list``, which may differ from the lengths
    seen during training.
    """
    print('Processing ', len(file_list), ' files')

    ##detect targets from sound names (base names, not full paths)
    p = re.compile('^[aeou]|[bcdfghjklmnpqrstwxyz]+(?=[aeiou])')
    target_input = []
    for f in file_list:
        base_name = os.path.basename(f)
        found = p.match(base_name)
        if found is None:
            raise ValueError(
                'Cannot derive a target label from file name: ' + repr(base_name)
            )
        target_input.append(found.group())

    if not file_list:
        # Nothing to extract; the padding helpers need at least one item.
        print('Processing finished')
        return [], target_input

    # getPaddedMFCC takes file paths and extracts the MFCCs itself.
    mfcc_result = getPaddedMFCC(file_list)
    padded_chroma = getPaddedChroma([getChroma(f) for f in file_list])

    ### user cannot fix this
    # Guard against the integer sentinel the padding helpers may return.
    assert not isinstance(mfcc_result, int) and not isinstance(padded_chroma, int), "Audio process does not produce uniform format."

    # getPaddedMFCC returns (padded_vectors, common_length)
    padded_mfcc = mfcc_result[0]

    ## concatenate mfcc and chroma features (chroma is 2-D, so flatten it)
    attr_input = [
        np.hstack([m, np.asarray(c).flatten()])
        for m, c in zip(padded_mfcc, padded_chroma)
    ]

    print('Processing finished')
    return attr_input, target_input
    

In [6]:
#audio_dir = '/Users/athicha/Desktop/PyHack2019/sound_samples/train/'
audio_dir = '/Users/panchanok/Desktop/PyHack2019/PyHack2019/sound_samples/mix_samples/'

# Skip hidden entries (e.g. macOS's .DS_Store): they are not audio files and
# would otherwise break loading or label parsing.
all_files = [
    os.path.join(audio_dir, d)
    for d in os.listdir(audio_dir)
    if not d.startswith('.')
]
# Randomise the sample order; attributes and targets stay aligned because
# both are derived from this same shuffled list.
shuffle(all_files)

att, tar = process_train_audio(all_files)

# Context managers close the files even if pickling fails part-way.
with open(r'mix_chroma_attr.pkl', 'wb') as att_file:
    pickle.dump(att, att_file)

with open(r'mix_chroma_tar.pkl', 'wb') as tar_file:
    pickle.dump(tar, tar_file)


Processing  2352  files
Processing finished


If the cell above raises an error, make sure that only mp3 files are present in aggr/. Also check for hidden files such as .DS_Store.