In [1]:
## extracts features from audio files and converts into numpy
import librosa
import numpy as np
import os, re, csv, sys
from datetime import datetime

# Hop length (in samples) between successive MFCC frames, read by getFlattenMFCC.
# At a 22050 Hz sample rate, 128 samples ~= 5.8 ms (512 samples would be ~= 23 ms).
# NOTE(review): `mfcc_len`, which getPaddedMFCC reads when train=False, is not
# assigned anywhere in this cell -- it must be set before that code path is used.
hop_length = 128


In [5]:
## return a flattened 1-D array of the MFCCs of an audio file
def getFlattenMFCC(audio_file):
    """Load an audio file and return its MFCC matrix flattened to one dimension.

    Parameters
    ----------
    audio_file
        Path handed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The 13-coefficient MFCC matrix, computed with the module-level
        ``hop_length``, after ``.flatten()``.

    Raises
    ------
    FileNotFoundError
        If ``audio_file`` does not exist. The message is still printed, but
        the error is re-raised instead of falling through to code that would
        fail with an unrelated ``UnboundLocalError`` on ``y``.
    """
    try:
        y, sr = librosa.load(audio_file)
    except FileNotFoundError:
        print('No such file or directory')
        raise

    # Progress trace: one line per processed file.
    print(audio_file)

    # Compute MFCC features from the raw signal
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    return mfcc.flatten()

def getFlattenChroma(audio_file):
    """Return the chromagram of an audio file's harmonic part, flattened to 1-D.

    The signal is loaded with ``librosa.load``, split with
    ``librosa.effects.hpss``, and only the harmonic component is passed to
    ``librosa.feature.chroma_cqt``. The 2-D shape is printed before flattening.

    Parameters
    ----------
    audio_file
        Path handed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The chromagram after ``.flatten()``.

    Raises
    ------
    FileNotFoundError
        If ``audio_file`` does not exist. The message is still printed, but
        the error is re-raised rather than swallowed (the old fall-through
        crashed later with ``UnboundLocalError`` on ``y``).
    """
    try:
        y, sr = librosa.load(audio_file)
    except FileNotFoundError:
        print('No such file or directory')
        raise

    # The percussive component is not used for chroma features.
    y_harmonic, _ = librosa.effects.hpss(y)
    x = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
    print('shape: ', x.shape)
    return x.flatten()

def getChroma(audio_file):
    """Return the 2-D chromagram of an audio file's harmonic component.

    The signal is loaded with ``librosa.load``, split with
    ``librosa.effects.hpss``, and the harmonic part is passed to
    ``librosa.feature.chroma_cqt``.

    Parameters
    ----------
    audio_file
        Path handed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Whatever ``librosa.feature.chroma_cqt`` returns (not flattened).

    Raises
    ------
    FileNotFoundError
        If ``audio_file`` does not exist. The message is still printed, but
        the error is re-raised rather than swallowed (the old fall-through
        crashed later with ``UnboundLocalError`` on ``y``).
    """
    try:
        y, sr = librosa.load(audio_file)
    except FileNotFoundError:
        print('No such file or directory')
        raise

    # The percussive component is not used for chroma features.
    y_harmonic, _ = librosa.effects.hpss(y)
    return librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)

# ## return a (flatten) one-D array of chromagram of an audio file
# def getChroma(audio_file):
#     print ('audio file getChroma: ')
#     print (audio_file)
#     y, sr = librosa.load(audio_file)
#     # Separate harmonics and percussives into two waveforms
#     y_harmonic, y_percussive = librosa.effects.hpss(y)

#     # Compute chroma features from the harmonic signal
#     return librosa.feature.chroma_cqt(y=y_harmonic,sr=sr).flatten()


## return a list of 2-D chromagrams, zero-padded on the right to a common width
def getPaddedChroma(chroma):
    """Zero-pad every chromagram along axis 1 so that all share one width.

    Parameters
    ----------
    chroma
        Sequence of 2-D numpy arrays (e.g. the results of ``getChroma``).
        Only axis 1 is padded; axis 0 is left untouched.

    Returns
    -------
    list of numpy.ndarray
        The arrays in their original order, each with ``shape[1]`` equal to
        the widest input. An empty input yields an empty list.
    """
    chroma = list(chroma)
    if not chroma:
        # Nothing to pad; max() below would raise on an empty sequence.
        return []

    ##pad arrays with 0's. Get arrays of size Max
    max_col = max(x.shape[1] for x in chroma)

    # np.pad either yields exactly max_col columns or raises, so no
    # post-padding sanity check is needed.
    return [
        np.pad(x, [(0, 0), (0, max_col - x.shape[1])], mode='constant')
        for x in chroma
    ]

In [None]:
## return a list of 1-D MFCC arrays of ALL audio files, zero-padded to one length
def getPaddedMFCC(audio_files, train = True, max_len = None):
    """Extract flattened MFCCs for every file and bring them to one length.

    Parameters
    ----------
    audio_files
        Iterable of audio file paths; each is passed to ``getFlattenMFCC``.
    train
        If true, the target length is the longest feature vector in this
        batch (``max_len`` is ignored). If false, the target length is
        ``max_len`` or, when that is ``None``, the global ``mfcc_len``.
    max_len
        Optional explicit target length for ``train=False``.

    Returns
    -------
    (list of numpy.ndarray, int)
        The fixed-length feature vectors and the target length used.
        Vectors shorter than the target are zero-padded on the right;
        vectors longer than it (possible only when ``train`` is false) are
        truncated instead of failing on a negative pad width.

    Raises
    ------
    NameError
        If ``train`` is false, ``max_len`` is ``None`` and no global
        ``mfcc_len`` has been defined.
    """
    result = [getFlattenMFCC(f) for f in audio_files]

    if train:
        # Training: the longest clip in this batch defines the feature length.
        max_len = max((len(x) for x in result), default=0)
    elif max_len is None:
        try:
            max_len = mfcc_len
        except NameError:
            raise NameError(
                "mfcc_len is not defined: pass max_len=... or set the global "
                "mfcc_len to the length returned by a train=True call"
            ) from None

    padded = []
    for x in result:
        x = x[:max_len]  # truncate clips longer than the target length
        padded.append(np.pad(x, (0, max_len - len(x)), mode='constant'))

    return padded, max_len

In [6]:
def process_train_audio(file_list):
    """Build padded chroma features and label strings for a list of audio files.

    Only chroma features are produced here; MFCC features are not included.

    Parameters
    ----------
    file_list
        List of audio file paths. The label of each file is taken from the
        start of its base name (see the regular expression below).

    Returns
    -------
    (attr_input, target_input)
        ``attr_input`` is the result of ``getPaddedChroma`` over the
        ``getChroma`` output of every file; ``target_input`` is the list of
        label strings, in the same order as ``file_list``.

    Raises
    ------
    ValueError
        If any base name does not match the label pattern (for example a
        hidden ``.DS_Store`` file). Raised before any audio is loaded, and
        names the offending files, instead of the former opaque
        ``AttributeError`` on ``None.group()``.
    """
    print('Processing ', len(file_list), ' files')

    ##detect targets from sound names
    # NOTE(review): the leading-vowel class is [aeou] (no 'i') while the
    # lookahead is [aeiou] -- confirm the omission is intentional.
    p = re.compile('^[aeou]|[bcdfghjklmnpqrstwxyz]+(?=[aeiou])')
    base_names = [os.path.basename(f) for f in file_list]
    matches = [p.match(name) for name in base_names]

    unmatched = [name for name, m in zip(base_names, matches) if m is None]
    if unmatched:
        raise ValueError(
            'Cannot derive a target label from file name(s): ' + ', '.join(unmatched)
        )
    target_input = [m.group() for m in matches]

    print('----', len(file_list))
    chroma = [getChroma(f) for f in file_list]
    attr_input = getPaddedChroma(chroma)

    print('Processing finished')
    return attr_input, target_input

##to process audio file for both validation and testing
def process_validate_audio(file_list):
    """Build concatenated MFCC + chroma feature vectors and labels.

    Parameters
    ----------
    file_list
        List of audio file paths. The label of each file is taken from the
        start of its base name (see the regular expression below).

    Returns
    -------
    (attr_input, target_input)
        ``attr_input`` is a list with one 1-D array per file: the padded
        MFCC vector followed by the flattened padded chromagram.
        ``target_input`` is the list of label strings in the same order.

    Raises
    ------
    ValueError
        If any base name does not match the label pattern.
    AssertionError
        If a padding helper reports failure by returning -1.
    """
    print('Processing ', len(file_list), ' files')

    ##detect targets from sound names
    # NOTE(review): the leading-vowel class is [aeou] (no 'i') while the
    # lookahead is [aeiou] -- confirm the omission is intentional.
    p = re.compile('^[aeou]|[bcdfghjklmnpqrstwxyz]+(?=[aeiou])')
    base_names = [os.path.basename(f) for f in file_list]
    matches = [p.match(name) for name in base_names]
    unmatched = [name for name, m in zip(base_names, matches) if m is None]
    if unmatched:
        raise ValueError(
            'Cannot derive a target label from file name(s): ' + ', '.join(unmatched)
        )
    target_input = [m.group() for m in matches]

    ####MFCC
    # getPaddedMFCC takes file paths (it calls getFlattenMFCC itself) and
    # returns a (padded, max_len) pair.
    # NOTE(review): with the default train=True the feature length is the
    # longest clip of THIS batch, not a length fixed at training time.
    mfcc_result = getPaddedMFCC(file_list)

    # Chromagrams are padded while still 2-D (getPaddedChroma pads axis 1)
    # and only flattened afterwards.
    chroma = [getChroma(f) for f in file_list]
    padded_chroma = getPaddedChroma(chroma)

    ### user cannot fix this
    assert not isinstance(mfcc_result, int) and not isinstance(padded_chroma, int), \
        "Audio process does not produce uniform format."
    padded_mfcc, _ = mfcc_result

    ## concatenate mfcc and chrom features
    attr_input = [np.hstack([m, c.flatten()]) for m, c in zip(padded_mfcc, padded_chroma)]

    print('Processing finished')
    return attr_input, target_input

def writeCSV(file, file_name):
    """Write an iterable of rows to ``file_name`` as comma-separated values.

    Parameters
    ----------
    file
        Iterable of rows, each itself an iterable of fields, as accepted by
        ``csv.writer(...).writerows``.
    file_name
        Destination path; an existing file is overwritten.
    """
    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by a blank line on platforms that translate '\n'.
    with open(file_name, "w", newline='') as processed:
        csvWriter = csv.writer(processed, delimiter=',')
        csvWriter.writerows(file)
    print('File saved at ', file_name)
    
    
    
    

In [7]:
import os, librosa
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
audio_dir = '/Users/panchanok/Desktop/PyHack2019/PyHack2019/sound_samples/validate/'

# os.listdir returns entries in arbitrary order, so a positional slice is not a
# reliable way to skip hidden files. Drop dot-files (e.g. .DS_Store) explicitly
# and sort so the same four files are picked on every run.
visible_names = sorted(d for d in os.listdir(audio_dir) if not d.startswith('.'))
all_files = [os.path.join(audio_dir, d) for d in visible_names[:4]]

print (len(all_files))
#print (all_files)
att, tar = process_train_audio(all_files)
print (att)

#print (att.shape)
#writeCSV(att, 'temp_chroma_attr.csv')
#writeCSV(tar, 'temp_chroma_tar.csv')

4
Processing  4  files
---- 4
Processing finished
[array([[0.63950405, 0.64654713, 0.73809123, 0.73968945, 0.41436954,
        0.74483189, 1.        , 1.        , 0.65208086, 0.56996307,
        0.8773378 , 0.0891777 , 0.04304492, 0.04169666, 0.05008601,
        0.0542839 , 0.03117015, 0.09179829, 0.102028  , 0.09096481,
        0.11466745, 0.16992233, 0.22170883, 0.20129076, 0.2088395 ,
        0.12842991, 0.12444241, 0.09031662, 0.08252487, 0.11360046,
        0.03671703, 0.07926927, 0.09077891, 0.14432984, 0.17870048,
        0.13138313, 0.10579195, 0.07914405, 0.0629112 , 0.03394065,
        0.02075202, 0.01339059, 0.00447883, 0.00792944, 0.01202969,
        0.01051727, 0.00676819, 0.00627841, 0.00511143, 0.00665315,
        0.00759238, 0.03264259, 0.08384166],
       [0.50175832, 0.59169944, 0.44358932, 0.55506815, 0.20681784,
        0.81345469, 0.98167464, 0.86354244, 0.57087162, 0.72343876,
        0.61224071, 0.04693296, 0.03408843, 0.01889501, 0.05831956,
        0.15003299, 

If the cell above raises an error, make sure only mp3 files are included in aggr/. Also check for hidden files such as .DS_Store.

In [None]:
# Directory holding the validation clips (absolute, machine-specific path).
validate_path = '/Users/panchanok/Desktop/PyHack2019/PyHack2019/sound_samples/validate/'
# The call below is left disabled.
#process_audio(validate_path, train = True)

In [None]:
# Directory passed to process_audio with train=False (absolute, machine-specific path).
# NOTE(review): process_audio is not defined in the cells visible here -- confirm
# it exists in the kernel before running this cell.
test_path = '/Users/athicha/Desktop/PyHack2019/sound_samples/uservoice/'
process_audio(test_path, train = False)

Try displaying audio