In [6]:
## Extracts features (MFCC, chroma) from audio files and converts them into NumPy arrays
import librosa, pickle, random, librosa.display
from random import shuffle
import numpy as np
import os, re, csv, sys
#from random import shuffle
import torch

%matplotlib inline
import matplotlib.pyplot as plt

# Hop length, in samples, passed to every MFCC extraction in this notebook.
# At 22050 Hz, 128 samples ~= 5.8 ms (512 samples would be ~= 23 ms).
# A module-level assignment is already global, so no `global` statement is needed.
hop_length = 128


In [2]:
## Adapted from a Kaggle tutorial. See https://www.kaggle.com/CVxTz/audio-data-augmentation
def addWhiteNoise(audio, noise_factor=0.05):
    """Return ``audio`` with Gaussian white noise added.

    param: audio = array of samples; one noise value is drawn per element
           along its first axis (``len(audio)``)
    param: noise_factor = scale applied to the standard-normal noise before it
           is added (default 0.05, the value this notebook has always used)
    return: a new array, ``audio + noise_factor * noise``; ``audio`` itself is
            not modified

    The noise comes from NumPy's global random state, so call
    ``np.random.seed(...)`` beforehand if reproducible output is required.
    """
    noise = np.random.randn(len(audio))
    return audio + noise_factor * noise

In [3]:
def getMFCC(audio_file):
    """Load an audio file and compute its MFCCs.

    param: audio_file = path of the audio file to extract features from
    return: the array produced by librosa.feature.mfcc with n_mfcc=13 and the
            notebook-wide hop_length (a 2D array of 13 MFCCs over time)
    raises: FileNotFoundError if librosa cannot find ``audio_file``
    """
    try:
        y, sr = librosa.load(audio_file)
    except FileNotFoundError:
        # Keep the console message, but re-raise: without the audio there is
        # nothing to compute, and falling through would only fail later with
        # an unrelated UnboundLocalError on `y`.
        print('No such file or directory')
        raise
    return librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)



def getChroma(audio_file):
    """Load an audio file and compute a chromagram of its harmonic component.

    param: audio_file = path of the audio file to extract features from
    return: the array produced by librosa.feature.chroma_cqt on the harmonic
            part of the signal (a 2D array of 12 pitch classes over time)
    raises: FileNotFoundError if librosa cannot find ``audio_file``
    """
    try:
        y, sr = librosa.load(audio_file)
    except FileNotFoundError:
        # Keep the console message, but re-raise: falling through would only
        # fail on the next line with an unrelated UnboundLocalError on `y`.
        print('No such file or directory')
        raise

    # Harmonic/percussive separation; only the harmonic part feeds the chromagram.
    y_harmonic, _ = librosa.effects.hpss(y)
    return librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
    
    
def getPaddedMFCC(mfcc):
    """Zero-pad a list of 2D feature arrays to a common number of columns.

    param: mfcc = a list of 2D arrays (e.g. outputs of getMFCC()); the arrays
           may have different numbers of columns
    return: a new list of 2D arrays in the same order, each padded with 0's at
            the end of axis 1 so that every array has as many columns as the
            widest input. An empty input list yields an empty list.
    """
    # Nothing to pad; max() below would raise on an empty sequence.
    if len(mfcc) == 0:
        return []

    max_col = max(x.shape[1] for x in mfcc)

    # Pad only on the right of the column axis. np.pad yields exactly max_col
    # columns for every array, so no post-hoc width check is needed.
    return [
        np.pad(x, [(0, 0), (0, max_col - x.shape[1])], mode='constant')
        for x in mfcc
    ]

In [4]:
## extract MFCC of audio files whose names are in list 'raw_file_list'
def process_train_audio(raw_file_list, wnratio=0.7):
    """Build a white-noise-augmented, zero-padded MFCC data set with targets.

    param: raw_file_list = list of file paths; entries not ending in '.mp3'
           are ignored
    param: wnratio = number of noisy samples to add, as a fraction of the
           number of .mp3 files (default 0.7). Files to augment are drawn with
           random.choice, i.e. with replacement.
    return: (padded_mfcc, target_input)
            padded_mfcc  = result of getPaddedMFCC() over the MFCCs of all
                           original files followed by the noisy samples
            target_input = list of target labels parsed from the file names,
                           in the same order as padded_mfcc
    raises: ValueError if no .mp3 file is given, or if a file name does not
            match the label pattern
    """
    ## keep only the .mp3 files
    file_list = [x for x in raw_file_list if x.endswith('.mp3')]
    print('Processing ', len(file_list), ' files')
    if not file_list:
        raise ValueError('No .mp3 files found in raw_file_list')

    ## sample files (with replacement) that will get white noise added
    n_noisy = int(wnratio * len(file_list))
    sample_file_list = [random.choice(file_list) for _ in range(n_noisy)]

    ## detect targets from sound names; done before any audio is loaded so a
    ## badly named file fails fast instead of after all the feature extraction
    p = re.compile('^[aeou]|[bcdfghjklmnpqrstwxyz]+(?=[aeiou])|nv|lv')
    target_input = []
    for f in file_list + sample_file_list:
        name = os.path.basename(f)
        found = p.match(name)
        if found is None:
            raise ValueError('Cannot detect a target from file name: ' + name)
        target_input.append(found.group())

    ## MFCCs of the noisy copies (not padded yet)
    noisy_mfcc = []
    for s in sample_file_list:
        y, sr = librosa.load(s)
        wn_y = addWhiteNoise(y)
        noisy_mfcc.append(
            librosa.feature.mfcc(y=wn_y, sr=sr, hop_length=hop_length, n_mfcc=13)
        )

    ## Combine the original and noisy MFCC lists, then pad them to one width
    mfcc = [getMFCC(f) for f in file_list]
    padded_mfcc = getPaddedMFCC(mfcc + noisy_mfcc)

    print('Processing finished')
    return padded_mfcc, target_input
    

In [None]:
######################################
##CHUNK1/2: Compare before and after #
######################################
## Plays and plots one original (clean) sample. The next cell reuses `y` and
## `sr` from this one, so run this cell first.
import IPython.display as ipd

## NOTE: machine-specific absolute path; point it at your local sample folder.
audio_dir = '/Users/panchanok/Desktop/PyHack2019/PyHack2019/sound_samples/xs/'
## Only a small slice of the directory listing is used for this demo.
all_files = [audio_dir + d for d in os.listdir(audio_dir)][1:10]
shuffle(all_files)

print(all_files)
file_list = [x for x in all_files if '.mp3' in x]
print('Processing ', len(file_list), ' files')

##sample data to add whitenoise
wnratio = 0.7
r = range(int(wnratio*len(file_list)))
print(r)
sample_file_list = [random.choice(file_list) for i in r]

## Only the first sampled file is needed here, so the samples are not all
## loaded up front.
print('Original File: ')
y, sr = librosa.load(sample_file_list[0])
plt.figure(figsize=(14, 5))
## NOTE(review): newer librosa releases provide waveshow in place of waveplot;
## confirm against the installed version.
librosa.display.waveplot(y, sr=sr)
## Play back at the rate the audio was actually loaded with.
ipd.Audio(y, rate=sr)

In [None]:
######################################
##CHUNK2/2: Compare before and after #
######################################
## Plays and plots the same sample after adding white noise. Relies on `y`,
## `sr` and `ipd` defined by the CHUNK1/2 cell.

print('Noisy File: ')
new = addWhiteNoise(y)
plt.figure(figsize=(14, 5))
librosa.display.waveplot(new, sr=sr)
## Play back at the rate the audio was actually loaded with.
ipd.Audio(new, rate=sr)

In [None]:
####This chunk processes the audio files and saves the result

### get file names in the directory
## NOTE: machine-specific absolute path; point it at your local data folder.
audio_dir = '/Users/panchanok/Desktop/PyHack2019/sound/eightclasses/'
all_files = [audio_dir + d for d in os.listdir(audio_dir)]
shuffle(all_files)


## Process files to get attributes and targets
att, tar = process_train_audio(all_files)

## Save the processed data as .pkl files. `with` closes each file even if
## pickle.dump raises.
with open(r'eight_mfcc_noise_attr.pkl', 'wb') as att_file:
    pickle.dump(att, att_file)

with open(r'eight_mfcc_noise_tar.pkl', 'wb') as tar_file:
    pickle.dump(tar, tar_file)


Processing  3240  files
