In [1]:
## extracts features from audio files and converts into numpy
import librosa, pickle, random
import numpy as np
import os, re, csv, sys
from datetime import datetime
from random import shuffle
import torch

global hop_length, mfcc_len
# global att, tar
# Set the hop length; at 22050 Hz, 512 samples ~= 23ms
hop_length = 128


In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display

In [3]:

## from Kagle tutorial. See https://www.kaggle.com/CVxTz/audio-data-augmentation
def addWhiteNoise(audio):
    noise = np.random.randn(len(audio))
    return audio + 0.05*noise
    

In [4]:
# ## return a (flatten) one-D array of mfcc of an audio file
# def getFlattenMFCC(audio_file):
#     try:
#         y, sr = librosa.load(audio_file)
#     except FileNotFoundError:
#         print('No such file or directory')

#     # Compute MFCC features from the raw signal
#     print(audio_file)
#     return librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13).flatten()

def getMFCC(audio_file):
    #print ('audio_file in getFlattenChroma: ', audio_file)
    
    try: 
        y, sr = librosa.load(audio_file)
    except FileNotFoundError:
        print('No such file or directory')
    return librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)


def getChroma(audio_file):
    #print ('audio_file in getFlattenChroma: ', audio_file)
    
    try: 
        y, sr = librosa.load(audio_file)
    except FileNotFoundError:
        print('No such file or directory')
    #print('print audio_file inside getFlattenChroma: ', audio_file)
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    return librosa.feature.chroma_cqt(y=y_harmonic,sr=sr)


## return a list of 1-d array of chromagram padded with 0's of ALL audio files
def getPaddedChroma(chroma):

    ##pad arrays with 0's. Get arrays of size Max
    max_col = max([x.shape[1] for x in chroma])
    padded = [np.pad(x, [(0,0), (0, max_col - x.shape[1])], mode = 'constant') for x in chroma]

    ##sanity check
    is_shorter = sum([x.shape[1] - max_col for x in padded])
    if is_shorter < 0:
        print('not padded well')
        return -1
    else:
        return padded
    
## return a list of 1-d array of MFCC padded with 0's of ALL audio files
def getPaddedMFCC(mfcc):

    print(mfcc)
    ##pad arrays with 0's. Get arrays of size Max
    max_col = max([x.shape[1] for x in mfcc])
    padded = [np.pad(x, [(0,0), (0, max_col - x.shape[1])], mode = 'constant') for x in mfcc]

    ##sanity check
    is_shorter = sum([x.shape[1] - max_col for x in padded])
    if is_shorter < 0:
        print('not padded well')
        return -1
    else:
        return padded

In [5]:
def process_train_audio(raw_file_list):
    
    file_list = [x for x in raw_file_list if '.mp3' in x]
    print('Processing ', len(file_list), ' files')

    ##sample data to add whitenoise
    ##########################
    wnratio = 0.7
    r = range(int(wnratio*len(file_list)))

    sample_file_list = [random.choice(file_list) for i in r]

    padded_mfcc_result = []
    for s in sample_file_list:
        y, sr = librosa.load(s)
        wn_y = addWhiteNoise(y)
        z = librosa.feature.mfcc(y=wn_y, sr=sr, hop_length=hop_length, n_mfcc=13)
        padded_mfcc_result.append(z)
    #getPaddedMFCC(padded_mfcc_result)
    
    ####MFCC
#     mfcc = [getFlattenMFCC(f) for f in file_list]
#     padded_mdcc, mfcc_len = getPaddedMFCC(mfcc)
    
    mfcc = [getMFCC(f) for f in file_list]
    padded_mfcc = getPaddedMFCC(mfcc + padded_mfcc_result)
    #print ("padded_chroma: ", padded_chroma[0])
    
    
    ### user cannot fix this
#     assert (mfcc != -1 and chrom != -1), "Audio process does not produce uniform format."

    
    
    ## concatenate mfcc and chrom features
#     attr_input = [np.hstack([m, c]) for m, c in zip(padded_mdcc, padded_chrom)]
    attr_input = padded_mfcc
    ## checking the final length
    #print(len(mfcc[5]), len(chrom[5]), len(x[5]))
    
    
    ##detect targets from sound names
    p = re.compile('^[aeou]|[bcdfghjklmnpqrstwxyz]+(?=[aeiou])|nv|lv')
    splt_file = [f.split('/')[-1] for f in file_list+sample_file_list]
    
    target_input = [p.match(f).group() for f in splt_file]
    
    print('Processing finished')
    return attr_input, target_input

##to process audio file for both validation and testing
def process_validate_audio(file_list):
    
    print('Processing ', len(file_list), ' files')
    
    ####MFCC
    mfcc = [getFlattenMFCC(f) for f in file_list]
    padded_mdcc = getPaddedMFCC(mfcc)
    chroma = [getFlattenChroma(f) for f in file_list]
    padded_chroma = getPaddedChroma(chroma)
    
    ### user cannot fix this
    assert (mfcc != -1 and chroma != -1), "Audio process does not produce uniform format."

    
    
    ## concatenate mfcc and chrom features
    attr_input = [np.hstack([m, c]) for m, c in zip(padded_mdcc, padded_chroma)]
    ## checking the final length
    #print(len(mfcc[5]), len(chrom[5]), len(x[5]))


    ##detect targets from sound names
    p = re.compile('^[aeou]|[bcdfghjklmnpqrstwxyz]+(?=[aeiou])|nv|lv')
    target_input = [p.match(f).group() for f in audio_files]
    
    print('Processing finished')
    return attr_input, target_input     
    

In [None]:
# audio_dir = '/Users/panchanok/Desktop/PyHack2019/PyHack2019/sound_samples/mix_samples/'
# all_files = [audio_dir + d for d in os.listdir(audio_dir)]

In [None]:
# att, tar = process_train_audio(all_files)
# att_file = open(r'TEST_S_X_ATT.pkl', 'wb')
# pickle.dump(att, att_file)
# att_file.close()
# tar_file = open(r'TEST_S_X_TAR.pkl', 'wb')
# pickle.dump(tar, tar_file)
# tar_file.close()

In [None]:
# #raw_file_list

# file_list = [x for x in raw_file_list if '.mp3' in x]
# print('Processing ', len(file_list), ' files')

# ##sample data to add whitenoise
# wnratio = 0.7
# r = range(int(wnratio*len(file_list)))

# sample_file_list = [random.choice(file_list) for i in r]

# result = []
# for s in sample_file_list:
#     y, sr = librosa.load(s)
#     wn_y = addWhiteNoise(y)
#     z = librosa.feature.mfcc(y=wn_y, sr=sr, hop_length=hop_length, n_mfcc=13)
#     result.append(z)
# getPaddedMFCC(result)
# #result.append()

In [None]:
######################################
######################################
##Compare before and after adding WN #
######################################
######################################
######################################
import IPython.display as ipd

audio_dir = '/Users/panchanok/Desktop/PyHack2019/PyHack2019/sound_samples/xs/'
all_files = [audio_dir + d for d in os.listdir(audio_dir)][1:10]
shuffle(all_files)

print(all_files)
file_list = [x for x in all_files if '.mp3' in x]
print('Processing ', len(file_list), ' files')

##sample data to add whitenoise
wnratio = 0.7
r = range(int(wnratio*len(file_list)))
print(r)
sample_file_list = [random.choice(file_list) for i in r]
sample_audio = [librosa.load(f) for f in sample_file_list]

y, sr = librosa.load(sample_file_list[0])
plt.figure(figsize=(14, 5))
librosa.display.waveplot(y, sr=sr)
ipd.Audio(y, rate = 22050)



# wn_audio = [addWhiteNoise(a) for a in sample_audio]
# mfcc_wn_audio = [librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13) for y in wn_audio]

# p_mfcc_wn_audio = [getPaddedMFCC(f) for f in mfcc_wn_audio]


In [None]:
new = addWhiteNoise(y)
plt.figure(figsize=(14, 5))
librosa.display.waveplot(new, sr=sr)
ipd.Audio(new, rate = 22050)

In [6]:
#audio_dir = '/Users/athicha/Desktop/PyHack2019/sound_samples/train/'
audio_dir = '/Users/panchanok/Desktop/PyHack2019/sound/tone_perfect/'
all_files = [audio_dir + d for d in os.listdir(audio_dir)]
#print(len(all_files))
shuffle(all_files)

att, tar = process_train_audio(all_files)
att_file = open(r'mfcc_noise_attr.pkl', 'wb')
pickle.dump(att, att_file)
att_file.close()


tar_file = open(r'mfcc_noise_tar.pkl', 'wb')
pickle.dump(tar, tar_file)
tar_file.close()


Processing  9840  files


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing finished


In [None]:
# # Model class must be defined somewhere
# model = torch.load('mfcc_m_1000_dr0.5_wc0.001_lr1e-05.pt')
# model.eval()

In [7]:
len(att)

16728

In [8]:
len(tar)

16728

In [None]:
tar