In [1]:
import glob
import os
import librosa
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
%matplotlib inline
plt.style.use('ggplot')

plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = 'Ubuntu'
plt.rcParams['font.monospace'] = 'Ubuntu Mono'
plt.rcParams['font.size'] = 12
plt.rcParams['axes.labelsize'] = 11
plt.rcParams['axes.labelweight'] = 'bold'
plt.rcParams['axes.titlesize'] = 12
plt.rcParams['xtick.labelsize'] = 9
plt.rcParams['ytick.labelsize'] = 9
plt.rcParams['legend.fontsize'] = 11
plt.rcParams['figure.titlesize'] = 13

In [10]:
def windows(data, window_size):
    start = 0
    while start < len(data):
        yield start, start + window_size
        start += (window_size / 2)

def extract_features(parent_dir,sub_dirs,file_ext="*.wav",bands = 60, frames = 41):
    window_size = 512 * (frames - 1)
    log_specgrams = []
    labels = []
    for l, sub_dir in enumerate(sub_dirs):
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            sound_clip,s = librosa.load(fn)
            label = fn.split('/')[10].split('-')[1]
            
            for (start,end) in windows(sound_clip,window_size):
                if(len(sound_clip[start:end]) == window_size):
                    signal = sound_clip[start:end]
                    melspec = librosa.feature.melspectrogram(signal, n_mels = bands)
                    logspec = librosa.logamplitude(melspec)
                    logspec = logspec.T.flatten()[:, np.newaxis].T
                    log_specgrams.append(logspec)
                    labels.append(label)
            
    log_specgrams = np.asarray(log_specgrams).reshape(len(log_specgrams),bands,frames,1)
    features = np.concatenate((log_specgrams, np.zeros(np.shape(log_specgrams))), axis = 3)
    for i in range(len(features)):
        features[i, :, :, 1] = librosa.feature.delta(features[i, :, :, 0])
    
    return np.array(features), np.array(labels,dtype = np.int)

def one_hot_encode(labels):
    n_labels = len(labels)
    n_unique_labels = len(np.unique(labels))
    one_hot_encode = np.zeros((n_labels,n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return one_hot_encode

In [11]:
import numpy as np

def convertToOneHot(vector, num_classes=None):
    """
    Converts an input 1-D vector of integers into an output
    2-D array of one-hot vectors, where an i'th input value
    of j will set a '1' in the i'th row, j'th column of the
    output array.

    Example:
        v = np.array((1, 0, 4))
        one_hot_v = convertToOneHot(v)
        print one_hot_v

        [[0 1 0 0 0]
         [1 0 0 0 0]
         [0 0 0 0 1]]
    """

    assert isinstance(vector, np.ndarray)
    assert len(vector) > 0

    if num_classes is None:
        num_classes = np.max(vector)+1
    else:
        assert num_classes > 0
        assert num_classes >= np.max(vector)

    result = np.zeros(shape=(len(vector), num_classes))
    result[np.arange(len(vector)), vector] = 1
    return result
    #return result.astype(int)

In [12]:
#parent_dir = 'Sound-Data'
#sub_dirs= ['fold1','fold2']
parent_dir = '/Users/alaguna/Desktop/Ana/DEEP_LEARNING/BabyCrying_Challenge/UrbanSound/test/'
sub_dirs = ['dog_bark']


#parent_dir = '/Users/alaguna/Desktop/Ana/DEEP_LEARNING/BabyCrying_Challenge/BabySound/data/'

#sub_dirs = ['air_conditioner','children_playing','dog_bark']
#sub_dirs = ['hambre','hablando']

features,labels = extract_features(parent_dir,sub_dirs)
print(features)
print(labels)


[[[[ -3.17569987e+00  -1.57498598e+00]
   [ -3.04441216e+00  -1.74347876e+00]
   [ -7.25765141e+00  -1.51931845e+00]
   ..., 
   [ -2.76738299e+01  -4.30128968e-01]
   [ -2.87717568e+01  -2.94312720e-01]
   [ -2.59192593e+01   7.04089294e-02]]

  [[ -2.63070418e+01   2.56525955e-01]
   [ -2.59788525e+01   3.35505506e-01]
   [ -2.60015460e+01   3.53844666e-01]
   ..., 
   [ -4.11628798e+00   1.60733541e-01]
   [  2.32930759e+00   1.72803941e-01]
   [ -5.94882007e+00  -4.50653446e-02]]

  [[ -1.02287968e+01  -7.69854624e-01]
   [ -1.05659946e+01  -9.92412590e-01]
   [ -1.49717606e+01  -9.99379765e-01]
   ..., 
   [  6.63651743e+00   4.03870792e+00]
   [  5.48205664e+00   3.74910902e+00]
   [  4.61901074e+00   3.08024440e+00]]

  ..., 
  [[ -1.86382336e+01   2.47614996e+00]
   [ -1.91216868e+01   3.15861407e+00]
   [ -2.34950207e+01   2.95197614e+00]
   ..., 
   [ -1.73738138e+01  -3.55216436e-01]
   [ -1.72611697e+01  -1.74686171e-01]
   [ -1.67444678e+01  -5.57246113e-02]]

  [[ -1.4701

In [16]:
#labels = one_hot_encode(labels)
labels = convertToOneHot(labels)
print(labels)



IndexError: arrays used as indices must be of integer (or boolean) type

In [17]:
labels = convertToOneHot(labels)
print(labels)
train_test_split = np.random.rand(len(features)) < 0.70
print(train_test_split)
train_x = features[train_test_split]
print(train_x)
print(train_x.shape)
train_y = labels[train_test_split]
print(train_y)
print(type(train_y))
print(train_y.shape)
print("---")
test_x = features[~train_test_split]
print(test_x)
test_y = labels[~train_test_split]
print(test_y)



IndexError: arrays used as indices must be of integer (or boolean) type