In [1]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, InputLayer
from keras.utils import to_categorical

In [2]:
!pip install librosa==0.8.0

Collecting librosa==0.8.0
[?25l  Downloading https://files.pythonhosted.org/packages/26/4d/c22d8ca74ca2c13cd4ac430fa353954886104321877b65fa871939e78591/librosa-0.8.0.tar.gz (183kB)
[K     |█▉                              | 10kB 15.6MB/s eta 0:00:01[K     |███▋                            | 20kB 2.9MB/s eta 0:00:01[K     |█████▍                          | 30kB 3.6MB/s eta 0:00:01[K     |███████▏                        | 40kB 3.9MB/s eta 0:00:01[K     |█████████                       | 51kB 3.4MB/s eta 0:00:01[K     |██████████▊                     | 61kB 3.7MB/s eta 0:00:01[K     |████████████▌                   | 71kB 4.0MB/s eta 0:00:01[K     |██████████████▎                 | 81kB 4.3MB/s eta 0:00:01[K     |████████████████                | 92kB 4.7MB/s eta 0:00:01[K     |█████████████████▉              | 102kB 4.6MB/s eta 0:00:01[K     |███████████████████▋            | 112kB 4.6MB/s eta 0:00:01[K     |█████████████████████▍          | 122kB 4.6MB/s eta 0:00:

In [3]:
# Mount the Colab drive at /content/gdrive; every path used later in this
# notebook is rooted under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [19]:
# Working directory on the mounted drive. The spaces are backslash-escaped
# because this value is interpolated unquoted into `!` shell commands; Python
# code strips the backslashes before using it as a filesystem path.
base_path = '/content/gdrive/My\\ Drive/Colab\\ Notebooks'

In [5]:
!mkdir -p $base_path
!wget https://github.com/derekfernandez/tp2-ia-rna/raw/main/download-dataset.sh -P $base_path/
!chmod 773 $base_path/*.sh
!cd $base_path && $base_path/download-dataset.sh

--2020-10-25 10:09:25--  https://github.com/derekfernandez/tp2-ia-rna/raw/main/download-dataset.sh
Resolving github.com (github.com)... 192.30.255.112
Connecting to github.com (github.com)|192.30.255.112|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/derekfernandez/tp2-ia-rna/main/download-dataset.sh [following]
--2020-10-25 10:09:25--  https://raw.githubusercontent.com/derekfernandez/tp2-ia-rna/main/download-dataset.sh
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 171 [text/plain]
Saving to: ‘/content/gdrive/My Drive/Colab Notebooks/download-dataset.sh’


2020-10-25 10:09:26 (3.32 MB/s) - ‘/content/gdrive/My Drive/Colab Notebooks/download-dataset.sh’ saved [171/171]

--2020-10-25 10:09:26--  htt

In [6]:
import os
# Input & training config

# base_path carries shell escapes (backslash before each space); drop them to
# obtain a path usable from Python.
path = base_path.replace("\\", "")

# Training and test clips; the code below expects one sub-directory per label.
DATA_PATH = os.path.join(path, 'dataset/train/')
TEST_PATH = os.path.join(path, 'dataset/test/')

n_mels = 14       # first dimension of each feature matrix (see the reshape in predict)
n_frames = 126    # second dimension: clips are padded or cropped to this many frames
n_epochs = 100    # number of epochs passed to model.fit
batch_size = 128  # NOTE(review): not referenced by the visible code; model.fit is called with batch_size=1
n_classes = 2     # width of the softmax output layer built by get_model

In [7]:
import librosa
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import numpy as np
from tqdm import tqdm

def get_labels(path=DATA_PATH):
    """Return the class labels found under ``path`` together with their encodings.

    Every sub-directory of ``path`` is one class. Entries are sorted so that the
    label -> index mapping is the same on every call and on every filesystem
    (``os.listdir`` returns entries in arbitrary order), and plain files are
    skipped so that stray files are not mistaken for classes.

    Args:
        path: directory that contains one sub-directory per label.

    Returns:
        A tuple ``(labels, label_indices, one_hot)``: the sorted list of label
        names, ``np.arange(len(labels))``, and the result of
        ``to_categorical`` applied to those indices.
    """
    labels = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    label_indices = np.arange(0, len(labels))
    return labels, label_indices, to_categorical(label_indices)

def wav2mfcc(file_path, max_len=n_frames, n_mfcc=n_mels):
    """Load an audio file and return its MFCC matrix with a fixed number of frames.

    Args:
        file_path: path of the audio file to load.
        max_len: number of frames (second axis) of the returned matrix.
        n_mfcc: number of coefficients to compute (first axis). Defaults to
            ``n_mels`` so the result matches the shape that ``predict``
            reshapes to.

    Returns:
        A 2-D array whose second axis has exactly ``max_len`` entries.
    """
    # sr=None is passed so librosa does not resample to its default rate.
    wave, sr = librosa.load(file_path, mono=True, sr=None)
    # The signal is passed by keyword: newer librosa releases no longer accept
    # it positionally, and the keyword form also works on 0.8.0.
    mfcc = librosa.feature.mfcc(
        y=wave, sr=sr, n_mfcc=n_mfcc, n_fft=2048, hop_length=128,
        win_length=512, center=True, fmin=30, fmax=3000, window='hamming',
    )

    n_missing = max_len - mfcc.shape[1]
    if n_missing > 0:
        # Shorter than requested: append zero frames at the end of the time axis.
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, n_missing)), mode='constant')
    else:
        # Same length or longer: keep only the first max_len frames.
        mfcc = mfcc[:, :max_len]

    return mfcc

def save_data_to_array(path=DATA_PATH, max_len=n_frames):
    """Compute the MFCC matrix of every clip and store one ``.npy`` file per label.

    For each label returned by ``get_labels(path)``, every file inside
    ``path/<label>`` is converted with ``wav2mfcc`` and the stacked result is
    written to ``<label>.npy`` in the current working directory.

    Args:
        path: directory that contains one sub-directory per label.
        max_len: number of frames each MFCC matrix is padded or cropped to.
    """
    labels, _, _ = get_labels(path)

    for label in labels:
        # os.path.join works whether or not `path` ends with a separator; plain
        # string concatenation silently produced a wrong path when it did not.
        label_dir = os.path.join(path, label)
        # Sorted so that the row order of the saved array (and therefore the
        # seeded train/test split built from it) is reproducible.
        wavfiles = [os.path.join(label_dir, wavfile) for wavfile in sorted(os.listdir(label_dir))]

        mfcc_vectors = [
            wav2mfcc(wavfile, max_len=max_len)
            for wavfile in tqdm(wavfiles, "Saving vectors of label - '{}'".format(label))
        ]
        np.save(label + '.npy', mfcc_vectors)


def get_train_test(path=DATA_PATH, split_ratio=0.6, random_state=42):
    """Load the cached ``<label>.npy`` feature files and split them into train/test.

    Args:
        path: directory that contains one sub-directory per label; only used to
            discover the label names.
        split_ratio: fraction of the samples assigned to the training set.
        random_state: seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split``. The targets are float arrays holding the index of
        each sample's label.

    Raises:
        ValueError: if no label is found under ``path``.
    """
    labels, _, _ = get_labels(path)
    if not labels:
        raise ValueError('No labels found under {!r}'.format(path))

    # Load every block once and concatenate in a single pass instead of
    # re-copying the accumulated arrays for each additional label.
    features = [np.load(label + '.npy') for label in labels]
    X = np.concatenate(features, axis=0)
    y = np.concatenate([
        np.full(block.shape[0], fill_value=class_index, dtype=float)
        for class_index, block in enumerate(features)
    ])

    assert X.shape[0] == len(y)

    return train_test_split(X, y, test_size=(1 - split_ratio), random_state=random_state, shuffle=True)

In [8]:
# Extract the MFCC features of every training clip and cache them as <label>.npy.
save_data_to_array(path=DATA_PATH, max_len=n_frames)

X_train, X_test, y_train, y_test = get_train_test(path=DATA_PATH)

# Fix the one-hot width explicitly: without num_classes it is inferred from the
# largest index present, so a split missing the last class would get a narrower
# encoding than the model's output layer.
y_train_hot = to_categorical(y_train, num_classes=n_classes)
y_test_hot = to_categorical(y_test, num_classes=n_classes)

Saving vectors of label - 'negative': 100%|██████████| 1580/1580 [00:26<00:00, 60.37it/s]
Saving vectors of label - 'sheila': 100%|██████████| 1537/1537 [00:27<00:00, 55.35it/s]


In [9]:
def get_model(n_hidden_layers):
    """Build and compile a fully connected classifier over flattened MFCC matrices.

    The network flattens an ``(n_mels, n_frames)`` input and stacks
    ``n_hidden_layers`` dense layers whose widths halve towards the output
    (``2**n_hidden_layers, ..., 4, 2``). All but the last use relu; the last one
    is the softmax output layer.

    Args:
        n_hidden_layers: number of dense layers, output layer included. Must be
            at least 1.

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: if ``n_hidden_layers`` is smaller than 1.
        AssertionError: if the width of the last layer differs from ``n_classes``.
    """
    if n_hidden_layers < 1:
        raise ValueError('n_hidden_layers must be at least 1, got {}'.format(n_hidden_layers))

    # Widths in decreasing order; the last one is always 2.
    fc_val = [2 << i for i in reversed(range(n_hidden_layers))]

    model = Sequential()
    # The input shape has to be declared on the first layer to take effect, and
    # the samples are 2-D (n_mels, n_frames) matrices without a channel axis.
    model.add(Flatten(input_shape=(n_mels, n_frames)))

    for val in fc_val[:-1]:
        model.add(Dense(val, activation='relu'))

    # The output layer is added unconditionally, so a single-layer network
    # also ends in softmax.
    assert fc_val[-1] == n_classes, 'last layer width must equal n_classes'
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model

In [10]:
# Build the seven-layer network and train it, using the held-out split as
# validation data.
model = get_model(7)
# NOTE(review): the batch_size constant from the config cell is not used here;
# training runs with a batch size of 1 — confirm this is intentional.
model.fit(
    x=X_train,
    y=y_train_hot,
    batch_size=1,
    epochs=n_epochs,
    verbose=1,
    validation_data=(X_test, y_test_hot),
)
model.summary()

# Persist the trained model so later cells can reload it.
model.save('./rna_mlp')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [11]:
def predict(filepath, model, labels=None):
    """Return the label name the model assigns to one audio file.

    Args:
        filepath: path of the audio file to classify.
        model: object exposing ``predict``; it is called with a single sample of
            shape ``(1, n_mels, n_frames)``.
        labels: optional sequence of label names indexed by class. When omitted
            it is looked up with ``get_labels()``, which lists the dataset
            directory; pass it explicitly to avoid that lookup on every call.

    Returns:
        The entry of ``labels`` at the position of the highest model output.
    """
    if labels is None:
        labels = get_labels()[0]

    sample = wav2mfcc(filepath)
    # Add the leading batch axis the model expects.
    sample_reshaped = sample.reshape(1, n_mels, n_frames)
    return labels[np.argmax(model.predict(sample_reshaped))]

In [12]:
# Reload the model from './rna_mlp' (the path the training cell saves to), so
# the evaluation below runs against the persisted copy.
from keras.models import load_model
model = load_model('./rna_mlp')

In [13]:
# Test clips are read from one sub-directory per class under TEST_PATH.
pos_dir, neg_dir = (
    os.path.join(TEST_PATH, class_name) for class_name in ('sheila', 'negative')
)

In [14]:
# Confusion-matrix tallies for the evaluation, all starting at zero.
total_count = true_positives = false_positives = true_negatives = false_negatives = 0

In [15]:
def _tally_predictions(directory, expected_label, model):
    """Classify every file in ``directory`` and count matches against ``expected_label``.

    Prints one ``<file>:  <prediction>`` line per file, in sorted file order so
    the listing is reproducible.

    Returns:
        ``(hits, misses)``: how many predictions equal ``expected_label`` and
        how many do not.
    """
    hits = misses = 0
    for f in sorted(os.listdir(directory)):
        pred = predict(os.path.join(directory, f), model=model)
        print(f + ': ', pred)
        if pred == expected_label:
            hits += 1
        else:
            misses += 1
    return hits, misses


# The tallies are assigned (not incremented) here, so re-running this cell does
# not double-count samples.
print('**********************************')
print('Predictions for positive samples')
print('**********************************')
true_positives, false_negatives = _tally_predictions(pos_dir, 'sheila', model)

print('**********************************')
print('Predictions for negative samples')
print('**********************************')
true_negatives, false_positives = _tally_predictions(neg_dir, 'negative', model)

total_count = true_positives + false_negatives + true_negatives + false_positives

**********************************
Predictions for positive samples
**********************************
004ae714_nohash_0.wav:  sheila
00f0204f_nohash_2.wav:  sheila
01b4757a_nohash_0.wav:  sheila
01b4757a_nohash_1.wav:  sheila
01b4757a_nohash_2.wav:  sheila
01bb6a2a_nohash_0.wav:  sheila
01bcfc0c_nohash_0.wav:  sheila
01bcfc0c_nohash_1.wav:  sheila
01d22d03_nohash_0.wav:  sheila
02fcd241_nohash_0.wav:  sheila
05b2db80_nohash_0.wav:  sheila
05b2db80_nohash_1.wav:  negative
05cf43ef_nohash_0.wav:  negative
05cf43ef_nohash_1.wav:  sheila
05cf43ef_nohash_2.wav:  sheila
0b09edd3_nohash_0.wav:  sheila
0b09edd3_nohash_1.wav:  sheila
0b40aa8e_nohash_0.wav:  sheila
0b40aa8e_nohash_1.wav:  sheila
0b56bcfe_nohash_0.wav:  sheila
0b56bcfe_nohash_1.wav:  sheila
0c40e715_nohash_0.wav:  sheila
0c5027de_nohash_0.wav:  sheila
0c5027de_nohash_1.wav:  sheila
0d393936_nohash_0.wav:  sheila
0d393936_nohash_1.wav:  sheila
0d393936_nohash_2.wav:  sheila
0d53e045_nohash_0.wav:  negative
0d53e045_nohash_1.wav: 

In [16]:
# Raw confusion-matrix counts, one per line.
for caption, count in (
    ('True positives: {}', true_positives),
    ('True negatives: {}', true_negatives),
    ('False positives: {}', false_positives),
    ('False negatives: {}', false_negatives),
    ('Total samples: {}', total_count),
):
    print(caption.format(count))

# All three figures are percentages of the total number of evaluated samples.
false_positive_ratio = false_positives / total_count *100
false_negative_ratio = false_negatives / total_count *100
accuracy = (true_positives + true_negatives) / total_count *100

print('****************************')
for caption, percentage in (
    ('Accuracy: {:.2f}', accuracy),
    ('False positives ratio: {:.2f}', false_positive_ratio),
    ('False negatives ratio: {:.2f}', false_negative_ratio),
):
    print(caption.format(percentage))

True positives: 164
True negatives: 192
False positives: 26
False negatives: 33
Total samples: 415
****************************
Accuracy: 85.78
False positives ratio: 6.27
False negatives ratio: 7.95


In [20]:
# Clean up: delete the dataset directory and the download helper script from
# base_path. The chain stops at the first command that fails.
!cd $base_path && rm -rf ./dataset/ && rm ./download-dataset.sh