# 1. Importar Frameworks

## Instalación

In [None]:
!pip install matplotlib
!pip install tensorflow_io==0.23.1

## Cargar Frameworks


In [None]:
import os

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_io as tfio

## Cargar archivos de Drive

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used in this notebook
# all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

# 2. Crear Dataset de TensorFlow

## Cargar audio

In [None]:
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a single-channel waveform at 16 kHz.

    Args:
        filename: Path of the WAV file, passed straight to ``tf.io.read_file``.

    Returns:
        A 1-D tensor with the audio samples after resampling from the file's
        own sample rate to 16000 Hz.
    """
    # Raw bytes of the encoded file.
    raw_bytes = tf.io.read_file(filename)
    # Decode to (samples, 1): desired_channels=1 requests a single channel.
    audio, native_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # Drop the channel axis so only the sample axis remains.
    mono = tf.squeeze(audio, axis=-1)
    # The resampler receives the source rate as int64.
    return tfio.audio.resample(
        mono,
        rate_in=tf.cast(native_rate, dtype=tf.int64),
        rate_out=16000,
    )

## Definir ruta negativa y positiva

In [None]:
# Folders with the positive-class clips (POS) and the negative-class clips (NEG).
# os.path.join is called with a single argument, so each path is used as written.
POS = os.path.join('/content/drive/MyDrive/DLBirds/ZorzalComun')
NEG = os.path.join('/content/drive/MyDrive/DLBirds/Ruido')

In [None]:
# Datasets of file paths matching *.wav inside each folder.
pos = tf.data.Dataset.list_files(POS+'/*.wav')
neg = tf.data.Dataset.list_files(NEG+'/*.wav')

## Combinar negativos y positivos

In [None]:
# Pair every positive path with the label 1.0 and every negative path with the
# label 0.0, then append the negatives after the positives in one dataset.
positives = tf.data.Dataset.zip((pos, tf.data.Dataset.from_tensor_slices(tf.ones(len(pos)))))
negatives = tf.data.Dataset.zip((neg, tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg)))))
data = positives.concatenate(negatives)

# 3. Determinar longitud media de los audios

In [None]:
# Collect the length (in samples, after resampling to 16 kHz) of every WAV clip
# in the folder.
# NOTE(review): this scans 'Coturnix' while the training clips are read from
# 'ZorzalComun' — confirm this is the intended folder.
clips_dir = '/content/drive/MyDrive/DLBirds/Coturnix'
lengths = []
for file in os.listdir(clips_dir):
    # os.listdir returns every entry; keep only WAV files (same '*.wav' filter
    # used to build the training datasets), since the WAV decoder fails on
    # anything else.
    if not file.endswith('.wav'):
        continue
    tensor_wave = load_wav_16k_mono(os.path.join(clips_dir, file))
    lengths.append(len(tensor_wave))

# 4. Construir espectrograma.

## Función de preprocesamiento

In [None]:
def preprocess(file_path, label):
    """Turn one labelled WAV file into a fixed-size magnitude spectrogram.

    Args:
        file_path: Path of the WAV file to load.
        label: Class label; returned unchanged.

    Returns:
        A ``(spectrogram, label)`` pair. The spectrogram is the STFT magnitude
        of exactly 48000 samples with a trailing channel axis of size 1.
    """
    # Keep at most the first 48000 samples of the 16 kHz waveform.
    clip = load_wav_16k_mono(file_path)[:48000]
    # Shorter clips are padded with zeros at the FRONT up to 48000 samples.
    pad_shape = [48000] - tf.shape(clip)
    clip = tf.concat([tf.zeros(pad_shape, dtype=tf.float32), clip], 0)
    # Magnitude of the complex short-time Fourier transform.
    magnitude = tf.abs(tf.signal.stft(clip, frame_length=320, frame_step=32))
    # Add the channel axis expected by the convolutional layers.
    return tf.expand_dims(magnitude, axis=2), label

In [None]:
# Draw one (file path, label) pair from the shuffled positive examples.
# next(...) is the standard iterator protocol; calling the iterator's .next()
# method directly is non-idiomatic and is flagged as deprecated by recent
# TensorFlow releases.
filepath, label = next(positives.shuffle(buffer_size=10000).as_numpy_iterator())

In [None]:
# Convert the sampled clip into a spectrogram so it can be inspected.
spectrogram, label = preprocess(filepath, label)

In [None]:
# Show the spectrogram as an image. Transposing reverses the axes, so indexing
# [0] removes the size-1 channel axis and leaves a 2-D array for imshow.
plt.figure(figsize=(30,20))
plt.imshow(tf.transpose(spectrogram)[0])
plt.show()

# 5. Crear particiones de entrenamiento

## Create a Tensorflow Data Pipeline

In [None]:
# Input pipeline: file paths -> spectrograms, cached, shuffled, batched.
data = data.map(preprocess)
data = data.cache()
# Keep the shuffled order fixed across iterations. With the default
# reshuffle_each_iteration=True the order changes on every pass, so the
# take()/skip() train/test split built from this dataset would exchange
# examples between epochs and leak training data into the test split.
data = data.shuffle(buffer_size=1000, reshuffle_each_iteration=False)
data = data.batch(16)
data = data.prefetch(8)

## Dividir en particiones de entrenamiento

In [None]:
# 70% of the number of elements in `data` (batches, once it has been batched).
len(data)*.7

In [None]:
# Derive the split point from the dataset size instead of typing in the number
# shown by the previous cell, so the split stays valid when the data changes.
train_batches = int(len(data) * .7)
# First 70% of the batches for training, all remaining batches for testing.
train = data.take(train_batches)
test = data.skip(train_batches)

## Inspeccionar un batch de entrenamiento

In [None]:
# Fetch the first training batch as NumPy arrays (standard next(...) protocol
# instead of the iterator's non-idiomatic .next() method).
samples, labels = next(train.as_numpy_iterator())

In [None]:
# Shape of the batch fetched above.
samples.shape

# 6. Construir modelo Deep Learning

## Cargar frameworks de TensorFlow

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten

## Modelo secuencial

In [None]:
# Small CNN for binary classification of spectrograms: two 3x3 convolutions,
# a dense hidden layer and a single sigmoid output unit.
model = Sequential([
    # input_shape is the per-example spectrogram shape (height, width, channels).
    Conv2D(16, (3,3), activation='relu', input_shape=(1491, 257,1)),
    Conv2D(16, (3,3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    # One sigmoid unit: the output is a score between 0 and 1.
    Dense(1, activation='sigmoid'),
])

In [None]:
# Name the metrics explicitly. Unnamed metric objects get auto-numbered names
# ('recall_1', 'precision_1', ...) when new instances are created in the same
# session (e.g. on re-running this cell), which breaks the
# hist.history['recall'] / hist.history['precision'] lookups used for plotting.
model.compile('Adam', loss='BinaryCrossentropy', metrics=[tf.keras.metrics.Recall(name='recall'),tf.keras.metrics.Precision(name='precision')])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

## Entrenar el modelo

In [None]:
# Train for 5 epochs on `train`, using `test` as validation data; the returned
# history object is kept for the plots.
hist = model.fit(train, epochs=5, validation_data=test)

In [None]:
# Loss per epoch: training in red, validation in blue. The legend makes the
# figure readable without looking at the code.
plt.title('Pérdidas')
plt.plot(hist.history['loss'], 'r', label='Entrenamiento')
plt.plot(hist.history['val_loss'], 'b', label='Validación')
plt.xlabel("Epochs")
plt.legend()
plt.show()

In [None]:
# Precision per epoch: training in red, validation in blue. The legend makes
# the figure readable without looking at the code.
plt.title('Precisión')
plt.plot(hist.history['precision'], 'r', label='Entrenamiento')
plt.plot(hist.history['val_precision'], 'b', label='Validación')
plt.xlabel("Epochs")
plt.legend()
plt.show()

In [None]:
# Recall per epoch: training in red, validation in blue. The legend makes the
# figure readable without looking at the code.
plt.title('Recall')
plt.plot(hist.history['recall'], 'r', label='Entrenamiento')
plt.plot(hist.history['val_recall'], 'b', label='Validación')
plt.xlabel("Epochs")
plt.legend()
plt.show()

# 7. Validación

## Cargar archivos MP3

In [None]:
def load_mp3_16k_mono(filename):
    """Load an audio file (e.g. MP3) as a mono waveform resampled to 16 kHz.

    Args:
        filename: Path of the audio file, opened with ``tfio.audio.AudioIOTensor``.

    Returns:
        A 1-D float tensor with the channel-averaged samples resampled from the
        file's own sample rate to 16000 Hz.
    """
    res = tfio.audio.AudioIOTensor(filename)
    # Materialise the samples; the cast is a no-op for float32 input and keeps
    # the averaging below in floating point for integer-encoded audio.
    # Assumes the tensor is laid out as (samples, channels) — axis 1 is reduced.
    tensor = tf.cast(res.to_tensor(), tf.float32)
    # Average the channels. Unlike the previous `sum / 2`, the mean is correct
    # for any channel count: mono audio is no longer halved in amplitude.
    tensor = tf.math.reduce_mean(tensor, axis=1)
    # The resampler receives the source rate as int64.
    sample_rate = tf.cast(res.rate, dtype=tf.int64)
    # Resample to 16 kHz
    wav = tfio.audio.resample(tensor, sample_rate, 16000)
    return wav

In [None]:
# Path of a single recording used to try out the model.
# NOTE(review): the file name mentions "Coturnix coturnix" although it sits in
# the 'ZorzalTest' folder — confirm this is the intended sample.
mp3 = os.path.join('/content/drive/MyDrive/DLBirds/ZorzalTest/XC112569 - Codorniz común - Coturnix coturnix.mp3')

In [None]:
# Load the recording as a 16 kHz mono waveform and display the tensor.
wav = load_mp3_16k_mono(mp3)
wav

In [None]:
# Cut the waveform into consecutive, non-overlapping windows of 48000 samples
# (window length == stride), one window per batch. The waveform is also passed
# as the targets argument, so every element is a (window, target) pair.
audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)

In [None]:
# Peek at the first window and show its shape (standard next(...) protocol
# instead of the iterator's non-idiomatic .next() method).
samples, index = next(audio_slices.as_numpy_iterator())
samples.shape


In [None]:
# Number of elements (one-window batches) the recording was split into.
len(audio_slices)

## Función para enventanar los espectrogramas

In [None]:
def preprocess_mp3(sample, index):
    """Turn one batched audio window into a magnitude spectrogram.

    Args:
        sample: Batch of windows; only its first entry (``sample[0]``) is used.
        index: Second element of the dataset pair; not used.

    Returns:
        The STFT magnitude of the window, padded to 48000 samples, with a
        trailing channel axis of size 1.
    """
    # Take the first entry along the leading axis.
    window = sample[0]
    # Pad with zeros at the FRONT up to 48000 samples.
    padding = tf.zeros([48000] - tf.shape(window), dtype=tf.float32)
    padded = tf.concat([padding, window], 0)
    # Magnitude of the complex short-time Fourier transform.
    magnitude = tf.abs(tf.signal.stft(padded, frame_length=320, frame_step=32))
    # Add the channel axis expected by the convolutional layers.
    return tf.expand_dims(magnitude, axis=2)

## Convertir los audios en ventanas y realizar predicciones

In [None]:
# Rebuild the 48000-sample windows, convert each one to a spectrogram and
# group the spectrograms into batches of 64 for prediction.
audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)
audio_slices = audio_slices.map(preprocess_mp3)
audio_slices = audio_slices.batch(64)

In [None]:
# Run the model on every window and display the raw output scores.
yhat = model.predict(audio_slices)
yhat

In [None]:
# Binarise the window scores: 1 when the score exceeds 0.99, otherwise 0.
# NOTE(review): the per-folder conversion further down uses 0.9 as threshold —
# confirm which value is intended.
yhat = [1 if prediction > 0.99 else 0 for prediction in yhat]
yhat

# 8. Realizar Predicciones en varios audios

## Bucle para cargar y evaluar los audios de una carpeta

In [None]:
# Score every file in the test folder: file name -> array of per-window model
# outputs.
results = {}
for file in os.listdir('/content/drive/MyDrive/DLBirds/ZorzalTest'):
    FILEPATH = os.path.join('/content/drive/MyDrive/DLBirds/ZorzalTest', file)

    # 16 kHz mono waveform of the whole recording.
    wav = load_mp3_16k_mono(FILEPATH)
    # Non-overlapping 48000-sample windows -> spectrograms -> batches of 64.
    audio_slices = (
        tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)
        .map(preprocess_mp3)
        .batch(64)
    )

    # Keep the raw scores; they are converted to classes in a later step.
    results[file] = yhat = model.predict(audio_slices)

## Convertir predicciones en clases

In [None]:
# Collapse the per-window scores of each file into a single label:
# 1 if at least one window scores above 0.9, otherwise 0.
# The scores come from a sigmoid output, so they are probabilities, not logits.
class_preds = {}
for file, scores in results.items():
    # np.any replaces the former `sum = np.sum(...)` assignment, which shadowed
    # the built-in sum() for the rest of the kernel session.
    class_preds[file] = int(np.any(np.asarray(scores) > 0.9))

class_preds