In [1]:
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.applications import inception_v3
from IPython import display
from keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import librosa
from pathlib import Path
import os, shutil
import random
plt.ioff()

In [2]:
#!pip install audiomentations
#!pip install soundfile

In [3]:
#!rm -rf data_augm
#!mkdir data_augm
#!mkdir data_augm/dataset

In [5]:
import requests

def download_file_from_google_drive(id, destination):
    """Download a shared Google Drive file and save it locally.

    A first request is sent for the file. If the response carries a
    ``download_warning*`` cookie, its value is sent back as the ``confirm``
    parameter in a second request. The body of the final response is then
    streamed to ``destination``.

    Args:
        id: Google Drive file identifier. The name shadows the builtin
            ``id`` but is kept so existing keyword callers still work.
        destination: Path of the local file to write.

    Raises:
        requests.HTTPError: If the final response has an error status code.
    """
    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # even when the download fails part-way through.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # Release the first streamed response before replacing it.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        # Fail loudly instead of writing an HTTP error page to disk.
        response.raise_for_status()
        save_response_content(response, destination)

def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie.

    Args:
        response: Object exposing a ``cookies`` mapping with ``items()``.

    Returns:
        The cookie value, or ``None`` when no cookie name starts with
        ``download_warning``.
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(matching_values, None)

def save_response_content(response, destination):
    """Write the body of ``response`` to ``destination`` in binary mode.

    Args:
        response: Object exposing ``iter_content(chunk_size)`` that yields
            byte chunks.
        destination: Path of the file to create or overwrite.
    """
    chunk_bytes = 32768

    with open(destination, "wb") as out_file:
        # Empty chunks (keep-alive markers) are skipped, not written.
        out_file.writelines(
            piece for piece in response.iter_content(chunk_bytes) if piece
        )

# Google Drive identifier of the archive that is saved locally as data.zip.
file_id = '1jwIWW2vuBJVO-XcCTL9HgmcolCfQJ2ir'
download_file_from_google_drive(id=file_id, destination="data.zip")

In [4]:
from zipfile import ZipFile

# Create the extraction directory; exist_ok avoids a separate existence check.
os.makedirs('data', exist_ok=True)

# The context manager closes the archive even if extraction raises.
with ZipFile('data.zip', 'r') as zf:
    zf.extractall('data')

In [5]:
# Dataset root inside the folder the archive is extracted into.
data_dir = pathlib.Path("data") / "data_final"

In [6]:
def decode_audio(audio_binary):
    """Decode WAV file contents into a waveform without a channel axis.

    Args:
        audio_binary: Contents of a WAV file, e.g. from ``tf.io.read_file``.

    Returns:
        The decoded audio with its last (channel) axis squeezed away. The
        sample rate reported by the decoder is discarded.
    """
    # Ask the decoder for exactly one channel so that the squeeze below also
    # succeeds on multi-channel files; mono files decode exactly as before.
    audio, _ = tf.audio.decode_wav(audio_binary, desired_channels=1)
    return tf.squeeze(audio, axis=-1)

In [7]:
def get_label(file_path):
    """Return the name of the folder that directly contains ``file_path``.

    The path is split on ``os.path.sep`` and the second-to-last component
    is returned as a Python ``str``.

    Note: the ``.numpy()`` call needs an eagerly evaluated tensor, so this
    helper is not usable inside a traced TensorFlow graph.
    """
    path_components = tf.strings.split(file_path, os.path.sep)
    parent_folder = path_components[-2]
    return parent_folder.numpy().decode()

In [8]:
def get_waveform(file_path):
    """Read a WAV file and decode it.

    Args:
        file_path: Path of the WAV file to read.

    Returns:
        A ``(waveform, audio_binary)`` pair: the output of ``decode_audio``
        and the raw file contents it was decoded from.
    """
    raw_bytes = tf.io.read_file(file_path)
    return decode_audio(raw_bytes), raw_bytes

In [9]:
# STFT framing: window length and hop between consecutive windows.
frame_length, frame_step = 2048, 512

# Mel filterbank settings.
num_mel_bins = 128
num_spectrogram_bins = frame_length // 2 + 1
sample_rate = 44100
fmin = 0.0
fmax = sample_rate / 2  # half the sample rate


def get_spectrogram(waveform, max_samples=140000):
    """Convert a 1-D waveform into a log-mel spectrogram.

    The waveform is cast to float32 and brought to exactly ``max_samples``
    samples (truncated if longer, zero-padded at the end if shorter), so
    every clip yields a spectrogram of the same size. It is then passed
    through an STFT and projected onto a mel filterbank built from the
    module-level ``frame_length``, ``frame_step``, ``num_mel_bins``,
    ``sample_rate``, ``fmin`` and ``fmax``.

    Args:
        waveform: 1-D tensor of audio samples.
        max_samples: Fixed clip length in samples. The default of 140000
            is about 3.2 s at the module-level ``sample_rate`` of 44100.

    Returns:
        Tensor of ``log(mel + 1e-6)`` values with STFT frames on the first
        axis and ``num_mel_bins`` mel bands on the last axis.
    """
    waveform = tf.cast(waveform, tf.float32)

    # Truncate over-long clips first: otherwise the padding length computed
    # below would be negative and tf.zeros would raise.
    waveform = waveform[:max_samples]

    # Zero-pad shorter clips so that all clips have the same length.
    zero_padding = tf.zeros([max_samples] - tf.shape(waveform), dtype=tf.float32)
    equal_length = tf.concat([waveform, zero_padding], 0)

    magnitude_spectrograms = tf.abs(
        tf.signal.stft(equal_length, frame_length, frame_step))

    # Warp the linear-scale magnitude spectrogram onto the mel scale.
    # The bin count is read from the STFT output itself (local name, so the
    # module-level num_spectrogram_bins is not consulted here).
    num_spectrogram_bins = magnitude_spectrograms.shape[-1]
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate, fmin,
        fmax)

    mel_spectrograms = tf.tensordot(
        magnitude_spectrograms, linear_to_mel_weight_matrix, 1)

    # tensordot can lose static shape information; restore it explicitly.
    mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))

    # Stabilised log: the small offset avoids log(0) on silent bins.
    log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

    return log_mel_spectrograms

In [10]:
# VERSIONE PROSTAZIA
#def plot_spectrogram(spectrogram, label, class_path, i):
#  fig, ax = plt.subplots(figsize=(20,20))
#  mfcc_data= np.swapaxes(spectrogram, 0 ,1)
#  cax = ax.imshow(mfcc_data, interpolation='nearest', cmap=cm.coolwarm, origin='lower')
#  ax.axis("off")
#  fig.savefig('data\dataset\mels{}\mel_{}_{}.png'.format(class_path.split("data\dataset\data_final")[1], label, i), bbox_inches='tight', pad_inches=0, dpi=300)

In [11]:
# VERSIONE FEDOUS
def draw_spectrogram(spectrogram, output_dir_path, i):
    """Render a 2-D spectrogram as an axis-free PNG image.

    Args:
        spectrogram: 2-D array-like; its two axes are swapped before
            plotting, so the first axis runs horizontally in the image.
        output_dir_path: Directory in which the image is written.
        i: Index used in the output file name ``mel_{i}.png``.
    """
    fig, ax = plt.subplots(figsize=(20, 20))
    try:
        image_data = np.swapaxes(spectrogram, 0, 1)
        ax.imshow(image_data, interpolation='nearest', cmap=cm.coolwarm, origin='lower')
        ax.axis("off")
        fig.savefig(f'{output_dir_path}/mel_{i}.png', bbox_inches='tight', pad_inches=0, dpi=300)
    finally:
        # Close this specific figure (not just whichever one is "current"),
        # and do so even when rendering or saving fails, so that figures do
        # not accumulate in memory over a long conversion loop.
        plt.close(fig)
  

### Split into Train / Val / Test

In [12]:
#!pip install split-folders

In [13]:
import splitfolders

# Split the class folders into train / val / test subsets (70% / 10% / 20%)
# under data/split, using a fixed seed.
splitfolders.ratio(
    "data/data_final",
    output="data/split",
    seed=1337,
    ratio=(0.7, 0.1, 0.2),
)

Copying files: 354 files [00:00, 774.05 files/s]


### Data Augmentation

In [14]:
from audiomentations import Compose, AddGaussianNoise, PitchShift, Normalize, FrequencyMask
import numpy as np
import soundfile as sf
import librosa

In [15]:
# Three single-transform pipelines; p=1 means the transform is always applied.
augm_01 = Compose([AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1)])  # additive Gaussian noise
augm_02 = Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1)])  # pitch shift of up to 4 semitones either way
augm_03 = Compose([Normalize(p=1)])  # normalization

# Every pipeline in this list is applied separately to each source clip.
aug_l = [augm_01, augm_02, augm_03]

In [16]:
def augmentation(path, augment):
    """Load an audio file and apply one augmentation pipeline to it.

    Args:
        path: Path of the audio file to load.
        augment: Callable invoked as ``augment(samples=..., sample_rate=...)``.

    Returns:
        A ``(augmented_samples, sr)`` pair, where ``sr`` is the sample rate
        of the loaded audio.
    """
    # sr=None keeps the file's native sample rate. librosa's default would
    # resample to 22050 Hz, so the augmented files written from this output
    # would no longer match the rate the rest of the notebook assumes
    # (sample_rate = 44100).
    y, sr = librosa.load(path, sr=None)
    augmented_samples = augment(samples=y, sample_rate=sr)
    return augmented_samples, sr

In [17]:
main_dirs = ["data/split/train"]  # splits whose clips get augmented

for main_dir in main_dirs:
    print(f"AUGMENTATION on {main_dir}")

    # sorted() makes the processing order, and therefore the numbering of
    # the generated files, deterministic across runs.
    for class_dir in sorted(Path(main_dir).glob('*')):  # one folder per class
        class_path = str(class_dir)
        print(class_path)

        # The class folder name is the label, i.e. the parent folder of
        # every wav file found below.
        label = class_dir.name

        # Only files named with exactly two digits are used as sources.
        # The list is built before anything is written, so the folder is
        # never scanned lazily while new files are being added to it.
        wav_paths = sorted(class_dir.glob('[0-9][0-9].wav'))

        # Generated files are numbered from 300 upwards, so their names do
        # not match the two-digit source pattern above.
        i = 300
        for w_path in wav_paths:  # each source clip
            wav_path = str(w_path)
            for aug in aug_l:  # each augmentation pipeline
                augmented_samples, sr = augmentation(wav_path, aug)
                sf.write('{}/{}/{}.wav'.format(main_dir, label, i), augmented_samples, sr)
                i += 1


AUGMENTATION on data/split/train
data\split\train\Alces_alces
data\split\train\Bos_taurus
data\split\train\Cervus_elaphus
data\split\train\Equus_caballus
data\split\train\Lutra_lutra
data\split\train\Ovis
data\split\train\Pan
data\split\train\Panthera_leo
data\split\train\Procyon
data\split\train\Vulpes
AUGMENTATION on data/split/val
data\split\val\Alces_alces
data\split\val\Bos_taurus
data\split\val\Cervus_elaphus
data\split\val\Equus_caballus
data\split\val\Lutra_lutra
data\split\val\Ovis
data\split\val\Pan
data\split\val\Panthera_leo
data\split\val\Procyon
data\split\val\Vulpes


In [18]:

#try:
#    os.makedirs('data/dataset/mels')
#except:
#    print("Folder already exists, deleting its content to replace them with new ones.")
#    shutil.rmtree('data/dataset/mels')
#    os.makedirs('data/dataset/mels')

### Wav -> MEL

In [19]:
# Define a function that applies SpecAugment-style frequency and time masking to a spectrogram array

def spec_augment(spec: np.ndarray, num_mask=1, 
                 freq_masking_max_percentage=0.15, time_masking_max_percentage=0.05):
    """Return a copy of ``spec`` with random frequency and time bands zeroed.

    ``spec`` is a 2-D array whose first axis is treated as frames and whose
    second axis is treated as frequency bins. For each of ``num_mask``
    rounds, one band of frequency bins and one span of frames are set to 0.
    The input array is left unmodified.

    Args:
        spec: 2-D spectrogram array.
        num_mask: Number of masking rounds.
        freq_masking_max_percentage: Upper bound of the fraction of
            frequency bins masked per round (drawn uniformly from 0.05 up).
        time_masking_max_percentage: Upper bound of the fraction of frames
            masked per round (drawn uniformly from 0.0 up).

    Returns:
        The masked copy of ``spec``.
    """
    masked = spec.copy()

    for _ in range(num_mask):
        n_frames, n_freqs = masked.shape

        # Frequency mask: choose a band width, then where the band starts.
        freq_fraction = random.uniform(0.05, freq_masking_max_percentage)
        band_width = int(freq_fraction * n_freqs)
        band_start = int(np.random.uniform(low=0.02, high=n_freqs - band_width))
        masked[:, band_start:band_start + band_width] = 0

        # Time mask: choose a span length, then where the span starts.
        time_fraction = random.uniform(0.0, time_masking_max_percentage)
        span_length = int(time_fraction * n_frames)
        span_start = int(np.random.uniform(low=0.0, high=n_frames - span_length))
        masked[span_start:span_start + span_length, :] = 0

    return masked

In [20]:
# Source folders with wav files and the matching destinations for the
# spectrogram images; the two lists are paired by position.
input_dirs = ["data/split/train", "data/split/val", "data/split/test"]
output_dirs = ["data_mel/train", "data_mel/val", "data_mel/test"]

# os.makedirs also creates the missing 'data_mel' parent, and exist_ok makes
# re-running the cell safe without a separate existence check.
for output_dir in output_dirs:
    os.makedirs(output_dir, exist_ok=True)

In [21]:
# VERSIONE FEDOUS
plt.ioff()  # turn off interactive plotting while the images are generated

# Walk the paired input/output folders (train, val, test).
for input_dir, output_dir in zip(input_dirs, output_dirs):

    # sorted() makes the processing order, and therefore the mel_{j}.png
    # numbering, deterministic across runs.
    for fold in sorted(os.listdir(input_dir)):  # class folders
        output_dir_path = f'{output_dir}/{fold}'
        os.makedirs(output_dir_path, exist_ok=True)

        j = 0  # running index of the images written for this class

        for wav_file in sorted(os.listdir(f'{input_dir}/{fold}')):  # each wav file
            wav_path = f'{input_dir}/{fold}/{wav_file}'
            wave, _ = get_waveform(wav_path)
            mel = get_spectrogram(wave)
            draw_spectrogram(mel, output_dir_path, j)
            j += 1

            # A 6-character file name ("NN.wav") marks an original clip;
            # those also get a masked variant. The spectrogram computed
            # above is reused instead of being computed a second time.
            # NOTE(review): this also adds masked copies to the val and
            # test splits - confirm that is intended.
            if len(wav_file) == 6:
                masked_spectrogram = spec_augment(mel.numpy())
                draw_spectrogram(masked_spectrogram, output_dir_path, j)
                j += 1

        print(f'{output_dir}/{fold}/ DONE')

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



KeyboardInterrupt

The above exception was the direct cause of the following exception:

SystemError: <built-in method write of _io.BufferedWriter object at 0x0000013EA3346F68> returned a result with an error set

The above exception was the direct cause of the following exception:

SystemError: <built-in method write of _io.BufferedWriter object at 0x0000013EA3346F68> returned a result with an error set

The above exception was the direct cause of the following exception:

SystemError: <built-in method write of _io.FileIO object at 0x0000013EA3343F48> returned a result with an error set

The above exception was the direct cause of the following exception:

SystemError: <built-in method write of _io.BufferedWriter object at 0x0000013EA3346F68> returned a result with an error set

The above exception was the direct cause of the following exception:

SystemError: <built-in method write of _io.BufferedWriter object at 0x0000013EA3346F68> returned a result with an error set

The above exce

TypeError: can only concatenate str (not "list") to str

In [None]:
# VERSIONE PROSTAZIA
#main_dir = "data/dataset/data_final"
#folders_pathlist = Path(main_dir).glob('*')

#for path in folders_pathlist:
    # because path is object not string
#    class_path = str(path)
#    print(class_path)
#    wav_pathlist = Path(class_path).glob('*')
#    i = 0
#    os.makedirs('data/dataset/mels/{}'.format(class_path.rsplit("data_final\\")[1]))
#    for w_path in wav_pathlist:
#        wav_path = str(w_path)
#        wave, _ = get_waveform(wav_path)
#        label = get_label(wav_path)
#        mel = get_spectrogram(wave)
#        plot_spectrogram(mel, label, class_path, i)
#        i +=1


In [23]:
#Divide dataset in train val test

In [24]:
#import splitfolders

In [25]:
#splitfolders.ratio("data/dataset/mels", output="data/dataset/mel_final", seed=1337, ratio=(.8, .1, .1)) # default values