# Create a data frame with audios

SOME THINGS TO IMPORT 

In [16]:
import os
import pandas as pd
import numpy as np
import librosa

from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
from imblearn.over_sampling import RandomOverSampler


DATA FRAME FUNCTION

In [31]:
## Give the name of the folder where the audio samples are, if they are in another directory give the complete path

def snippet_df_maker(folder):
    """ This function takes as a parameter the path or name to a folder where audio files are saved.
        The process made in the function is:
        
        1. Creates a list with the files names of all the audio snippets. 
        2. With a for loop, iterates through the files and take the audio waves and the labe(class).
        3. It appends each information(iteration into a corresponding list.
        4. Creates a dataFrame where the clumns are each of the lists.
    """
    # Create empty lists to store the data
    file_names = []
    srs = []
    audios = []
    labels = []
    #mfcc = []
    
    file_list = sorted(file for file in os.listdir(folder) if file.endswith('.wav'))
    

    
    for file in file_list:
        
        # load the audio file with librosa
        audio, sr = librosa.load(os.path.join(folder, file))
        audio_norm = librosa.util.normalize(audio)
        # split the filename into the label and ID columns
        label = file.split('_')[0]#, file.split('_')[1].split('.')[0]
        #mfccs = librosa.feature.mfcc(y=y, sr=sr)[0:13]
        
        file_names.append(file)
        srs.append(sr)
        audios.append(audio_norm)
        labels.append(label)
        #mfcc.append(mfccs)

        # convert the lists to a pandas dataframe
        snippet_df = pd.DataFrame(
            {'file_name':file_names,
            'sample_rate': srs,
            'audio': audios,
            'label': labels})
            #"mfccs": mfcc})
        
        # add length column
        snippet_df['length'] = (snippet_df['audio'].apply(lambda x: len(x))/snippet_df['sample_rate'])
        
        # include only rows where length between 0.2 and 10 seconds
        snippet_df = snippet_df[snippet_df['length'] > 0.2]
        snippet_df = snippet_df[snippet_df['length'] < 4]
         
    return snippet_df

In [32]:
#create the dataFrame
folder = "clean_data/snippet_samples" 
df = snippet_df_maker(folder)

In [33]:
df.head(2)


Unnamed: 0,file_name,sample_rate,audio,label,length
0,bark_00001.wav,22050,"[0.023036616, 0.023904754, 0.02477289, 0.02347...",bark,0.695964
1,bark_00005.wav,22050,"[0.0009460449, 0.001159668, 0.00091552734, 0.0...",bark,1.088027


In [34]:

le = LabelEncoder()
df["num_class"] = le.fit_transform(df["label"])

In [35]:
le.classes_

array(['bark', 'growl', 'pant', 'whine'], dtype=object)

In [36]:
df.head(2)


Unnamed: 0,file_name,sample_rate,audio,label,length,num_class
0,bark_00001.wav,22050,"[0.023036616, 0.023904754, 0.02477289, 0.02347...",bark,0.695964,0
1,bark_00005.wav,22050,"[0.0009460449, 0.001159668, 0.00091552734, 0.0...",bark,1.088027,0


In [37]:
(df.num_class.value_counts()/df.shape[0])*100


1    27.404516
3    27.097853
2    23.919710
0    21.577920
Name: num_class, dtype: float64

In [38]:
df.num_class.value_counts()

1    983
3    972
2    858
0    774
Name: num_class, dtype: int64

In [39]:
(df.label.value_counts()/df.shape[0])*100

growl    27.404516
whine    27.097853
pant     23.919710
bark     21.577920
Name: label, dtype: float64

In [40]:
df.label.value_counts()

growl    983
whine    972
pant     858
bark     774
Name: label, dtype: int64

In [41]:
ros = RandomOverSampler(random_state=0)

In [42]:
df_resampled, df_resampled.label = ros.fit_resample(df, df.label)

In [43]:
(df_resampled.label.value_counts()/df_resampled.shape[0])*100

bark     25.0
growl    25.0
pant     25.0
whine    25.0
Name: label, dtype: float64

In [44]:
df_resampled.label.value_counts()

bark     983
growl    983
pant     983
whine    983
Name: label, dtype: int64

# Create the spectograms 

THINGS TO IMPORT 

In [45]:
import matplotlib.pyplot as plt
import gc
from pathlib import Path

In [46]:
# Give the name/ path where the audio data is and the folder where the spectrograms will be saved. 
audio_data_directory = "clean_data/snippet_samples"
spectrograms_directory = "clean_data/spectogram_samples"

In [47]:
def generate_spectrograms(row):
    """
    IMPORTANT: CHECK COLUMNS NAMES
    This function takes as parameters a row of the data frame and creates the spectogram for each audio, 
    saving them as .png in new folders based on the class where they belong.
    
    """

    audio_class = row["num_class"]
    spect_directory = "class_" + str(audio_class)
    audio_file_name_without_extension = row["file_name"][:-4]
    
    y = row["audio"]
    
    spectrogram = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    librosa.display.specshow(spectrogram, y_axis='linear')
    
    plt.savefig(spectrograms_directory + "/" + spect_directory + "/" + audio_file_name_without_extension + ".png")

    plt.clf() 
    plt.close('all')
    gc.collect()

CREATE THE FOLDERS FOR EACH CLASS 

In [48]:
## This creates folder in the spectograms directory to divide the new images depending on the classes. 
for i in range(0,4):
    Path(spectrograms_directory + "/" + "class_" + str(i)).mkdir(parents=True, exist_ok=True)
    

In [49]:
df_resampled.apply(generate_spectrograms, axis=1)

0       None
1       None
2       None
3       None
4       None
        ... 
3927    None
3928    None
3929    None
3930    None
3931    None
Length: 3932, dtype: object

In [50]:
from keras.utils import image_dataset_from_directory, load_img

In [51]:
(df_resampled.num_class.value_counts()/df.shape[0])*100

0    27.404516
1    27.404516
2    27.404516
3    27.404516
Name: num_class, dtype: float64

In [52]:
X_train = image_dataset_from_directory(spectrograms_directory,validation_split = 0.2,
                                                              subset = "training", seed=7)
X_test = image_dataset_from_directory(spectrograms_directory,validation_split = 0.2,
                                                             subset="validation", seed=7)

Found 3587 files belonging to 4 classes.
Using 2870 files for training.
Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



2023-03-14 12:27:13.324268: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-14 12:27:13.324619: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Found 3587 files belonging to 4 classes.
Using 717 files for validation.


In [53]:
from tensorflow.keras import models, layers, losses, optimizers
from tensorflow.keras.callbacks import EarlyStopping

In [54]:
model = models.Sequential()

model.add(layers.Reshape((256, 256, 3), input_shape=(256, 256, 3)))
model.add(layers.experimental.preprocessing.Rescaling(1.0/255.0))

model.add(layers.Conv2D(32, 7, strides = 4, padding="same"))
model.add(layers.BatchNormalization())
model.add(layers.Activation("relu"))

model.add(layers.MaxPooling2D((4,4)))
model.add(layers.Conv2D(128, 3, padding="same"))
model.add(layers.BatchNormalization())

model.add(layers.Activation("relu"))
model.add(layers.Flatten())

model.add(layers.Dense(256))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.3))
model.add(layers.Activation("relu"))

model.add(layers.Dense(5, activation="softmax"))

In [55]:
model.compile(optimizer= "adam", loss=losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

In [56]:
es = EarlyStopping(patience= 40, restore_best_weights= True)

In [57]:
model_history = model.fit(X_train, validation_data=X_test, epochs=500, verbose=1, callbacks=[es])

Epoch 1/500


2023-03-14 12:27:14.665204: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-03-14 12:27:14.667287: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-03-14 12:27:24.948830: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500


In [58]:
print (f'Last loss is {model_history.__dict__["history"]["loss"][-1]}')
print (f'Last acuracy is {model_history.__dict__["history"]["accuracy"][-1]}')
print (f'Last validation loss is {model_history.__dict__["history"]["val_loss"][-1]}')
print (f'Last validation acuracy is {model_history.__dict__["history"]["val_accuracy"][-1]}')

Last loss is 0.0016290458152070642
Last acuracy is 0.9996516108512878
Last validation loss is 0.2852921187877655
Last validation acuracy is 0.9470013976097107


In [None]:
plt.figure(figsize=(15,5))

plt_loss = plt.subplot(121)
plt_loss.plot(model_history.history["loss"])
plt_loss.plot(model_history.history["val_loss"])

# plt.title("")
plt.ylabel("Loss")
plt.xlabel("Epoch")

plt.legend(["Training", "Validation"], loc="upper right")

plt_accuracy = plt.subplot(122)
plt_accuracy.plot(model_history.history["accuracy"])
plt_accuracy.plot(model_history.history["val_accuracy"])
# plt.title("")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
plt.legend(["Training", "Validation"], loc="lower right")
plt.ylim(0,1)

In [None]:
from PIL import Image

# TEST

In [None]:
# Create empty lists to store the data
file_names = []
audios = []
labels = []
 
file_list = sorted(os.listdir("clean_data/test_1"))
file_list.remove(".DS_Store")

    
for file in file_list:
        
    # load the audio file with librosa
    audio, sr = librosa.load(os.path.join("clean_data/test_1", file))
    audio_norm = librosa.util.normalize(audio)
    # split the filename into the label and ID columns
    label = file.split('_')[0]#, file.split('_')[1].split('.')[0]
        
        
    file_names.append(file)
    audios.append(audio_norm)
    labels.append(label)
     

    # convert the lists to a pandas dataframe
    test_df = pd.DataFrame(
            {'file_name':file_names,
            'audio': audios,
            'label': labels})

In [None]:
test_df

In [None]:
def new_spect(row):

    spect_directory = "clean_data/spectogram_test_1"
    audio_file_name_without_extension = row["file_name"][:-4]
    
    y = row["audio"]
    
    spectrogram = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    librosa.display.specshow(spectrogram, y_axis='linear')
    
    plt.savefig(spect_directory + "/" + audio_file_name_without_extension + ".png")

    plt.clf() 
    plt.close('all')
    gc.collect()


In [None]:
test_df.apply(new_spect, axis=1)

In [None]:
test_df

In [None]:
def prediction (row):
    audio_file_name_without_extension = row["file_name"][:-4]
    X_test = np.array(load_img(f'clean_data/spectogram_test_1/{audio_file_name_without_extension}.png'))
    resized_img = np.array(Image.fromarray(X_test).resize((256, 256)))
    row["prediction"] = model.predict(np.expand_dims(resized_img,axis=0))

    return row

In [None]:
test_df_pred = test_df.apply(prediction, axis=1)

In [None]:
test_df_pred 

In [None]:
def result_pred (row):
    prediction = row["prediction"]
    print (f'name = {row["file_name"]}')
    print (f'real = {row["label"]}')
    print(f'bark = {list(prediction[0])[0]*100}')
    print(f'growl = {list(prediction[0])[1]*100}')
    print(f'pant = {list(prediction[0])[2]*100}')
    print(f'whine = {list(prediction[0])[3]*100}')
    print("_____________________________________________")

In [None]:
test_df_pred.apply(result_pred, axis=1)

# Test youtube


In [None]:
# Create empty lists to store the data
file_names = []
audios = []
labels = []
 
file_list = sorted(os.listdir("clean_data/youtube"))
#file_list.remove(".DS_Store")

    
for file in file_list:
        
    # load the audio file with librosa
    audio, sr = librosa.load(os.path.join("clean_data/youtube", file))
    audio_norm = librosa.util.normalize(audio)
    # split the filename into the label and ID columns
    label = file.split('_')[0]#, file.split('_')[1].split('.')[0]
        
        
    file_names.append(file)
    audios.append(audio_norm)
    labels.append(label)
     

    # convert the lists to a pandas dataframe
    test_df = pd.DataFrame(
            {'file_name':file_names,
            'audio': audios,
            'label': labels})

In [None]:
test_df.apply(new_spect, axis=1)
