In [1]:
import os
import torchaudio
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import load_dataset
import librosa.display
import tensorflow as tf

In [76]:
capuchin_calls_dir = 'Parsed_Capuchinbird_Clips'
not_capuchin_calls_dir = 'Parsed_Not_Capuchinbird_Clips'
gunshots = "gunshots"
lion = "Pure_Lion"

In [3]:
wav,sr = librosa.load("Parsed_Capuchinbird_Clips/XC114131-0.wav",sr=16000)
wav

array([ 0.00129213,  0.0071058 ,  0.00394941, ...,  0.00346815,
       -0.0087024 , -0.00559176], dtype=float32)

In [106]:
audio_files = []
labels = []
for root, dirs, files in os.walk(capuchin_calls_dir):
    for file in files:
        if file.endswith(".wav"):
            audio_files.append(os.path.join(root, file))
            labels.append(1)
for root, dirs, files in os.walk(not_capuchin_calls_dir):
    for file in files:
        if file.endswith(".wav"):
            audio_files.append(os.path.join(root, file))
            labels.append(0)
for root, dirs, files in os.walk(gunshots):
    for file in files:
        if file.endswith(".wav"):
            audio_files.append(os.path.join(root, file))
            labels.append(2)
for root, dirs, files in os.walk(lion):
    for file in files:
        if file.endswith(".wav"):
            audio_files.append(os.path.join(root, file))
            labels.append(3)


In [107]:
len(labels)

1210

In [108]:
train_audio_files, val_audio_files, train_labels, val_labels = train_test_split(audio_files, labels, test_size=0.2, random_state=42)

In [109]:
import torchaudio.transforms as T

In [110]:
def preprocess_audio(file_path):
    waveform, sample_rate = librosa.load(file_path,sr=16000)
    # apply any preprocessing steps here, such as resampling or normalization
    # waveform = T.Resample(sample_rate,new_freq=16000)

    return waveform,sample_rate


In [111]:
# import numpy as np
# np.set_printoptions(threshold=np.inf)

In [112]:
# print(preprocess_audio("Parsed_Capuchinbird_Clips/XC114131-0.wav"))

In [113]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [114]:
train_examples = [{'audio_filepath': file_path,'array':preprocess_audio(file_path),'label': label} for file_path, label in zip(train_audio_files, train_labels)]
val_examples = [{'audio_filepath': file_path,'array':preprocess_audio(file_path), 'label': label} for file_path, label in zip(val_audio_files, val_labels)]

In [115]:
len(train_examples)

968

In [116]:
print((train_examples[913]))

{'audio_filepath': 'gunshots\\197320-6-7-0.wav', 'array': (array([ 7.3652096e-05,  1.1064417e-04,  1.1921859e-04, ...,
       -2.3204114e-04, -6.5001375e-05, -8.6965912e-05], dtype=float32), 16000), 'label': 2}


In [123]:
for i in range(len(val_examples)):
    if ("Pure_Lion" in val_examples[i]["audio_filepath"]):
        print(val_examples[i])
        print(i)
        break

{'audio_filepath': 'Pure_Lion\\pure_r11.wav', 'array': (array([0.00045361, 0.00041826, 0.00087716, ..., 0.01956777, 0.01665892,
       0.        ], dtype=float32), 16000), 'label': 3}
9


In [118]:
train_file_paths = [example['audio_filepath'] for example in train_examples]
train_labels = [example['label'] for example in train_examples]
train_array = [example['array'][0].tolist() for example in train_examples]
val_file_paths = [example['audio_filepath'] for example in val_examples]
val_labels = [example['label'] for example in val_examples]
val_array = [example['array'][0].tolist() for example in val_examples]
train_dataset_dict = {'file_path': train_file_paths, 'label': train_labels,'array':train_array}
val_dataset_dict = {'file_path': val_file_paths, 'label': val_labels,'array':val_array}


In [119]:
train_array[0]

[-6.658103302470408e-06,
 0.0001664743322180584,
 0.00018930243095383048,
 0.00022760732099413872,
 2.9466443152159627e-07,
 -0.00017786274838726968,
 -0.00016434107965324074,
 -3.076578286709264e-05,
 -1.9731049178517424e-05,
 -0.0001749388175085187,
 -0.00010675813246052712,
 -0.0003647219273261726,
 -0.00028034360730089247,
 -0.0001496647746535018,
 -0.00025284336879849434,
 7.709818601142615e-05,
 -6.3077268350753e-06,
 -0.0001440265914425254,
 4.726243787445128e-06,
 0.00014467205619439483,
 1.974285078176763e-05,
 0.00011250781244598329,
 0.00029271465609781444,
 -9.41241978580365e-06,
 -1.5705820260336623e-05,
 0.00015441486903000623,
 0.00028761624707840383,
 0.00022400212765205652,
 0.0002208872901974246,
 5.3996918722987175e-05,
 -0.00028377847047522664,
 -0.00038870665594004095,
 -0.00036484221345745027,
 -0.00023150144261308014,
 -0.0004046497051604092,
 -0.0005973089719191194,
 -0.00040854947292245924,
 -0.00044876450556330383,
 -0.0003318932431284338,
 -0.0001354683918179

In [120]:
from datasets import Dataset,DatasetDict
train_dataset = Dataset.from_dict(train_dataset_dict)
test_dataset = Dataset.from_dict(val_dataset_dict)
dataset_dict = DatasetDict({'train': train_dataset, 'test': test_dataset})

In [121]:
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['file_path', 'label', 'array'],
        num_rows: 968
    })
    test: Dataset({
        features: ['file_path', 'label', 'array'],
        num_rows: 242
    })
})

In [122]:
import pickle

with open('dataset_dict_with_gun_lion.pickle', 'wb') as f:
    pickle.dump(dataset_dict, f)

# Load the dictionary from the pickle object
with open('dataset_dict_with_gun_lion.pickle', 'rb') as f:
    loaded_dict = pickle.load(f)

# Print the loaded dictionary
print(type(loaded_dict))


<class 'datasets.dataset_dict.DatasetDict'>


In [72]:
with open('dataset_dict_with_gun.pickle', 'rb') as f:
    loaded_dict = pickle.load(f)

# Print the loaded dictionary
print(loaded_dict)


DatasetDict({
    train: Dataset({
        features: ['file_path', 'label', 'array'],
        num_rows: 948
    })
    test: Dataset({
        features: ['file_path', 'label', 'array'],
        num_rows: 237
    })
})


# to-do add the audio as well

In [19]:
import transformers

In [20]:
print(transformers.__version__)

4.25.1
