In [14]:
# opensoundscape is imported as a package; its submodules (e.g. data_selection, and the CNN classes for training/predicting) are reached by attribute access
import opensoundscape

#other utilities and packages
import os
import torch
from pathlib import Path
import numpy as np
import pandas as pd
import random
import subprocess
from glob import glob
import sklearn

# Set up plotting: wide default figure size and high-resolution ("retina") inline figures.
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize']=[15,5] #for large visuals
%config InlineBackend.figure_format = 'retina'


In [2]:
# Build the filepath/label table for OpenSoundscape.
# Result: filepath_presence_dict, a dict with two parallel lists,
#   "filepath" - full path of every clip found under the training folders
#   "presence" - the label ("positive" or "negative") of the clip at the same position

# The data folder is a sibling of the notebook's working directory.
data_dir = os.path.join(os.path.dirname(os.getcwd()), 'data')
weto_train_dir = os.path.join(data_dir, 'weto', 'train')

filepath_presence_dict = {"filepath": [], "presence": []}

# Aliases of the two lists stored in the dict; extending them fills the dict in place.
filepath_list = filepath_presence_dict["filepath"]
presence_list = filepath_presence_dict["presence"]

# Each label has its own sub-folder of clips under the training directory.
for set_key in ['positive', 'negative']:
    set_dir = os.path.join(weto_train_dir, set_key)
    # os.listdir returns entries in arbitrary order; sort so the row order (and any
    # seeded sampling applied to it later) is the same on every run and machine.
    filenames = sorted(os.listdir(set_dir))
    filepath_list.extend(os.path.join(set_dir, name) for name in filenames)
    # Always extend (never replace) the labels so they stay aligned with the paths
    # regardless of the order in which the label folders are visited.
    presence_list.extend([set_key] * len(filenames))

# Every path must have exactly one label.
assert len(filepath_list) == len(presence_list), "filepath/presence lists are misaligned"


In [16]:
# One-hot encode the presence labels. With an empty prefix and separator the
# indicator columns are named after the label values themselves, and the file
# path becomes the index.
presence_frame = pd.DataFrame(filepath_presence_dict)
meta_weto = pd.get_dummies(
    presence_frame,
    columns=['presence'],
    prefix="",
    prefix_sep="",
    dtype=int,
)
meta_weto = meta_weto.set_index('filepath')

# Peek at both ends of the table, then report the size of each class.
display(meta_weto.head(3), meta_weto.tail(3))
print(f"There are {np.sum(meta_weto['positive'])} positive samples")
print(f"There are {np.sum(meta_weto['negative'])} negative samples")

Unnamed: 0_level_0,negative,positive
filepath,Unnamed: 1_level_1,Unnamed: 2_level_1
c:\Users\gavin hurd\Documents\bioacoustics_local\data\weto\train\positive\A-11-E_20210430_230000_99_5_0-3.wav,0,1
c:\Users\gavin hurd\Documents\bioacoustics_local\data\weto\train\positive\A-11-E_20210503_230000_26_9_0-3.wav,0,1
c:\Users\gavin hurd\Documents\bioacoustics_local\data\weto\train\positive\A-11-E_20210503_230000_26_9_3-6.wav,0,1


Unnamed: 0_level_0,negative,positive
filepath,Unnamed: 1_level_1,Unnamed: 2_level_1
c:\Users\gavin hurd\Documents\bioacoustics_local\data\weto\train\negative\A-9_20210515_230000_120_8_3-6.wav,1,0
c:\Users\gavin hurd\Documents\bioacoustics_local\data\weto\train\negative\A-9_20210515_230000_60_4_0-3.wav,1,0
c:\Users\gavin hurd\Documents\bioacoustics_local\data\weto\train\negative\A-9_20210515_230000_63_5_0-3.wav,1,0


There are 590 positive samples
There are 755 negative samples


In [22]:
# Balance the two classes to a common size with OpenSoundscape's resampler.
# In the run shown above both classes are smaller than the target
# (590 positive / 755 negative), so reaching it necessarily repeats some rows.
# NOTE(review): if a train/validation split is made from the balanced table,
# repeated rows could land on both sides of the split - confirm downstream.
TARGET_PER_CLASS = 800  # rows wanted for each label column
RESAMPLE_SEED = 42      # fixed seed passed as random_state

resample_options = dict(
    n_samples_per_class=TARGET_PER_CLASS,
    upsample=True,
    downsample=True,
    with_replace=False,
    random_state=RESAMPLE_SEED,
)
meta_weto_balanced = opensoundscape.data_selection.resample(meta_weto, **resample_options)

# Report the per-class counts after balancing.
print(f"There are {np.sum(meta_weto_balanced['positive'])} positive samples")
print(f"There are {np.sum(meta_weto_balanced['negative'])} negative samples")

There are 800 positive samples
There are 800 negative samples
