In [None]:
!pip install soundfile
!pip install pydub

import os

import h5py
import numpy as np
import pandas as pd
from pydub import AudioSegment # mp3

## Connect to Google Drive

In [None]:
# Google Colab only: mount Google Drive and switch the working directory to
# the mount point. Skip this cell when running outside Colab.
import os
from google.colab import drive

drive.mount('/content/drive')
os.chdir('/content/drive/')

In [None]:
# Auto re-load utils from util py files: enable IPython's autoreload extension
# in mode 2 so edits to imported modules are picked up without restarting the
# kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Load functions in util files.
# NOTE: %cd changes the kernel's working directory, so every relative path used
# in later cells is resolved against this folder.
%cd "path/to/py/util/folder/"
!ls

# NOTE(review): the star imports hide where each helper comes from. Later cells
# call split_time_interval, retrieve_spectrogram, find_time_index, resize_spec
# and add_to_hdf5, none of which are defined in the visible cells — presumably
# they are provided by these two modules; confirm and import them by name.
from dataset_process_utils import *
from classification_utils import *

## HDF5 storage

In [None]:
# Path of the HDF5 file that the following cells create (opened in 'w' mode)
# and then pass to add_to_hdf5 for every sample. It is a relative path, so it
# is resolved against the kernel's current working directory.
hdf5_file = 'file-to-save-img-samples.hdf5'

In [None]:
# Close a handle left open by a previous run of this (or another) cell.
# On a fresh kernel `f` does not exist yet; that is the only error ignored.
try:
  f.close()
except NameError:
  pass

# (Re)create the HDF5 storage. Mode 'w' truncates an existing file, so running
# this cell discards every sample dumped so far. The context manager closes
# the file even if one of the create_dataset calls fails.
with h5py.File(hdf5_file, 'w') as f:
  # Every dataset starts with zero rows and is resizable (maxshape of None on
  # each axis), so rows can be appended later.
  f.create_dataset("specs", (0,224,224,3,), maxshape=(None,None,None,None,), chunks=True)
  # Labels and sample descriptions are fixed-width byte strings (20 and 100
  # bytes respectively).
  # NOTE(review): the sample_source strings built in the dumping cells contain
  # the full audio path plus offsets — verify they fit in 100 bytes.
  f.create_dataset("labels", (0,),  maxshape=(None,), chunks=True, dtype="S20")
  f.create_dataset("sample_source", (0,),  maxshape=(None,), chunks=True, dtype="S100")

## Get samples and dump to HDF5

In [None]:
# Root folder of the recordings; must end with "/" because paths are built by
# plain string concatenation.
audio_folder = "path/to/audio/folder/"

# Everything directly inside the root folder ...
frog_audio_contents = os.listdir(audio_folder)
# ... and the subset of those entries that are directories.
frog_audio_folders = list(
  filter(lambda name: os.path.isdir(audio_folder + name), frog_audio_contents)
)

print(frog_audio_contents)
print(frog_audio_folders)

In [None]:
# Dump every labelled selection of every class folder to the HDF5 file, one
# sample per chunk of (at most) `sec_used` seconds.
sec_used = 1 # separate each selection every 1 second
max_freq = 2500 # cut frequencies at this value (passed to retrieve_spectrogram)
two_channel_audios = [] # audio paths to be read with one_sided=True

# Path of the audio file whose spectrogram is currently held in freq/t/spec;
# lets consecutive label rows of the same file reuse it.
cur_audio_name = ""
for frog_audio_folder in frog_audio_folders:
  print(frog_audio_folder)

  if frog_audio_folder in ["noise-or-background"]: # deal with background noises later
    print("Skipping...")
    continue

  cur_folder_path = audio_folder + frog_audio_folder + "/"
  # The label file sits next to the class folder; one folder uses a relabeled file.
  if frog_audio_folder == "Rana-draytonii":
    cur_label_path = audio_folder + frog_audio_folder + "-relabeled.txt"
  else:
    cur_label_path = audio_folder + frog_audio_folder + ".txt"
  print("Using label file:", cur_label_path)

  # Tab-separated label table; the columns read below are "Begin File",
  # "File Offset (s)", "Delta Time (s)" and "Selection".
  df = pd.read_csv(cur_label_path, sep = "\t")
  for index, row in df.iterrows():
    cur_label_audio_file = cur_folder_path + row["Begin File"]
    cur_start_time = row["File Offset (s)"]
    cur_delta_time = row["Delta Time (s)"]
    cur_end_time = cur_start_time + cur_delta_time
    cur_intervals = split_time_interval(cur_start_time, cur_end_time, cur_delta_time, sec_used)
    cur_selection = row["Selection"]

    # Retrieve the spectrogram only when the row refers to a new audio file
    if cur_label_audio_file != cur_audio_name:
      cur_audio_name = cur_label_audio_file
      print("Dumping file:", cur_audio_name)
      freq, t, spec = retrieve_spectrogram(cur_audio_name, max_freq = max_freq, one_sided =(cur_audio_name in two_channel_audios))
    # An empty frequency axis means there is nothing to slice for this file.
    if len(freq) == 0:
      continue

    for start, end in cur_intervals:
      start_ind, end_ind = find_time_index(start, end, t)
      # Copy only the slice that is needed instead of duplicating the whole
      # spectrogram for every interval; the resulting values are identical.
      cur_spec = np.copy(spec[:len(freq), start_ind:end_ind])
      spec_resized = resize_spec(cur_spec, x_ratio = (end - start) / sec_used, log_scale=True)
      sample_source_to_add = "%s, start: %f, end: %f, selection: %d" % (cur_audio_name, start, end, cur_selection)
      # The class folder name is stored as the sample's label.
      add_to_hdf5(hdf5_file, spec_resized, frog_audio_folder, sample_source_to_add)

In [None]:
# Dump background noises
frog_audio_folder = "noise-or-background"
cur_folder_path = audio_folder + frog_audio_folder + "/"
label_file = cur_folder_path + frog_audio_folder + ".csv"

# Each non-empty line of the label file names one recording,
# e.g. 2021-11-07/dawn_2021-11-07-07:47. The part after the "/" with ":"
# replaced by "_" is the base name of the wav file.
cur_sample_labels = []
with open(label_file, newline='') as csvfile:
  # splitlines() + strip() also handle "\r\n" line endings, which would
  # otherwise leave a stray "\r" at the end of every file name.
  lines = csvfile.read().splitlines()
  for line in lines:
    line = line.strip()
    if not line:
      # skip empty line
      continue
    cur_sample_labels.append([line.split("/")[1].replace(":", "_")])

cur_audio_name = ""
for cur_sample in cur_sample_labels:
  # every listed recording is its own file, so compute its spectrogram afresh
  cur_audio_name = cur_folder_path + cur_sample[0] + ".wav"
  print("Dumping file:", cur_audio_name)
  freq, t, spec = retrieve_spectrogram(cur_audio_name, max_freq = max_freq)

  # Same guard as the labelled-sample loop: an empty spectrogram has nothing to
  # slice, and t[-1] below would raise IndexError on an empty time axis.
  if len(freq) == 0 or len(t) == 0:
    print("Skipping (empty spectrogram):", cur_audio_name)
    continue

  # split the whole recording (0 .. last time stamp) into sec_used-long chunks
  cur_intervals = split_time_interval(0, t[-1], t[-1], sec_used)
  for start, end in cur_intervals:
    start_ind, end_ind = find_time_index(start, end, t)

    # Copy only the needed slice rather than the whole spectrogram.
    cur_spec = np.copy(spec[:len(freq), start_ind:end_ind])
    spec_resized = resize_spec(cur_spec, x_ratio = (end - start) / sec_used, log_scale=True)
    sample_source_to_add = "%s, start: %f, end: %f" % (cur_audio_name, start, end)
    add_to_hdf5(hdf5_file, spec_resized, frog_audio_folder, sample_source_to_add)

## Convert mp3 to wav

In [None]:
# Convert every mp3 in the class folders to a mono 44.1 kHz wav placed next to
# it; an existing wav is never overwritten.
# NOTE: this cell rebinds audio_folder and frog_audio_folders, which the
# dumping cells also use — re-run their setup cell before dumping again.
# all the sound files
audio_folder = "audio/"
# Keep directories only: a plain file in audio_folder would make the
# os.listdir call on its "folder" path below fail.
frog_audio_folders = [x for x in os.listdir(audio_folder) if os.path.isdir(audio_folder + x)]

for frog_audio_folder in frog_audio_folders:
  # skip on background so far
  if frog_audio_folder == "noise-or-background":
    continue
  cur_folder_path = audio_folder + frog_audio_folder + "/"
  # sorted() gives a deterministic processing order
  cur_audios = sorted(os.listdir(cur_folder_path))

  for cur_audio in cur_audios:
    print(cur_folder_path + cur_audio)
    # Only files whose extension is .mp3 (any case) are converted; a name that
    # merely contains ".mp3" somewhere else is ignored.
    stem, ext = os.path.splitext(cur_audio)
    if ext.lower() != ".mp3":
      continue
    wav_name = cur_folder_path + stem + ".wav"
    # Check before decoding so already-converted files cost nothing.
    if os.path.exists(wav_name):
      continue
    #open file (supports all ffmpeg supported filetypes)
    audio = AudioSegment.from_file(cur_folder_path + cur_audio)
    #set to mono
    audio = audio.set_channels(1)
    #set to 44.1 KHz
    audio = audio.set_frame_rate(44100)
    #save as wav; export returns the open output file handle, so close it
    audio.export(wav_name, format="wav").close()