Skip to content

Commit

Permalink
Generalizing file reading with librosa (#122)
Browse files Browse the repository at this point in the history
  • Loading branch information
pranjalc1 committed Jul 19, 2022
1 parent fa91052 commit b58b13e
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 23 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ TEST/*
mixed_bird_manual.csv
outputs/result.csv
*.wav
*.mp3
*.flac
*.pyc
outputs/*
*.ipynb
!PyHa_Tutorial.ipynb
*.csv
!ScreamingPiha_Manual_Labels.csv
36 changes: 18 additions & 18 deletions PyHa/IsoAutio.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#from PyHa.tweetynet_package.tweetynet.network import TweetyNet
from .birdnet_lite.analyze import analyze
from .microfaune_package.microfaune.detection import RNNDetector
from .microfaune_package.microfaune import audio
from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
from .tweetynet_package.tweetynet.Load_data_functions import compute_features, predictions_to_kaleidoscope
import os
import torch
import librosa
import pandas as pd
import scipy.signal as scipy_signal
import numpy as np
import math
import os
from .birdnet_lite.analyze import analyze
from math import ceil
from copy import deepcopy

def build_isolation_parameters_microfaune(
Expand Down Expand Up @@ -60,7 +60,6 @@ def build_isolation_parameters_microfaune(
"threshold_type": threshold_type,
"threshold_const": threshold_const,
"threshold_min": threshold_min,
"window_size": window_size,
"chunk_size": chunk_size
}

Expand Down Expand Up @@ -636,7 +635,7 @@ def chunk_isolate(
'MANUAL ID': manual_id}

# calculating the number of chunks that define an audio clip
chunk_count = math.ceil(
chunk_count = ceil(
len(SIGNAL) / (isolation_parameters["chunk_size"] * SAMPLE_RATE))
# calculating the number of local scores per second
scores_per_second = len(local_scores) / old_duration
Expand Down Expand Up @@ -736,6 +735,7 @@ def generate_automated_labels_birdnet(audio_dir, isolation_parameters):
def generate_automated_labels_microfaune(
audio_dir,
isolation_parameters,
ml_model = "microfaune",
manual_id="bird",
weight_path=None,
normalized_sample_rate=44100,
Expand Down Expand Up @@ -777,6 +777,8 @@ def generate_automated_labels_microfaune(
# Use Custom weights for Microfaune Detector
else:
detector = RNNDetector(weight_path)
# print("model \"{}\" does not exist".format(ml_model))
# return None

# init labels dataframe
annotations = pd.DataFrame()
Expand All @@ -786,13 +788,12 @@ def generate_automated_labels_microfaune(
if os.path.isdir(audio_dir + audio_file):
continue

# It is a bit awkward here to be relying on Microfaune's wave file
# reading when we want to expand to other frameworks,
# Likely want to change that in the future. Librosa had some troubles.

# Reading in the wave audio files
# Reading in the audio files using librosa as single-channel (mono) data at the original sample rate (sr=None)
# Librosa scales the samples down to [-1, 1], but the models require the range [-32768, 32767], so the signal is multiplied by 32768
# The reason for this factor is explained here: https://stackoverflow.com/questions/53462062/pyaudio-bytes-data-to-librosa-floating-point-time-series
try:
SAMPLE_RATE, SIGNAL = audio.load_wav(audio_dir + audio_file)
SIGNAL, SAMPLE_RATE = librosa.load(audio_dir + audio_file, sr=None, mono=True)
SIGNAL = SIGNAL * 32768
except BaseException:
print("Failed to load", audio_file)
continue
Expand Down Expand Up @@ -905,13 +906,12 @@ def generate_automated_labels_tweetynet(
if os.path.isdir(audio_dir + audio_file):
continue

# It is a bit awkward here to be relying on Microfaune's wave file
# reading when we want to expand to other frameworks,
# Likely want to change that in the future. Librosa had some troubles.

# Reading in the wave audio files
# Reading in the audio files using librosa as single-channel (mono) data at the original sample rate (sr=None)
# Librosa scales the samples down to [-1, 1], but the models require the range [-32768, 32767], so the multiplication by 32768 is required
# The reason for this factor is explained here: https://stackoverflow.com/questions/53462062/pyaudio-bytes-data-to-librosa-floating-point-time-series
try:
SAMPLE_RATE, SIGNAL = audio.load_wav(audio_dir + audio_file)
SIGNAL, SAMPLE_RATE = librosa.load(audio_dir + audio_file, sr=None, mono=True)
SIGNAL = SIGNAL * 32768
except BaseException:
print("Failed to load", audio_file)
continue
Expand Down
7 changes: 4 additions & 3 deletions PyHa/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def clip_general(automated_df, human_df):
# print(SIGNAL.shape)
human_arr = np.zeros((int(SAMPLE_RATE * duration),))
bot_arr = np.zeros((int(SAMPLE_RATE * duration),))

folder_name = automated_df["FOLDER"].to_list()[0]
clip_name = automated_df["IN FILE"].to_list()[0]
# Placing 1s wherever the au
Expand All @@ -93,7 +93,7 @@ def clip_general(automated_df, human_df):

human_arr_flipped = 1 - human_arr
bot_arr_flipped = 1 - bot_arr

true_positive_arr = human_arr * bot_arr
false_negative_arr = human_arr * bot_arr_flipped
false_positive_arr = human_arr_flipped * bot_arr
Expand Down Expand Up @@ -207,7 +207,8 @@ def automated_labeling_statistics(
for clip in clips:
num_processed += 1
clip_automated_df = automated_df[automated_df["IN FILE"] == clip]
clip_manual_df = manual_df[manual_df["IN FILE"] == clip]
# The file extension recorded in manual_df may differ from the clip's extension, so match only on the file name before the extension
clip_manual_df = manual_df[manual_df["IN FILE"].str.startswith(".".join(clip.split(".")[:-1]))]
try:
if stats_type == "general":
clip_stats_df = clip_general(
Expand Down
8 changes: 6 additions & 2 deletions PyHa/visualizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
from .tweetynet_package.tweetynet.Load_data_functions import compute_features
import torch
import librosa
import matplotlib.pyplot as plt
import pandas as pd
import scipy.signal as scipy_signal
Expand Down Expand Up @@ -279,9 +280,12 @@ def spectrogram_visualization(
None
"""

# Loading in the clip with Microfaune's built-in loading function
# Reading in the audio file using librosa as single-channel (mono) data at the original sample rate (sr=None)
# Librosa scales the samples down to [-1, 1], but the models require the range [-32768, 32767], so the multiplication by 32768 is required
# The reason for this factor is explained here: https://stackoverflow.com/questions/53462062/pyaudio-bytes-data-to-librosa-floating-point-time-series
try:
SAMPLE_RATE, SIGNAL = audio.load_wav(clip_path)
SIGNAL, SAMPLE_RATE = librosa.load(clip_path, sr=None, mono=True)
SIGNAL = SIGNAL * 32768
except BaseException:
print("Failure in loading", clip_path)
return
Expand Down

0 comments on commit b58b13e

Please sign in to comment.