Import fixes #172

Open · wants to merge 34 commits into main from import-fixes
Commits (34)
4452072
feat: Adds poetry
ntlhui Nov 14, 2023
f03c455
feat: Adds workflow
ntlhui Nov 14, 2023
7d74a39
fix: Adds python version limitation
Nov 14, 2023
ec86f00
fix: Limits tensorflow-io-gcs-filesystem version
Nov 14, 2023
47f535a
fix: Fixes tensorflow for Windows
Nov 14, 2023
512b4da
Update IsoAutio.py
Sean1572 Nov 14, 2023
708d8a5
Update IsoAutio.py
Sean1572 Nov 14, 2023
f83569b
fix: Fixes env, adds testing
Nov 14, 2023
66afad1
ci: Fixes workflow
Nov 14, 2023
1220470
ci: Fixes poetry testing
Nov 14, 2023
7e4d223
ci: Fixes pytest invocation
Nov 14, 2023
c528e8c
fix: Downgrades torch
Nov 14, 2023
4cf5824
ci: Fixes env
Nov 14, 2023
c4b8bca
feat: Adds support for 3.10 and docs
Nov 14, 2023
e30b39a
fix: Fixes packaging name
Nov 14, 2023
9d2a1c0
fix: Downgrades pytest
Nov 14, 2023
40b8831
fix: Upgrades pytest to latest
Nov 14, 2023
4bb128f
fix: Adds test script
Nov 14, 2023
d92165c
chore: Removes conda
Nov 14, 2023
7d0aa42
docs: Adds poetry tensorflow notes
Nov 14, 2023
58454e1
Updated TweetyNET, adjusted parameters on tut
Nov 15, 2023
a770cfe
Fixed microfaune error
Nov 15, 2023
2a4fd72
working on fixing birdnet
Nov 15, 2023
7a15d2a
added resampy to poetry (still errors with brdnet)
Nov 15, 2023
784f53c
Fixed package issues and birdnet issues
Nov 16, 2023
7c0cca5
Replaced more .append with pd.concat
Nov 16, 2023
d2cb390
Revert PyHa Tutorial
sprestrelski Nov 17, 2023
f77d93b
reran the notebook
Nov 17, 2023
4df196c
Merge branch 'import-fixes'
Nov 17, 2023
42afd95
Took out looped concat calls to follow best practice
Nov 29, 2023
55bae93
Added comments for clarity
Nov 29, 2023
574b7ed
Removed tutorial file for PR
Nov 29, 2023
362cdf4
Merge branch 'main' into import-fixes
TQZhang04 Nov 30, 2023
78d1f7e
fix: added back PyHa_Tutorial from main
May 15, 2024
24 changes: 12 additions & 12 deletions PyHa/IsoAutio.py
@@ -918,9 +918,9 @@ def generate_automated_labels_microfaune(
     # print("model \"{}\" does not exist".format(ml_model))
     # return None

-    # init labels dataframe
-    annotations = pd.DataFrame()
     # generate local scores for every bird file in chosen directory
+    # initialize list of entries to add
+    entries_to_add = []
     for audio_file in os.listdir(audio_dir):
         # skip directories
         if os.path.isdir(audio_dir + audio_file):
@@ -987,17 +987,17 @@ def generate_automated_labels_microfaune(
                 manual_id=manual_id,
                 normalize_local_scores=normalize_local_scores)
             # print(new_entry)
-            if annotations.empty:
-                annotations = new_entry
-            else:
-                annotations = pd.concat([annotations, new_entry])
+            # append entry to list
+            entries_to_add.append(new_entry)
         except KeyboardInterrupt:
             exit("Keyboard interrupt")
         except BaseException as e:
             checkVerbose(e, isolation_parameters)
             checkVerbose("Error in isolating bird calls from" + audio_file, isolation_parameters)

             continue
+    # Create dataframe from entries
+    annotations = pd.concat(entries_to_add)
     # Quick fix to indexing
     annotations.reset_index(inplace=True, drop=True)
     return annotations
@@ -1056,8 +1056,8 @@ def generate_automated_labels_tweetynet(
         device = torch.device('cpu')
     detector = TweetyNetModel(2, (1, 86, 86), 86, device)

-    # init labels dataframe
-    annotations = pd.DataFrame()
+    # init labels list
+    entries_to_add = []
     # generate local scores for every bird file in chosen directory
     for audio_file in os.listdir(audio_dir):
         # skip directories
@@ -1128,16 +1128,16 @@ def generate_automated_labels_tweetynet(
                 manual_id=manual_id,
                 normalize_local_scores=normalize_local_scores)
             # print(new_entry)
-            if annotations.empty:
-                annotations = new_entry
-            else:
-                annotations = pd.concat([annotations, new_entry])
+            # append entry to list
+            entries_to_add.append(new_entry)
         except KeyboardInterrupt:
             exit("Keyboard interrupt")
         except BaseException as e:
             checkVerbose("Error in isolating bird calls from " + audio_file, isolation_parameters)
             logger.exception(f"Error in isolating bird calls from {audio_file}")
             continue
+    # Create dataframe from entries
+    annotations = pd.concat(entries_to_add)
     # Quick fix to indexing
     annotations.reset_index(inplace=True, drop=True)
     return annotations
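Note on the pattern above, which recurs throughout this PR: instead of concatenating into the accumulator DataFrame on every iteration (which copies all rows gathered so far each time), the loop now collects entries in a plain Python list and calls pd.concat once at the end. A minimal sketch of the idea with illustrative names; the one-row DataFrame stands in for the real per-file labeling output:

import pandas as pd

def build_annotations(items):
    # Collect one DataFrame per item; appending to a list is O(1).
    entries_to_add = []
    for item in items:
        # stand-in for the real per-file labeling step
        new_entry = pd.DataFrame({"IN FILE": [item], "OFFSET": [0.0], "DURATION": [3.0]})
        entries_to_add.append(new_entry)
    # A single concat is linear in total rows; concatenating inside
    # the loop re-copies the accumulated frame on every pass.
    annotations = pd.concat(entries_to_add)
    annotations.reset_index(inplace=True, drop=True)
    return annotations

print(build_annotations(["a.wav", "b.wav"]))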
4 changes: 2 additions & 2 deletions PyHa/birdnet_lite/analyze.py
@@ -85,8 +85,8 @@ def splitSignal(sig, rate, overlap, seconds=3.0, minlen=1.5):
 def readAudioData(path, overlap, sample_rate=48000):

     print('READING AUDIO DATA...', end=' ', flush=True)
-    print("Path: ", path)
-    # Open file with librosa (uses ffmanaeg or libav)
+    # print("Path: ", path)
+    # Open file with librosa (uses ffmpeg or libav)
     try:
         sig, rate = librosa.load(path, sr=sample_rate, mono=True, res_type='kaiser_fast')
         clip_length = librosa.get_duration(y=sig, sr=rate)
41 changes: 19 additions & 22 deletions PyHa/statistics.py
@@ -243,13 +243,13 @@ def automated_labeling_statistics(
     clips = automated_df["IN FILE"].to_list()
     # Removing duplicates
     clips = list(dict.fromkeys(clips))
-    # Initializing the returned dataframe
-    statistics_df = pd.DataFrame()

     num_errors = 0
     num_processed = 0

     start_time = time.time()
+    # init clips list
+    stats_to_add = []
     # Looping through each audio clip
     for clip in clips:
         num_processed += 1
@@ -260,18 +260,11 @@
             if stats_type == "general":
                 clip_stats_df = clip_general(
                     clip_automated_df, clip_manual_df)
-                if statistics_df.empty:
-                    statistics_df = clip_stats_df
-                else:
-                    statistics_df = pd.concat([statistics_df,clip_stats_df])
             elif stats_type == "IoU":
                 IoU_Matrix = clip_IoU(clip_automated_df, clip_manual_df)
                 clip_stats_df = matrix_IoU_Scores(
                     IoU_Matrix, clip_manual_df, threshold)
-                if statistics_df.empty:
-                    statistics_df = clip_stats_df
-                else:
-                    statistics_df = pd.concat([statistics_df, clip_stats_df])
+            stats_to_add.append(clip_stats_df)
         except BaseException as e:
             num_errors += 1
             #print("Something went wrong with: " + clip)
@@ -280,6 +273,8 @@
         if num_processed % 50 == 0:
             print("Processed", num_processed, "clips in", int((time.time() - start_time) * 10) / 10.0, 'seconds')
             start_time = time.time()
+    # Create dataframe from stats
+    statistics_df = pd.concat(stats_to_add)
     if num_errors > 0:
         checkVerbose(f"Something went wrong with {num_errors} clips out of {len(clips)} clips", verbose)
     statistics_df.reset_index(inplace=True, drop=True)
@@ -736,8 +731,8 @@ def dataset_Catch(automated_df, manual_df):
     clips = automated_df["IN FILE"].to_list()
     # Removing duplicates
     clips = list(dict.fromkeys(clips))
-    # Initializing the ouput dataframe
-    manual_df_with_Catch = pd.DataFrame()
+    # Initializing list of dfs to add
+    clips_to_add = []
     # Looping through all of the audio clips that have been labelled.
     for clip in clips:
         print(clip)
@@ -748,10 +743,10 @@
         Catch_Array = clip_catch(clip_automated_df, clip_manual_df)
         # Appending the catch values per label onto the manual dataframe
         clip_manual_df["Catch"] = Catch_Array
-        if manual_df_with_Catch.empty:
-            manual_df_with_Catch = clip_manual_df
-        else:
-            manual_df_with_Catch = pd.concat([manual_df_with_Catch,clip_manual_df])
+        # Append manual df to list
+        clips_to_add.append(clip_manual_df)
+    # Create dataframe out of list
+    manual_df_with_Catch = pd.concat(clips_to_add)
     # Resetting the indices
     manual_df_with_Catch.reset_index(inplace=True, drop=True)
     return manual_df_with_Catch
@@ -812,8 +807,8 @@ def clip_statistics(
     # Finding the intersection between the manual and automated classes
     class_list = np.intersect1d(automated_class_list,manual_class_list)

-    # Initializing the output dataframe
-    clip_statistics = pd.DataFrame()
+    # Initializing the list of dfs to add
+    clips_to_add = []
     # Looping through each class and comparing the automated labels to the manual labels
     for class_ in class_list:
         #print(class_)
@@ -825,7 +820,8 @@
             clip_statistics = automated_labeling_statistics(temp_automated_class_df, temp_manual_class_df, stats_type = stats_type, threshold = threshold)
         else:
             temp_df = automated_labeling_statistics(temp_automated_class_df, temp_manual_class_df, stats_type = stats_type, threshold = threshold)
-            clip_statistics = pd.concat([clip_statistics,temp_df])
+            clips_to_add.append(temp_df)
+    clip_statistics = pd.concat(clips_to_add)
     clip_statistics.reset_index(inplace=True,drop=True)
     return clip_statistics

@@ -847,8 +843,8 @@ def class_statistics(clip_statistics):
     assert isinstance(clip_statistics,pd.DataFrame)
     assert "MANUAL ID" in clip_statistics.columns

-    # Initializing the output dataframe
-    class_statistics = pd.DataFrame()
+    # Initializing the list of dfs to add
+    stats_to_add = []
     # creating a list of the unique classes being passed in.
     class_list = clip_statistics["MANUAL ID"].to_list()
     class_list = list(dict.fromkeys(class_list))
@@ -860,6 +856,7 @@
             class_statistics = global_statistics(class_df, manual_id = class_)
         else:
             temp_df = global_statistics(class_df, manual_id = class_)
-            class_statistics = pd.concat([class_statistics,temp_df])
+            stats_to_add.append(temp_df)
+    class_statistics = pd.concat(stats_to_add)
     class_statistics.reset_index(inplace=True,drop=True)
     return class_statistics
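One behavioral difference worth flagging (a general pandas fact, not something this diff changes): the old empty-DataFrame seed returned an empty frame when nothing was collected, but pd.concat raises ValueError: No objects to concatenate when given an empty list, e.g. if every clip lands in the except branch. A small guard sketch; safe_concat is a hypothetical helper, not part of PyHa:

import pandas as pd

def safe_concat(frames):
    # pd.concat([]) raises ValueError("No objects to concatenate"),
    # so fall back to an empty DataFrame when nothing accumulated.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)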
11 changes: 7 additions & 4 deletions PyHa/tweetynet_package/tweetynet/Load_data_functions.py
@@ -216,12 +216,15 @@ def predictions_to_kaleidoscope(predictions, SIGNAL, audio_dir, audio_file, manu
         raise BaseException("No birds were detected!!")

     if offset.iloc[0] != 0:
-        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [0], "DURATION": [offset.iloc[0]]}))
+        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [0],
+                                             "DURATION": [offset.iloc[0]]
+                                             }))
     kaleidoscope_df.append(intermediary_df[intermediary_df["DURATION"] >= 2*time_bin_seconds])

     if offset.iloc[-1] < predictions.iloc[-1]["time_bins"]:
-        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [offset.iloc[-1]], "DURATION": [predictions.iloc[-1]["time_bins"] +
-        predictions.iloc[1]["time_bins"]]}))
+        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [offset.iloc[-1]],
+                                             "DURATION": [predictions.iloc[-1]["time_bins"] +
+                                             predictions.iloc[1]["time_bins"]]
+                                             }))

     kaleidoscope_df = pd.concat(kaleidoscope_df)
     kaleidoscope_df = kaleidoscope_df.reset_index(drop=True)
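In this function kaleidoscope_df is a Python list of DataFrames right up to the final pd.concat; the reflowed dict literals change formatting only. A stripped-down illustration of that flow with made-up numbers (a 10-second clip with one detection from 1.5 s to 9.0 s):

import pandas as pd

segments = []  # list of DataFrames, concatenated once at the end
segments.append(pd.DataFrame({"OFFSET": [0.0], "DURATION": [1.5]}))  # leading gap
segments.append(pd.DataFrame({"OFFSET": [1.5], "DURATION": [7.5]}))  # detection
segments.append(pd.DataFrame({"OFFSET": [9.0], "DURATION": [1.0]}))  # trailing gap
kaleidoscope_df = pd.concat(segments).reset_index(drop=True)
print(kaleidoscope_df)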
8 changes: 6 additions & 2 deletions PyHa/tweetynet_package/tweetynet/TweetyNetModel.py
@@ -91,7 +91,8 @@ def predict(self, test_dataset, model_weights=None, norm=False):
             self.load_weights(os.path.join("PyHa","tweetynet_package","tweetynet","config","tweetynet_weights.h5"))

         test_data_loader = DataLoader(test_dataset, batch_size=batch_size)
-        predictions = pd.DataFrame()
+        # Initialize list of predictions
+        preds_to_add = []
         self.model.eval()
         local_score = []
         dataiter = iter(test_data_loader)
@@ -111,7 +112,10 @@
                 bins = st_time + (int(uids[0].split("_")[0])*window_size)
                 d = {"uid": uids[0], "pred": pred, "label": labels, "time_bins": bins}
                 new_preds = pd.DataFrame(d)
-                predictions = pd.concat([predictions, new_preds])
+                # Append to list
+                preds_to_add.append(new_preds)
+        # Create df using list
+        predictions = pd.concat(preds_to_add)

         if norm:
             local_score = self.normalize(local_score, 0, 1)
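Same list-then-concat rewrite as in IsoAutio.py; the payoff grows with the number of batches because looped concat is quadratic in total rows. A rough, machine-dependent way to see the gap:

import time
import pandas as pd

chunk = pd.DataFrame({"pred": range(100)})

start = time.time()
df = pd.DataFrame()
for _ in range(1000):
    df = pd.concat([df, chunk])  # copies every previously gathered row
print("looped concat:", round(time.time() - start, 2), "s")

start = time.time()
df = pd.concat([chunk] * 1000)  # one linear pass
print("single concat:", round(time.time() - start, 2), "s")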