Import fixes #172

Open · wants to merge 34 commits into main from import-fixes
Commits (34)
4452072
feat: Adds poetry
ntlhui Nov 14, 2023
f03c455
feat: Adds workflow
ntlhui Nov 14, 2023
7d74a39
fix: Adds python version limitation
Nov 14, 2023
ec86f00
fix: Limits tensorflow-io-gcs-filesystem version
Nov 14, 2023
47f535a
fix: Fixes tensorflow for Windows
Nov 14, 2023
512b4da
Update IsoAutio.py
Sean1572 Nov 14, 2023
708d8a5
Update IsoAutio.py
Sean1572 Nov 14, 2023
f83569b
fix: Fixes env, adds testing
Nov 14, 2023
66afad1
ci: Fixes workflow
Nov 14, 2023
1220470
ci: Fixes poetry testing
Nov 14, 2023
7e4d223
ci: Fixes pytest invocation
Nov 14, 2023
c528e8c
fix: Downgrades torch
Nov 14, 2023
4cf5824
ci: Fixes env
Nov 14, 2023
c4b8bca
feat: Adds support for 3.10 and docs
Nov 14, 2023
e30b39a
fix: Fixes packaging name
Nov 14, 2023
9d2a1c0
fix: Downgrades pytest
Nov 14, 2023
40b8831
fix: Upgrades pytest to latest
Nov 14, 2023
4bb128f
fix: Adds test script
Nov 14, 2023
d92165c
chore: Removes conda
Nov 14, 2023
7d0aa42
docs: Adds poetry tensorflow notes
Nov 14, 2023
58454e1
Updated TweetyNET, adjusted parameters on tut
Nov 15, 2023
a770cfe
Fixed microfaune error
Nov 15, 2023
2a4fd72
working on fixing birdnet
Nov 15, 2023
7a15d2a
added resampy to poetry (still errors with brdnet)
Nov 15, 2023
784f53c
Fixed package issues and birdnet issues
Nov 16, 2023
7c0cca5
Replaced more .append with pd.concat
Nov 16, 2023
d2cb390
Revert PyHa Tutorial
sprestrelski Nov 17, 2023
f77d93b
reran the notebook
Nov 17, 2023
4df196c
Merge branch 'import-fixes'
Nov 17, 2023
42afd95
Took out looped concat calls to follow best practice
Nov 29, 2023
55bae93
Added comments for clarity
Nov 29, 2023
574b7ed
Removed tutorial file for PR
Nov 29, 2023
362cdf4
Merge branch 'main' into import-fixes
TQZhang04 Nov 30, 2023
78d1f7e
fix: added back PyHa_Tutorial from main
May 15, 2024
24 changes: 12 additions & 12 deletions PyHa/IsoAutio.py
@@ -918,9 +918,9 @@ def generate_automated_labels_microfaune(
     # print("model \"{}\" does not exist".format(ml_model))
     # return None

-    # init labels dataframe
-    annotations = pd.DataFrame()
     # generate local scores for every bird file in chosen directory
+    # initialize list of entries to add
+    entries_to_add = []
     for audio_file in os.listdir(audio_dir):
         # skip directories
         if os.path.isdir(audio_dir + audio_file):
@@ -987,17 +987,17 @@ def generate_automated_labels_microfaune(
                 manual_id=manual_id,
                 normalize_local_scores=normalize_local_scores)
             # print(new_entry)
-            if annotations.empty:
-                annotations = new_entry
-            else:
-                annotations = pd.concat([annotations, new_entry])
+            # append entry to list
+            entries_to_add.append(new_entry)
         except KeyboardInterrupt:
             exit("Keyboard interrupt")
         except BaseException as e:
             checkVerbose(e, isolation_parameters)
             checkVerbose("Error in isolating bird calls from" + audio_file, isolation_parameters)

             continue
+    # Create dataframe from entries
+    annotations = pd.concat(entries_to_add)
     # Quick fix to indexing
     annotations.reset_index(inplace=True, drop=True)
     return annotations
@@ -1056,8 +1056,8 @@ def generate_automated_labels_tweetynet(
         device = torch.device('cpu')
     detector = TweetyNetModel(2, (1, 86, 86), 86, device)

-    # init labels dataframe
-    annotations = pd.DataFrame()
+    # init labels list
+    entries_to_add = []
     # generate local scores for every bird file in chosen directory
     for audio_file in os.listdir(audio_dir):
         # skip directories
@@ -1128,16 +1128,16 @@ def generate_automated_labels_tweetynet(
                 manual_id=manual_id,
                 normalize_local_scores=normalize_local_scores)
             # print(new_entry)
-            if annotations.empty:
-                annotations = new_entry
-            else:
-                annotations = pd.concat([annotations, new_entry])
+            # append entry to list
+            entries_to_add.append(new_entry)
         except KeyboardInterrupt:
             exit("Keyboard interrupt")
         except BaseException as e:
             checkVerbose("Error in isolating bird calls from " + audio_file, isolation_parameters)
             logger.exception(f"Error in isolating bird calls from {audio_file}")
             continue
+    # Create dataframe from entries
+    annotations = pd.concat(entries_to_add)
     # Quick fix to indexing
     annotations.reset_index(inplace=True, drop=True)
     return annotations
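Note on the pattern above, which recurs throughout this PR: instead of concatenating into the accumulator DataFrame on every iteration (which copies all rows gathered so far each time), the loop now collects entries in a plain Python list and calls pd.concat once at the end. A minimal sketch of the idea with illustrative names; the one-row DataFrame stands in for the real per-file labeling output:

import pandas as pd

def build_annotations(items):
    # Collect one DataFrame per item; appending to a list is O(1).
    entries_to_add = []
    for item in items:
        # stand-in for the real per-file labeling step
        new_entry = pd.DataFrame({"IN FILE": [item], "OFFSET": [0.0], "DURATION": [3.0]})
        entries_to_add.append(new_entry)
    # A single concat is linear in total rows; concatenating inside
    # the loop re-copies the accumulated frame on every pass.
    annotations = pd.concat(entries_to_add)
    annotations.reset_index(inplace=True, drop=True)
    return annotations

print(build_annotations(["a.wav", "b.wav"]))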
4 changes: 2 additions & 2 deletions PyHa/birdnet_lite/analyze.py
@@ -85,8 +85,8 @@ def splitSignal(sig, rate, overlap, seconds=3.0, minlen=1.5):
 def readAudioData(path, overlap, sample_rate=48000):

     print('READING AUDIO DATA...', end=' ', flush=True)
-    print("Path: ", path)
-    # Open file with librosa (uses ffmanaeg or libav)
+    # print("Path: ", path)
+    # Open file with librosa (uses ffmpeg or libav)
     try:
         sig, rate = librosa.load(path, sr=sample_rate, mono=True, res_type='kaiser_fast')
         clip_length = librosa.get_duration(y=sig, sr=rate)
41 changes: 19 additions & 22 deletions PyHa/statistics.py
@@ -243,13 +243,13 @@ def automated_labeling_statistics(
     clips = automated_df["IN FILE"].to_list()
     # Removing duplicates
     clips = list(dict.fromkeys(clips))
-    # Initializing the returned dataframe
-    statistics_df = pd.DataFrame()

     num_errors = 0
     num_processed = 0

     start_time = time.time()
+    # init clips list
+    stats_to_add = []
     # Looping through each audio clip
     for clip in clips:
         num_processed += 1
@@ -260,18 +260,11 @@
             if stats_type == "general":
                 clip_stats_df = clip_general(
                     clip_automated_df, clip_manual_df)
-                if statistics_df.empty:
-                    statistics_df = clip_stats_df
-                else:
-                    statistics_df = pd.concat([statistics_df,clip_stats_df])
             elif stats_type == "IoU":
                 IoU_Matrix = clip_IoU(clip_automated_df, clip_manual_df)
                 clip_stats_df = matrix_IoU_Scores(
                     IoU_Matrix, clip_manual_df, threshold)
-                if statistics_df.empty:
-                    statistics_df = clip_stats_df
-                else:
-                    statistics_df = pd.concat([statistics_df, clip_stats_df])
+            stats_to_add.append(clip_stats_df)
         except BaseException as e:
             num_errors += 1
             #print("Something went wrong with: " + clip)
@@ -280,6 +273,8 @@
         if num_processed % 50 == 0:
             print("Processed", num_processed, "clips in", int((time.time() - start_time) * 10) / 10.0, 'seconds')
             start_time = time.time()
+    # Create dataframe from stats
+    statistics_df = pd.concat(stats_to_add)
     if num_errors > 0:
         checkVerbose(f"Something went wrong with {num_errors} clips out of {len(clips)} clips", verbose)
     statistics_df.reset_index(inplace=True, drop=True)
@@ -736,8 +731,8 @@ def dataset_Catch(automated_df, manual_df):
     clips = automated_df["IN FILE"].to_list()
     # Removing duplicates
     clips = list(dict.fromkeys(clips))
-    # Initializing the ouput dataframe
-    manual_df_with_Catch = pd.DataFrame()
+    # Initializing list of dfs to add
+    clips_to_add = []
     # Looping through all of the audio clips that have been labelled.
     for clip in clips:
         print(clip)
@@ -748,10 +743,10 @@
         Catch_Array = clip_catch(clip_automated_df, clip_manual_df)
         # Appending the catch values per label onto the manual dataframe
         clip_manual_df["Catch"] = Catch_Array
-        if manual_df_with_Catch.empty:
-            manual_df_with_Catch = clip_manual_df
-        else:
-            manual_df_with_Catch = pd.concat([manual_df_with_Catch,clip_manual_df])
+        # Append manual df to list
+        clips_to_add.append(clip_manual_df)
+    # Create dataframe out of list
+    manual_df_with_Catch = pd.concat(clips_to_add)
     # Resetting the indices
     manual_df_with_Catch.reset_index(inplace=True, drop=True)
     return manual_df_with_Catch
@@ -812,8 +807,8 @@ def clip_statistics(
     # Finding the intersection between the manual and automated classes
     class_list = np.intersect1d(automated_class_list,manual_class_list)

-    # Initializing the output dataframe
-    clip_statistics = pd.DataFrame()
+    # Initializing the list of dfs to add
+    clips_to_add = []
     # Looping through each class and comparing the automated labels to the manual labels
     for class_ in class_list:
         #print(class_)
@@ -825,7 +820,8 @@
             clip_statistics = automated_labeling_statistics(temp_automated_class_df, temp_manual_class_df, stats_type = stats_type, threshold = threshold)
         else:
             temp_df = automated_labeling_statistics(temp_automated_class_df, temp_manual_class_df, stats_type = stats_type, threshold = threshold)
-            clip_statistics = pd.concat([clip_statistics,temp_df])
+            clips_to_add.append(temp_df)
+    clip_statistics = pd.concat(clips_to_add)
     clip_statistics.reset_index(inplace=True,drop=True)
     return clip_statistics

@@ -847,8 +843,8 @@ def class_statistics(clip_statistics):
     assert isinstance(clip_statistics,pd.DataFrame)
     assert "MANUAL ID" in clip_statistics.columns

-    # Initializing the output dataframe
-    class_statistics = pd.DataFrame()
+    # Initializing the list of dfs to add
+    stats_to_add = []
     # creating a list of the unique classes being passed in.
     class_list = clip_statistics["MANUAL ID"].to_list()
     class_list = list(dict.fromkeys(class_list))
@@ -860,6 +856,7 @@
             class_statistics = global_statistics(class_df, manual_id = class_)
         else:
             temp_df = global_statistics(class_df, manual_id = class_)
-            class_statistics = pd.concat([class_statistics,temp_df])
+            stats_to_add.append(temp_df)
+    class_statistics = pd.concat(stats_to_add)
     class_statistics.reset_index(inplace=True,drop=True)
     return class_statistics
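One behavioral difference worth flagging (a general pandas fact, not something this diff changes): the old empty-DataFrame seed returned an empty frame when nothing was collected, but pd.concat raises ValueError: No objects to concatenate when given an empty list, e.g. if every clip lands in the except branch. A small guard sketch; safe_concat is a hypothetical helper, not part of PyHa:

import pandas as pd

def safe_concat(frames):
    # pd.concat([]) raises ValueError("No objects to concatenate"),
    # so fall back to an empty DataFrame when nothing accumulated.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)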
11 changes: 7 additions & 4 deletions PyHa/tweetynet_package/tweetynet/Load_data_functions.py
@@ -216,12 +216,15 @@ def predictions_to_kaleidoscope(predictions, SIGNAL, audio_dir, audio_file, manu
         raise BaseException("No birds were detected!!")

     if offset.iloc[0] != 0:
-        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [0], "DURATION": [offset.iloc[0]]}))
+        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [0],
+                                             "DURATION": [offset.iloc[0]]
+                                             }))
     kaleidoscope_df.append(intermediary_df[intermediary_df["DURATION"] >= 2*time_bin_seconds])

     if offset.iloc[-1] < predictions.iloc[-1]["time_bins"]:
-        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [offset.iloc[-1]], "DURATION": [predictions.iloc[-1]["time_bins"] +
-        predictions.iloc[1]["time_bins"]]}))
+        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [offset.iloc[-1]],
+                                             "DURATION": [predictions.iloc[-1]["time_bins"] +
+                                             predictions.iloc[1]["time_bins"]]
+                                             }))

     kaleidoscope_df = pd.concat(kaleidoscope_df)
     kaleidoscope_df = kaleidoscope_df.reset_index(drop=True)
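In this function kaleidoscope_df is a Python list of DataFrames right up to the final pd.concat; the reflowed dict literals change formatting only. A stripped-down illustration of that flow with made-up numbers (a 10-second clip with one detection from 1.5 s to 9.0 s):

import pandas as pd

segments = []  # list of DataFrames, concatenated once at the end
segments.append(pd.DataFrame({"OFFSET": [0.0], "DURATION": [1.5]}))  # leading gap
segments.append(pd.DataFrame({"OFFSET": [1.5], "DURATION": [7.5]}))  # detection
segments.append(pd.DataFrame({"OFFSET": [9.0], "DURATION": [1.0]}))  # trailing gap
kaleidoscope_df = pd.concat(segments).reset_index(drop=True)
print(kaleidoscope_df)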
8 changes: 6 additions & 2 deletions PyHa/tweetynet_package/tweetynet/TweetyNetModel.py
@@ -91,7 +91,8 @@ def predict(self, test_dataset, model_weights=None, norm=False):
             self.load_weights(os.path.join("PyHa","tweetynet_package","tweetynet","config","tweetynet_weights.h5"))

         test_data_loader = DataLoader(test_dataset, batch_size=batch_size)
-        predictions = pd.DataFrame()
+        # Initialize list of predictions
+        preds_to_add = []
         self.model.eval()
         local_score = []
         dataiter = iter(test_data_loader)
@@ -111,7 +112,10 @@
                 bins = st_time + (int(uids[0].split("_")[0])*window_size)
                 d = {"uid": uids[0], "pred": pred, "label": labels, "time_bins": bins}
                 new_preds = pd.DataFrame(d)
-                predictions = pd.concat([predictions, new_preds])
+                # Append to list
+                preds_to_add.append(new_preds)
+        # Create df using list
+        predictions = pd.concat(preds_to_add)

         if norm:
             local_score = self.normalize(local_score, 0, 1)
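Same list-then-concat rewrite as in IsoAutio.py; the payoff grows with the number of batches because looped concat is quadratic in total rows. A rough, machine-dependent way to see the gap:

import time
import pandas as pd

chunk = pd.DataFrame({"pred": range(100)})

start = time.time()
df = pd.DataFrame()
for _ in range(1000):
    df = pd.concat([df, chunk])  # copies every previously gathered row
print("looped concat:", round(time.time() - start, 2), "s")

start = time.time()
df = pd.concat([chunk] * 1000)  # one linear pass
print("single concat:", round(time.time() - start, 2), "s")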