In [1]:
#from microfaune_local_score import *
from PyHa.statistics import *
from PyHa.IsoAutio import *
from PyHa.visualizations import *
import pandas as pd

## Running Moment to Moment Labeling System on a Directory of WAV Files 

In [2]:
# Directory of WAV files to run the labeling system on, relative to the
# notebook's working directory. Point this at another folder to label a
# different dataset.
path = "./TEST/"

### Dictionary that defines the isolation technique and parameters used to generate automated labels 

In [3]:
# Isolation technique and tuning values handed to the automated labeling and
# visualization calls in this notebook. Only the keys and values are fixed
# here; how each one is interpreted is decided inside PyHa.
isolation_parameters = dict(
    technique="steinberg",
    threshold_type="median",
    threshold_const=2.0,
    threshold_min=0.0,
    window_size=2.0,
    chunk_size=5.0,
)

In [4]:
# Produce automated labels for the audio under `path` using the settings in
# `isolation_parameters`, with local-score normalization switched on.
automated_df = generate_automated_labels(
    path,
    isolation_parameters,
    normalize_local_scores=True,
)

### Pandas Output of Automated Labeling

In [5]:
# Bare expression: Jupyter renders the automated-label table as this cell's output.
automated_df

Unnamed: 0,FOLDER,IN FILE,CHANNEL,CLIP LENGTH,SAMPLE RATE,OFFSET,DURATION,MANUAL ID
0,./TEST/,20190622_210000.WAV,0,60.0,44100,0.0,34.056122,bird
1,./TEST/,20190622_210000.WAV,0,60.0,44100,37.229116,9.005578,bird
2,./TEST/,20190622_210000.WAV,0,60.0,44100,51.588231,6.755442,bird
3,./TEST/,20190623_222000.WAV,0,60.0,44100,0.0,6.567347,bird
4,./TEST/,20190623_222000.WAV,0,60.0,44100,12.106463,2.0,bird
5,./TEST/,20190623_222000.WAV,0,60.0,44100,18.276939,8.472041,bird
6,./TEST/,BlackFacedAntbird1.wav,0,31.216327,44100,0.0,1.371156,bird
7,./TEST/,BlackFacedAntbird1.wav,0,31.216327,44100,2.827551,3.438231,bird
8,./TEST/,BlackFacedAntbird1.wav,0,31.216327,44100,6.423129,2.301565,bird
9,./TEST/,BlackFacedAntbird1.wav,0,31.216327,44100,8.812449,2.742313,bird


### Function that gathers statistics about the duration of labels 

In [6]:
# Summary statistics over the durations of the automated labels; the result
# is only displayed, not stored.
annotation_duration_statistics(automated_df)

Unnamed: 0,COUNT,MODE,MEAN,STANDARD DEVIATION,MIN,Q1,MEDIAN,Q3,MAX
0,31,2.0,5.353472,5.725178,1.371156,2.603129,4.157347,5.282415,34.056122


### Human Manual Labels for Comparison 

In [7]:
# Load the human-made labels that the automated labels are compared against.
# The CSV path is relative to the notebook's working directory.
manual_df = pd.read_csv("Manual_Labels.csv")
# Alternative label file (disabled):
#manual_df = pd.read_csv("BirdCLEF2020_Validation.csv")
# Bare expression so the loaded table is shown as the cell output.
manual_df

Unnamed: 0,FOLDER,IN FILE,CLIP LENGTH,CHANNEL,OFFSET,DURATION,SAMPLING RATE,MANUAL ID,TIME SPENT
0,./TEST/,20190622_210000.WAV,60.0,0,1.125,0.42,384000,bird,405.916
1,./TEST/,20190622_210000.WAV,60.0,0,2.155,0.38,384000,bird,405.919
2,./TEST/,20190622_210000.WAV,60.0,0,2.625,0.29,384000,bird,405.919
3,./TEST/,20190622_210000.WAV,60.0,0,3.085,0.41,384000,bird,405.919
4,./TEST/,20190622_210000.WAV,60.0,0,1.605,0.35,384000,bird,405.919
...,...,...,...,...,...,...,...,...,...
103,./TEST/,20190624_152000.WAV,60.0,0,4.095,0.15,384000,bird,137.624
104,./TEST/,20190624_152000.WAV,60.0,0,10.915,0.11,384000,bird,137.627
105,./TEST/,20190624_152000.WAV,60.0,0,28.005,0.37,384000,bird,137.627
106,./TEST/,20190624_152000.WAV,60.0,0,23.395,0.16,384000,bird,137.627


In [8]:
# Same duration summary as for the automated labels, now over the human
# labels, so the two outputs can be compared side by side.
annotation_duration_statistics(manual_df)

Unnamed: 0,COUNT,MODE,MEAN,STANDARD DEVIATION,MIN,Q1,MEDIAN,Q3,MAX
0,108,0.49,1.037133,1.179231,0.11,0.45,0.67,1.0775,9.15


### Helper function to convert to kaleidoscope-compatible format 

In [9]:
# Convert the human labels to the kaleidoscope-compatible layout. The result
# is only displayed here; it is not assigned to a variable.
kaleidoscope_conversion(manual_df)

Unnamed: 0,FOLDER,IN FILE,CHANNEL,OFFSET,DURATION,MANUAL ID
0,./TEST/,20190622_210000.WAV,0,1.125,0.42,bird
1,./TEST/,20190622_210000.WAV,0,2.155,0.38,bird
2,./TEST/,20190622_210000.WAV,0,2.625,0.29,bird
3,./TEST/,20190622_210000.WAV,0,3.085,0.41,bird
4,./TEST/,20190622_210000.WAV,0,1.605,0.35,bird
...,...,...,...,...,...,...
103,./TEST/,20190624_152000.WAV,0,4.095,0.15,bird
104,./TEST/,20190624_152000.WAV,0,10.915,0.11,bird
105,./TEST/,20190624_152000.WAV,0,28.005,0.37,bird
106,./TEST/,20190624_152000.WAV,0,23.395,0.16,bird


## Different Combinations of Local Score Visualization Graphs

### Baseline Graph without any annotations

In [None]:
# Clip used by the visualization cells in this section; later cells read
# `clip_path`, so this cell must run first.
clip_path = "./TEST/ScreamingPiha2.wav"
# Called with the clip path only, i.e. with no optional arguments set.
local_score_visualization(clip_path)

### Baseline graph with log scale

In [None]:
# Same clip as the baseline graph, with the log_scale option enabled.
# Requires `clip_path` from the baseline cell.
local_score_visualization(clip_path, log_scale=True)

### Baseline graph with normalized local score values between [0,1] 

In [None]:
# Same clip as the baseline graph, with local-score normalization enabled.
# Requires `clip_path` from the baseline cell.
local_score_visualization(clip_path, normalize_local_scores=True)

### Graph with Automated Labeling 

In [None]:
# Here `automated_df` is a boolean flag, not the DataFrame built earlier:
# True requests the automated-label overlay, and `isolation_parameters`
# supplies the labeling settings for it.
local_score_visualization(
    clip_path,
    automated_df=True,
    isolation_parameters=isolation_parameters,
)

### Graph with Human Labeling

In [None]:
# Keep only the human labels whose "IN FILE" matches the clip being plotted.
piha_human_labels = manual_df[manual_df["IN FILE"] == "ScreamingPiha2.wav"]
local_score_visualization(clip_path, human_df=piha_human_labels)

### Graph with Both Automated and Human Labels 
    Orange ==> True Positive
    Red ==> False Negative
    Yellow ==> False Positive
    White ==> True Negative

In [None]:
# Human labels for the plotted clip only; the automated side is requested
# via the boolean `automated_df` flag plus the isolation settings.
piha_human_labels = manual_df[manual_df["IN FILE"] == "ScreamingPiha2.wav"]
local_score_visualization(
    clip_path,
    automated_df=True,
    isolation_parameters=isolation_parameters,
    human_df=piha_human_labels,
)

### Another Visualization of True Positives, False Positives, False Negatives, and True Negatives 

In [None]:
# Restrict both label tables to the same clip before comparing them.
piha_file = "ScreamingPiha2.wav"
automated_piha_df = automated_df[automated_df["IN FILE"] == piha_file]
manual_piha_df = manual_df[manual_df["IN FILE"] == piha_file]

# Assigning the return value keeps it from being echoed as the cell output.
piha_stats = plot_bird_label_scores(automated_piha_df, manual_piha_df)

### Function that generates statistics to gauge efficacy of automated labeling compared to human labels 

In [None]:
# Compare automated and human labels using the "general" statistics mode.
statistics_df = automated_labeling_statistics(
    automated_df,
    manual_df,
    stats_type="general",
)
# Bare expression so the result is shown as the cell output.
statistics_df

### Function that takes the statistical output of all of the clips and gets the equivalent global scores

In [None]:
# Feed the "general" statistics table from the previous cell into the global
# aggregation; the result is only displayed, not stored.
global_dataset_statistics(statistics_df)

### Function that takes in the manual and automated labels for a clip and outputs human label-by-label IoU Scores. Used to derive statistics that measure how well a system is isolating desired segments of audio clips

In [None]:
# Uses the per-clip subsets (`automated_piha_df`, `manual_piha_df`) built in
# the earlier Screaming Piha cell. The variable name is kept as-is because
# the scoring cell that follows reads it.
Intersection_over_Union_Matrix = clip_IoU(automated_piha_df, manual_piha_df)
print(Intersection_over_Union_Matrix)

### Function that turns the IoU Matrix of a clip into true positive and false positive values, as well as computing the precision, recall, and F1 statistics

In [None]:
# Third positional argument of matrix_IoU_Scores. NOTE(review): presumably the
# IoU threshold, since the wrapper call later in this notebook uses
# threshold = 0.5 as well; confirm against PyHa.
iou_threshold = 0.5
matrix_IoU_Scores(Intersection_over_Union_Matrix, manual_piha_df, iou_threshold)

### Wrapper function that takes matrix_IoU_Scores across multiple clips. Allows user to modify the threshold that determines whether or not a label is a true positive.

In [None]:
# Same wrapper as the "general" statistics cell, switched to IoU mode with an
# explicit threshold.
stats_df = automated_labeling_statistics(
    automated_df,
    manual_df,
    stats_type="IoU",
    threshold=0.5,
)
# Bare expression so the result is shown as the cell output.
stats_df

### Function that takes the output of dataset_IoU Statistics and outputs a global count of true positives and false positives, as well as computing common metrics across the dataset

In [None]:
# Aggregate the IoU statistics table (`stats_df` from the previous cell)
# into dataset-wide figures.
global_stats_df = global_IoU_Statistics(stats_df)
# Bare expression so the result is shown as the cell output.
global_stats_df