In [1]:
import glob
import os

import pandas as pd

from soundbay.utils.metadata_processing import (
    bg_from_non_overlap_calls, 
    correct_call_times_with_duration, 
    non_overlap_df,
    reorder_columns_to_default_view, 
    merge_calls
)

In [5]:
# Glob pattern selecting the inference-result CSVs to post-process.
files_path = '../outputs/Inference_results-2024-10-19_09*.csv'

In [6]:
# Expand the pattern into concrete paths; glob does not guarantee any ordering.
raven_files = glob.glob(files_path)

In [7]:
# Display the matched paths in sorted order (raven_files itself is left unsorted).
sorted(raven_files)

['../outputs/Inference_results-2024-10-19_09-26-08-d0grath1-5756.221130122958.csv',
 '../outputs/Inference_results-2024-10-19_09-26-28-d0grath1-5756.221220162958.csv',
 '../outputs/Inference_results-2024-10-19_09-26-45-d0grath1-5756.221222152958.csv',
 '../outputs/Inference_results-2024-10-19_09-27-04-d0grath1-5756.221223092958.csv',
 '../outputs/Inference_results-2024-10-19_09-27-21-d0grath1-5756.221229032958.csv',
 '../outputs/Inference_results-2024-10-19_09-27-41-d0grath1-5756.230118024056.csv']

In [290]:
th = 0.3  # a window is kept when any class probability is greater than this
FILE_OFFSET_S = 840  # seconds added per file index; presumably each recording's length -- TODO confirm
MIN_BEGIN_S = 3  # windows beginning earlier than this (seconds into a file) are dropped
CLASS_COLUMNS = ['Upsweeps', 'Downsweeps']


def load_file_predictions(csv_path, file_idx, prob_th,
                          file_offset_s=FILE_OFFSET_S, min_begin_s=MIN_BEGIN_S):
    """Load one inference-result CSV and keep its above-threshold windows.

    The CSV must provide the columns ``begin_time``, ``end_time``, ``channel``
    and one probability column per entry of ``CLASS_COLUMNS``.

    Parameters
    ----------
    csv_path : str
        Path of the CSV. The text after the last ``-`` (minus the ``.csv``
        suffix) is used as the recording name, with ``.wav`` appended.
    file_idx : int
        Position of the file in the sorted file list; the file's windows are
        shifted by ``file_idx * file_offset_s`` seconds.
    prob_th : float
        A window gets ``label == 1`` when any class probability is > prob_th.
    file_offset_s : float, default FILE_OFFSET_S
        Time shift applied per file index.
    min_begin_s : float, default MIN_BEGIN_S
        Windows whose in-file begin time is below this are discarded.

    Returns
    -------
    pandas.DataFrame
        The CSV columns followed by the selection-table columns, the
        per-window scores (``label``, ``class``, ``prob``, ``call_prob``,
        ``file``) and the neighbour-comparison columns, restricted to rows
        with ``label == 1``.
    """
    time_shift = file_idx * file_offset_s
    wav_name = csv_path.split('-')[-1].split('.csv')[0] + '.wav'

    kept = (
        pd.read_csv(csv_path)
        .assign(**{
            'Begin File': wav_name,
            'Begin Time (s)': lambda x: x['begin_time'] + time_shift,
            'End Time (s)': lambda x: x['end_time'] + time_shift,
            'End File': lambda x: x['Begin File'],
            'View': 1,
            'Channel': lambda x: x['channel'],
            'label': lambda x: x[CLASS_COLUMNS].gt(prob_th).any(axis=1).astype(int),
            'class': lambda x: x[CLASS_COLUMNS].idxmax(axis=1),
            'prob': lambda x: x[CLASS_COLUMNS].max(axis=1).round(3),
            'call_prob': lambda x: x[CLASS_COLUMNS].sum(axis=1).round(3),
            'file': file_idx,
        })
        .pipe(lambda preds: preds[preds['Begin Time (s)'] >= time_shift + min_begin_s])
        .query('label==1')
    )

    # Neighbour statistics are computed on the already-filtered rows, so
    # "next"/"prev" refer to the adjacent kept window of the same file.
    # `.values` strips the index so assignment is purely positional.
    return kept.assign(**{
        'next_prob': lambda x: x.prob.shift(-1).values,
        'prev_prob': lambda x: x.prob.shift(1).values,
        'next_overlap': lambda x: (x.begin_time.shift(-1) < x.end_time).values,
        'prev_overlap': lambda x: (x.end_time.shift() > x.begin_time).values,
        # Relative difference to the neighbour; negative when the neighbour scores higher.
        'next_prob_pct_change': lambda x: (x.prob - x.prob.shift(-1)).div(x.prob).values,
        'prev_prob_pct_change': lambda x: (x.prob - x.prob.shift(1)).div(x.prob).values,
    })


# Sorted so that a file's index (and hence its time shift) is deterministic.
raven_files = sorted(glob.glob(files_path))
if not raven_files:
    raise FileNotFoundError(f'No inference result files match {files_path!r}')

df = (
    pd.concat([
        load_file_predictions(path, idx, th)
        for idx, path in enumerate(raven_files)
    ])
    .sort_values(['Begin File', 'Begin Time (s)'])
    .reset_index(drop=True)
)

In [283]:
# Dump the columns from 'channel' through 'prob' (label slice, both ends included)
# as a tab-separated file; the DataFrame index is written as the first column.
df.loc[:, 'channel': 'prob'].to_csv('overlap_calls.txt', sep='\t')

In [312]:
# A detection is discarded when an overlapping neighbour's probability exceeds
# its own by more than this fraction of its own probability.
overlap_prob_drop = 0.1

outscored_by_next = df.next_overlap & (df.next_prob_pct_change < -overlap_prob_drop)
outscored_by_prev = df.prev_overlap & (df.prev_prob_pct_change < -overlap_prob_drop)
surviving_calls = df.loc[~outscored_by_next & ~outscored_by_prev]

merged_by_call_type = (
    # Project helper from soundbay.utils.metadata_processing; presumably resolves
    # the remaining overlapping detections -- see its definition for specifics.
    non_overlap_df(surviving_calls, overlap_pct_th=0.3)
    .reset_index(drop=True)
    # Round the in-file times, then rebuild the shifted times from the rounded
    # values (840 s per file index).
    .assign(**{
        'end_time': lambda calls: calls.end_time.round(3),
        'begin_time': lambda calls: calls.begin_time.round(3),
        'Begin Time (s)': lambda calls: calls['begin_time'] + (calls['file'] * 840),
        'End Time (s)': lambda calls: calls['end_time'] + (calls['file'] * 840),
    })
    # Selection-table columns: 1-based running id and a fixed frequency band.
    .assign(**{
        'Selection': lambda calls: calls.index + 1,
        'Low Freq (Hz)': 0,
        'High Freq (Hz)': 48000,
    })
    .loc[:, 'channel':'High Freq (Hz)']
)

In [313]:
# Columns written to the tab-separated prediction files, in output order.
txt_columns = [
    'Selection', 'View', 'Channel',
    'Begin Time (s)', 'End Time (s)',
    'Begin File', 'End File',
    'prob',
]

In [316]:
# Sanity check: (number of retained detections, number of columns).
merged_by_call_type.shape

(173, 27)

In [317]:
# Write all detections to a single tab-separated file, using the shifted
# 'Begin Time (s)' / 'End Time (s)' values.
# NOTE(review): the default index is written as an unnamed first column next to
# 'Selection' (which is index + 1) -- confirm the consumer of this file expects it.
merged_by_call_type[txt_columns].to_csv('predictions_all_files.txt', sep='\t')

In [319]:
# Write one tab-separated predictions file per source recording, with times
# expressed relative to the start of that recording (begin_time / end_time)
# instead of the shifted multi-file timeline.
per_file_dir = 'predictions_per_file'
os.makedirs(per_file_dir, exist_ok=True)  # to_csv does not create missing directories

# Iterate over the groups instead of using groupby.apply: the work is a pure
# side effect (file writes), and each group frame retains its 'Begin File'
# column, which txt_columns selects.
for begin_file, file_calls in merged_by_call_type.groupby('Begin File'):
    (
        file_calls
        .assign(**{
            'Begin Time (s)': lambda x: x['begin_time'],
            'End Time (s)': lambda x: x['end_time'],
        })
        [txt_columns]
        .to_csv(f'{per_file_dir}/predictions_{begin_file.split(".wav")[0]}.txt', sep='\t')
    )