In [1]:
import pandas as pd
import os
import numpy as np

pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
pd.options.display.max_colwidth = None

from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

In [2]:
from soundbay.utils.metadata_processing import load_n_adapt_raven_annotation_table_to_dv_dataset_requirements

# params

In [3]:
annotations_dir = '../datasets/2021_annotations/'
cols2drop = ['View', 'Channel', 'Low Freq (Hz)', 'High Freq (Hz)', 'Delta Time (s)', 'Delta Freq (Hz)',
             'Avg Power Density (dB FS/Hz)']

# load annotations

In [4]:
df_list = []
for filename in os.listdir(annotations_dir):
    try:
        #         print(filename)
        annotation_file_path = os.path.join(annotations_dir, filename)
        small_df = load_n_adapt_raven_annotation_table_to_dv_dataset_requirements(annotation_file_path)
        df_list.append(small_df)
    except UnicodeDecodeError:
        print('exception:', 'UnicodeDecodeError')
        print('occured in file:', filename)
        continue
    except Exception as e:
        print('exception:', e)
        print('occured in file:', filename)
        continue
        

print(len(df_list))
df_all_annotations = pd.concat(df_list)
df_all_annotations = df_all_annotations.drop(cols2drop, axis=1)
# df_all_annotations.head()

exception: UnicodeDecodeError
occured in file: 210904-093942_Tr2.Table.1.selections.txt
exception: UnicodeDecodeError
occured in file: 210904-093942_Tr1.Table.1.selections- Annotated.txt
exception: 'end_time'
occured in file: 210904-111316_Tr2restofrecord.txt
33


In [5]:
df_all_annotations.shape
df_all_annotations.head()
df_all_annotations.tail()

(14898, 6)

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
0,1,2.008108,2.699544,,210825-135601_Tr1,0.691436
1,3,5.619289,6.185679,,210825-135601_Tr1,0.566389
2,4,8.171719,8.716042,,210825-135601_Tr1,0.544322
3,5,11.245463,12.098725,,210825-135601_Tr1,0.853262
4,6,16.927039,17.45665,,210825-135601_Tr1,0.529611


Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
763,777,1193.463021,1194.461811,,210824-104507_Tr1.txt,0.99879
764,778,1197.903048,1198.732551,,210824-104507_Tr1.txt,0.829504
765,779,1209.800186,1211.069834,,210824-104507_Tr1.txt,1.269648
766,780,1213.084343,1214.328598,,210824-104507_Tr1.txt,1.244255
767,781,1214.514813,1215.005744,,210824-104507_Tr1.txt,0.490931


# explore labels

In [6]:
df_all_annotations.Annotation.unique()

array([nan, '?', 'Dolphins?', 'un', 'sc', 'un ', 'un !!! What ?', 'sc ?',
       'SC', 'd', 'un- weird whale sound probably', 'un d/w', 'dolphin',
       'd / cs ?', 'sc \\ d \\ un', 'cs ?', 'un \\ d ?', 'sc ? un',
       'few calls in this annotation', 'baby whale?', 'song (s)', 's',
       'un/d', 'un ?', 'w & un', '!', 'sc?', 'd?', 'ד'], dtype=object)

In [7]:
df_all_annotations.Annotation.value_counts()

sc                                915
un                                449
d                                 320
s                                 142
?                                  47
sc ?                               14
un d/w                              7
d?                                  4
dolphin                             3
Dolphins?                           3
SC                                  2
un/d                                2
un ?                                1
!                                   1
w & un                              1
baby whale?                         1
sc?                                 1
song (s)                            1
cs ?                                1
few calls in this annotation        1
sc ? un                             1
un \ d ?                            1
sc \ d \ un                         1
d / cs ?                            1
un- weird whale sound probably      1
un !!! What ?                       1
un          

For labels that appear only once - find location

In [8]:
all_unique_labels = df_all_annotations.Annotation.value_counts()[
    df_all_annotations.Annotation.value_counts() == 1].index.values

In [9]:
all_unique_labels

array(['un ?', '!', 'w & un', 'baby whale?', 'sc?', 'song (s)', 'cs ?',
       'few calls in this annotation', 'sc ? un', 'un \\ d ?',
       'sc \\ d \\ un', 'd / cs ?', 'un- weird whale sound probably',
       'un !!! What ?', 'un ', 'ד'], dtype=object)

In [10]:
df_unique_labels = df_all_annotations[df_all_annotations.Annotation.isin(all_unique_labels)].copy()

In [11]:
df_unique_labels.columns

Index(['Selection', 'begin_time', 'end_time', 'Annotation', 'filename',
       'call_length'],
      dtype='object')

In [12]:
cols2keep = ['Selection', 'begin_time', 'end_time', 'Annotation', 'filename','call_length']

In [13]:
df_unique_labels = df_unique_labels[cols2keep]

In [14]:
df_unique_labels.to_csv('unique_labels_info.csv', index=False)

change and filter labels to desired values depending on use-case (eg. change undefined and dolphin labels to bg)

In [15]:
def change_annotations(df_of_annotations: pd.DataFrame, annotations_to_change: list, target_value: str) -> None:
    """change specified annotations to target value. do this inplace."""
    df_of_annotations.replace(to_replace=annotations_to_change, value=target_value, inplace=True, limit=None, regex=False)
    return

def filter_annotations(df_of_annotations: pd.DataFrame, annotations_to_keep: list) -> None:
    """turn all annotations that were not specified as "keep", into 'bg'. do this inplace."""
    all_annotations = df_of_annotations.Annotation.unique()
    annotations_to_filter = list(set(all_annotations).difference(annotations_to_keep))
    df_of_annotations.replace(to_replace=annotations_to_filter, value='bg', inplace=True, limit=None, regex=False)
    return

In [16]:
df_all_annotations.Annotation.unique()

array([nan, '?', 'Dolphins?', 'un', 'sc', 'un ', 'un !!! What ?', 'sc ?',
       'SC', 'd', 'un- weird whale sound probably', 'un d/w', 'dolphin',
       'd / cs ?', 'sc \\ d \\ un', 'cs ?', 'un \\ d ?', 'sc ? un',
       'few calls in this annotation', 'baby whale?', 'song (s)', 's',
       'un/d', 'un ?', 'w & un', '!', 'sc?', 'd?', 'ד'], dtype=object)

In [17]:
annotations_to_change = ['SC', 'sc ?', 'un- weird whale sound probably', 'cs ?', 'baby whale?', 'song (s)', 's', 'sc?']
annotations_to_keep = ['sc']
target_value = 'sc'

change_annotations(df_all_annotations, annotations_to_change, target_value)
filter_annotations(df_all_annotations, annotations_to_keep)


In [18]:
df_all_annotations.Annotation.unique()
df_all_annotations.shape

array(['bg', 'sc'], dtype=object)

(14898, 6)

# label background

Methodology: all gaps between annotated segments are considered background

In [19]:
df_all_annotations.shape
df_all_annotations.filename.nunique()
# df_all_annotations.head()

(14898, 6)

33

In [20]:
df = df_all_annotations


In [21]:
# for filename in df.filename.unique():
#     print(filename)
df.overlap.unique()

AttributeError: 'DataFrame' object has no attribute 'overlap'

## merge overlapping calls

In [24]:

def merge_overlapping_calls_one_iteration(df: pd.DataFrame) -> pd.DataFrame:
    """
    Receives an annotation dataframe with (possibly) overlapping calls, and goes through one iteration of merging them.
    Depending on original input, returned dataframe may still have some (though less) overlapping calls.
    In order to merge all overlaps, this needs to be iterated.
    :param df: Pandas DataFrame with the following columns: ['filename', 'begin_time', 'end_time']
    :return: pd.DataFrame
    """
    df = df.sort_values(['filename', 'begin_time']).reset_index(drop=True)
    df['overlap'] = np.NaN

    cnt = 0
    
    while ~((df.overlap.nunique() == 1) and (df.overlap.unique()[0] == 0)):
        cnt += 1
        print('df.overlap.nunique: ', (df.overlap.nunique()))
        print(f'df.overlap.unique(): {df.overlap.unique()}')
        print(df.overlap.value_counts(dropna=False), '\n')
        # create cols and such
        df['overlap'] = np.NaN
        df['next_begin_time'] = df.groupby('filename').begin_time.shift(-1)
        df['next_end_time'] = df.groupby('filename').end_time.shift(-1)

        # mark overlap:
        df.loc[df.next_begin_time < df.end_time, 'overlap'] = 1
        df = merge_rows_marked_as_overlapping(df)
        df.loc[(df.begin_time > df.end_time.shift(1)) & (df.end_time < df.begin_time.shift(-1)), 'overlap'] = 0
        if cnt>50:
            break

    return df


def merge_rows_marked_as_overlapping(df):
    df.loc[df.overlap == 1, 'end_time'] = df[df.overlap == 1]['next_end_time']
    df = df.drop_duplicates(subset=['filename', 'end_time'], keep='first')
    return df


In [25]:
df_no_overlap = merge_overlapping_calls_one_iteration(df)

df.overlap.nunique:  0
df.overlap.unique(): [nan]
NaN    14898
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique(): [ 1.  0. nan]
0.0    10195
1.0     2132
NaN      123
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique(): [nan  0.  1.]
0.0    10658
1.0      888
NaN      147
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique(): [nan  0.  1.]
0.0    10798
1.0      454
NaN      157
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique(): [nan  0.  1.]
0.0    10865
1.0      243
NaN      159
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique(): [nan  0.  1.]
0.0    10898
NaN      160
1.0      135
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique(): [nan  0.  1.]
0.0    10920
NaN      160
1.0       72
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique(): [nan  0.  1.]
0.0    10924
NaN      160
1.0       49
Name: overlap, dtype: int64 

df.overlap.nunique

In [26]:
df.shape
df_no_overlap.shape

(14898, 6)

(11099, 9)

In [27]:
df.head()

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
0,1,2.008108,2.699544,bg,210825-135601_Tr1,0.691436
1,3,5.619289,6.185679,bg,210825-135601_Tr1,0.566389
2,4,8.171719,8.716042,bg,210825-135601_Tr1,0.544322
3,5,11.245463,12.098725,bg,210825-135601_Tr1,0.853262
4,6,16.927039,17.45665,bg,210825-135601_Tr1,0.529611


In [28]:
df_no_overlap.head()

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length,overlap,next_begin_time,next_end_time
0,1,59.532117,60.937416,sc,180913_081527 (1),1.115795,,62.13765,63.434385
2,3,62.13765,63.434385,sc,180913_081527 (1),1.471644,0.0,64.068321,65.576152
4,178,64.068321,65.576152,sc,180913_081527 (1),1.507831,0.0,66.978572,68.263244
5,5,66.978572,68.263244,sc,180913_081527 (1),1.284672,0.0,68.329589,69.511729
6,6,68.329589,69.511729,sc,180913_081527 (1),1.18214,0.0,71.271167,72.754873


In [52]:
df_bg = df[['filename', 'begin_time', 'end_time']].sort_values(['filename', 'begin_time']).reset_index(drop=True).copy()


In [53]:
df_bg['next_begin_time'] = df_bg.groupby('filename').begin_time.shift(-1)
df_bg['next_end_time'] = df_bg.groupby('filename').end_time.shift(-1)
df_bg['overlap'] = np.NaN
df_bg['merged'] = 0

In [54]:
df_bg

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged
0,180913_081527 (1),59.532117,60.647912,59.652743,60.937416,,0
1,180913_081527 (1),59.652743,60.937416,62.137650,63.609293,,0
2,180913_081527 (1),62.137650,63.609293,62.559843,63.434385,,0
3,180913_081527 (1),62.559843,63.434385,64.068321,65.576152,,0
4,180913_081527 (1),64.068321,65.576152,66.978572,68.263244,,0
...,...,...,...,...,...,...,...
14893,25-115438_Tr2,579.833247,580.553433,580.662828,581.683851,,0
14894,25-115438_Tr2,580.662828,581.683851,581.838828,582.759572,,0
14895,25-115438_Tr2,581.838828,582.759572,583.115107,584.281991,,0
14896,25-115438_Tr2,583.115107,584.281991,584.327572,585.175386,,0


Where the next segment's begin_time is before this segment's end time - merge with next segment. Meaning, end_time will be replaced with next_end_time.

In [55]:
df_bg.head(10)

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged
0,180913_081527 (1),59.532117,60.647912,59.652743,60.937416,,0
1,180913_081527 (1),59.652743,60.937416,62.13765,63.609293,,0
2,180913_081527 (1),62.13765,63.609293,62.559843,63.434385,,0
3,180913_081527 (1),62.559843,63.434385,64.068321,65.576152,,0
4,180913_081527 (1),64.068321,65.576152,66.978572,68.263244,,0
5,180913_081527 (1),66.978572,68.263244,68.329589,69.511729,,0
6,180913_081527 (1),68.329589,69.511729,71.271167,73.158972,,0
7,180913_081527 (1),71.271167,73.158972,71.590827,72.754873,,0
8,180913_081527 (1),71.590827,72.754873,74.972095,76.081859,,0
9,180913_081527 (1),74.972095,76.081859,74.984157,76.172329,,0


Mark overlapping segments

In [56]:
df_bg.loc[df_bg.next_begin_time < df_bg.end_time, 'overlap'] = 1


In [57]:
df_bg.loc[(df_bg.begin_time > df_bg.end_time.shift(1)) & (df_bg.end_time < df_bg.begin_time.shift(-1)), 'overlap'] = 0

In [58]:
df_bg.overlap.value_counts(dropna=False)

0.0    8534
1.0    3799
NaN    2565
Name: overlap, dtype: int64

Merge

In [59]:
df_bg.loc[df_bg.overlap == 1, 'end_time'] = df_bg[df_bg.overlap == 1]['next_end_time']
df_bg.loc[df_bg.overlap == 1, 'merged'] = 1
df_bg

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged
0,180913_081527 (1),59.532117,60.937416,59.652743,60.937416,1.0,1
1,180913_081527 (1),59.652743,60.937416,62.137650,63.609293,,0
2,180913_081527 (1),62.137650,63.434385,62.559843,63.434385,1.0,1
3,180913_081527 (1),62.559843,63.434385,64.068321,65.576152,,0
4,180913_081527 (1),64.068321,65.576152,66.978572,68.263244,0.0,0
...,...,...,...,...,...,...,...
14893,25-115438_Tr2,579.833247,580.553433,580.662828,581.683851,,0
14894,25-115438_Tr2,580.662828,581.683851,581.838828,582.759572,0.0,0
14895,25-115438_Tr2,581.838828,582.759572,583.115107,584.281991,0.0,0
14896,25-115438_Tr2,583.115107,584.281991,584.327572,585.175386,0.0,0


In [60]:
df_cln = df_bg.drop_duplicates(subset=['filename', 'end_time'], keep='first')

In [61]:
df_bg.shape
df_cln.shape
df_bg.shape[0] - df_cln.shape[0]

print('\noverlap:')
df_bg.overlap.value_counts(dropna=False)
df_cln.overlap.value_counts(dropna=False)

print('\nmerged:')
df_bg.merged.value_counts(dropna=False)
df_cln.merged.value_counts(dropna=False)


(14898, 7)

(12450, 7)

2448


overlap:


0.0    8534
1.0    3799
NaN    2565
Name: overlap, dtype: int64

0.0    8534
1.0    3793
NaN     123
Name: overlap, dtype: int64


merged:


0    11099
1     3799
Name: merged, dtype: int64

0    8657
1    3793
Name: merged, dtype: int64

### sanity: look for overlap==0 rows that were dropped

In [62]:
dropped_overlap_0s = [x for x in df_bg[df_bg.overlap == 0].index.values if
                      x not in df_cln[df_cln.overlap == 0].index.values]

In [63]:
len(dropped_overlap_0s)

0

Now check for overlaps again:

In [64]:
df_bg = df_cln

In [65]:
df_bg.shape

(12450, 7)

In [66]:
df_bg.loc[(df_bg.begin_time > df_bg.end_time.shift(1)) & (df_bg.end_time < df_bg.begin_time.shift(-1))].shape

(10195, 7)

In [67]:

df_bg.loc[df_bg.next_begin_time < df_bg.end_time, 'end_time'] = df_bg[df_bg.next_begin_time < df_bg.end_time][
    'next_end_time']
df_bg.loc[df_bg.next_begin_time < df_bg.end_time, 'merged'] = 1
# df_bg.loc[df_bg.merged.shift(1)==1, 'merged'] = -1

In [68]:
df_bg[df_bg.next_begin_time < df_bg.end_time].head()

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged
0,180913_081527 (1),59.532117,60.937416,59.652743,60.937416,1.0,1
2,180913_081527 (1),62.13765,63.434385,62.559843,63.434385,1.0,1
7,180913_081527 (1),71.271167,72.754873,71.590827,72.754873,1.0,1
9,180913_081527 (1),74.972095,76.172329,74.984157,76.172329,1.0,1
11,180913_081527 (1),77.059941,78.061142,77.494197,78.061142,1.0,1


In [69]:
df_bg.head(10)

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged
0,180913_081527 (1),59.532117,60.937416,59.652743,60.937416,1.0,1
2,180913_081527 (1),62.13765,63.434385,62.559843,63.434385,1.0,1
4,180913_081527 (1),64.068321,65.576152,66.978572,68.263244,0.0,0
5,180913_081527 (1),66.978572,68.263244,68.329589,69.511729,0.0,0
6,180913_081527 (1),68.329589,69.511729,71.271167,73.158972,0.0,0
7,180913_081527 (1),71.271167,72.754873,71.590827,72.754873,1.0,1
9,180913_081527 (1),74.972095,76.172329,74.984157,76.172329,1.0,1
11,180913_081527 (1),77.059941,78.061142,77.494197,78.061142,1.0,1
13,180913_081527 (1),80.034604,81.650999,82.201279,83.57039,0.0,0
14,180913_081527 (1),82.201279,83.57039,84.318814,85.368265,0.0,0


Make sure only the right rows are marked as merged==-1 (=double):

In [70]:
df_bg.iloc[35:42]

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged
52,180913_081527 (1),136.265002,137.339729,137.851504,139.847427,0.0,0
53,180913_081527 (1),137.851504,138.482693,137.868563,138.482693,1.0,1
55,180913_081527 (1),138.926232,140.649207,141.365692,142.269828,0.0,0
56,180913_081527 (1),141.365692,142.269828,144.455895,145.837687,0.0,0
57,180913_081527 (1),144.455895,146.669096,145.785627,146.669096,1.0,1
58,180913_081527 (1),145.785627,147.697136,145.803569,147.697136,1.0,1
60,180913_081527 (1),149.931887,150.810342,150.145034,150.810342,1.0,1


In [71]:
df_bg[(df_bg.merged == -1) & (df_bg.end_time != df_bg.end_time.shift(1))]

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged


In [72]:
df_one_file = df[df.filename == filename]

In [73]:
df_one_file.shape
df_one_file.filename.unique()

(768, 6)

array(['210824-104507_Tr1.txt'], dtype=object)

In [74]:
df_one_file.head()

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
0,1,0.80302,1.564808,,210824-104507_Tr1.txt,0.761789
1,2,1.525333,1.770798,,210824-104507_Tr1.txt,0.245465
2,3,2.549516,3.031982,,210824-104507_Tr1.txt,0.482466
3,4,2.371765,2.693409,,210824-104507_Tr1.txt,0.321644
4,5,2.879624,3.345162,,210824-104507_Tr1.txt,0.465538


In [75]:
df_one_file.sort_values('begin_time')

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
0,1,0.80302,1.564808,,210824-104507_Tr1.txt,0.761789
1,2,1.525333,1.770798,,210824-104507_Tr1.txt,0.245465
3,4,2.371765,2.693409,,210824-104507_Tr1.txt,0.321644
2,3,2.549516,3.031982,,210824-104507_Tr1.txt,0.482466
4,5,2.879624,3.345162,,210824-104507_Tr1.txt,0.465538
5,6,3.986533,4.316642,,210824-104507_Tr1.txt,0.330109
6,7,4.316642,5.010716,,210824-104507_Tr1.txt,0.694074
7,8,5.227139,5.481068,,210824-104507_Tr1.txt,0.25393
8,9,5.887356,6.132821,,210824-104507_Tr1.txt,0.245465
9,10,6.309838,7.012377,,210824-104507_Tr1.txt,0.702539


In [76]:
df.columns

Index(['Selection', 'begin_time', 'end_time', 'Annotation', 'filename',
       'call_length'],
      dtype='object')

In [77]:
df_bg.shape

(12450, 7)

In [79]:
df_bg['bg_end_time'] = df_bg.groupby('filename').begin_time.shift(-1)
# df_bg = df_bg.drop('begin_time', axis=1)
# df_bg = df_bg.rename({'end_time': 'bg_begin_time'}, axis=1)
# df_bg['call_length']

#get rid of overlap:
df_bg = df_bg[df_bg.bg_end_time > df_bg.end_time]

df_bg.shape
df_bg.head(10)
# df_bg.tail(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_bg['bg_end_time'] = df_bg.groupby('filename').begin_time.shift(-1)


(11019, 8)

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged,bg_end_time
0,180913_081527 (1),59.532117,60.937416,59.652743,60.937416,1.0,1,62.13765
2,180913_081527 (1),62.13765,63.434385,62.559843,63.434385,1.0,1,64.068321
4,180913_081527 (1),64.068321,65.576152,66.978572,68.263244,0.0,0,66.978572
5,180913_081527 (1),66.978572,68.263244,68.329589,69.511729,0.0,0,68.329589
6,180913_081527 (1),68.329589,69.511729,71.271167,73.158972,0.0,0,71.271167
7,180913_081527 (1),71.271167,72.754873,71.590827,72.754873,1.0,1,74.972095
9,180913_081527 (1),74.972095,76.172329,74.984157,76.172329,1.0,1,77.059941
11,180913_081527 (1),77.059941,78.061142,77.494197,78.061142,1.0,1,80.034604
13,180913_081527 (1),80.034604,81.650999,82.201279,83.57039,0.0,0,82.201279
14,180913_081527 (1),82.201279,83.57039,84.318814,85.368265,0.0,0,84.318814


In [80]:
df_bg = df[['filename', 'begin_time', 'end_time']].sort_values(['filename', 'begin_time']).copy()
df_bg['bg_end_time'] = df_bg.groupby('filename').begin_time.shift(-1)
# df_bg = df_bg.drop('begin_time', axis=1)
# df_bg = df_bg.rename({'end_time': 'bg_begin_time'}, axis=1)
# df_bg['call_length']

#get rid of overlap:
# df_bg = df_bg[df_bg.bg_end_time > df_bg.end_time]

df_bg.shape
df_bg.head(10)
df_bg.tail(10)

(14898, 4)

Unnamed: 0,filename,begin_time,end_time,bg_end_time
0,180913_081527 (1),59.532117,60.647912,59.652743
1,180913_081527 (1),59.652743,60.937416,62.13765
2,180913_081527 (1),62.13765,63.609293,62.559843
3,180913_081527 (1),62.559843,63.434385,64.068321
165,180913_081527 (1),64.068321,65.576152,66.978572
4,180913_081527 (1),66.978572,68.263244,68.329589
5,180913_081527 (1),68.329589,69.511729,71.271167
6,180913_081527 (1),71.271167,73.158972,71.590827
7,180913_081527 (1),71.590827,72.754873,74.972095
9,180913_081527 (1),74.972095,76.081859,74.984157


Unnamed: 0,filename,begin_time,end_time,bg_end_time
239,25-115438_Tr2,572.175029,573.815959,574.472331
240,25-115438_Tr2,574.472331,575.35661,575.566285
241,25-115438_Tr2,575.566285,576.377633,575.775959
242,25-115438_Tr2,575.775959,577.526285,579.313619
243,25-115438_Tr2,579.313619,580.243479,579.833247
244,25-115438_Tr2,579.833247,580.553433,580.662828
245,25-115438_Tr2,580.662828,581.683851,581.838828
246,25-115438_Tr2,581.838828,582.759572,583.115107
247,25-115438_Tr2,583.115107,584.281991,584.327572
248,25-115438_Tr2,584.327572,585.175386,


In [81]:
df_one_file['bg_time'] = df_one_file['begin_time'].shift(-1) - df_one_file['end_time']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_one_file['bg_time'] = df_one_file['begin_time'].shift(-1) - df_one_file['end_time']


In [82]:
df_one_file

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length,bg_time
0,1,0.80302,1.564808,,210824-104507_Tr1.txt,0.761789,-0.039476
1,2,1.525333,1.770798,,210824-104507_Tr1.txt,0.245465,0.778718
2,3,2.549516,3.031982,,210824-104507_Tr1.txt,0.482466,-0.660217
3,4,2.371765,2.693409,,210824-104507_Tr1.txt,0.321644,0.186215
4,5,2.879624,3.345162,,210824-104507_Tr1.txt,0.465538,0.641371
5,6,3.986533,4.316642,,210824-104507_Tr1.txt,0.330109,0.0
6,7,4.316642,5.010716,,210824-104507_Tr1.txt,0.694074,0.216423
7,8,5.227139,5.481068,,210824-104507_Tr1.txt,0.25393,0.406287
8,9,5.887356,6.132821,,210824-104507_Tr1.txt,0.245465,0.177017
9,10,6.309838,7.012377,,210824-104507_Tr1.txt,0.702539,0.639912


In [83]:
bg_segments = []
for file in (unique_files):
    file_df = non_overlap_all[non_overlap_all['filename'] == file]
    begin = np.array(file_df['begin_time'])
    end = np.array(file_df['end_time'])
    for item in end:
        next_beginning = begin[begin > item]
        if next_beginning.size == 0:
            break
        next_beginning = np.min(next_beginning)
        bg_segments.append([item, next_beginning, file])
bg_segments = pd.DataFrame(bg_segments, columns=['begin_time', 'end_time', 'filename'])
bg_segments = filter_df(bg_segments, out_of_water_dict)
bg_segments['call_length'] = bg_segments['end_time'] - bg_segments['begin_time']

NameError: name 'unique_files' is not defined