In [1]:
import pandas as pd
import os

pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
pd.options.display.max_colwidth = None

from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

In [4]:
from soundbay.utils.metadata_processing import load_n_adapt_raven_annotation_table_to_dv_dataset_requirements

# params

In [37]:
annotations_dir = '../datasets/2021_annotations/'
cols2drop = ['View', 'Channel', 'Low Freq (Hz)', 'High Freq (Hz)', 'Delta Time (s)', 'Delta Freq (Hz)',
             'Avg Power Density (dB FS/Hz)']

# load annotations

In [279]:
df_list = []
for filename in os.listdir(annotations_dir):
    try:
        #         print(filename)
        annotation_file_path = os.path.join(annotations_dir, filename)
        small_df = load_n_adapt_raven_annotation_table_to_dv_dataset_requirements(annotation_file_path)
        df_list.append(small_df)
    except UnicodeDecodeError:
        continue

print(len(df_list))
df_all_annotations = pd.concat(df_list)
df_all_annotations = df_all_annotations.drop(cols2drop, axis=1)
# df_all_annotations.head()

33


In [280]:
df_all_annotations.shape
df_all_annotations.head()
df_all_annotations.tail()

(14898, 6)

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
0,1,59.532117,60.647912,song (s),180913_081527 (1),1.115795
1,2,59.652743,60.937416,s,180913_081527 (1),1.284672
2,3,62.13765,63.609293,s,180913_081527 (1),1.471644
3,4,62.559843,63.434385,s,180913_081527 (1),0.874542
4,5,66.978572,68.263244,s,180913_081527 (1),1.284672


Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
244,249,579.833247,580.553433,sc ? un,25-115438_Tr2,0.720186
245,250,580.662828,581.683851,,25-115438_Tr2,1.021023
246,251,581.838828,582.759572,,25-115438_Tr2,0.920744
247,252,583.115107,584.281991,,25-115438_Tr2,1.166884
248,253,584.327572,585.175386,,25-115438_Tr2,0.847814


# explore labels

In [8]:
df_all_annotations.Annotation.unique()

array(['song (s)', 's', 'un', nan, 'un/d', 'd', 'ד', 'sc', 'd?', 'sc ?',
       'SC', '?', 'few calls in this annotation', 'un ', 'un !!! What ?',
       'un ?', 'w & un', 'sc?', '!', 'baby whale?',
       'un- weird whale sound probably', 'un d/w', 'Dolphins?', 'dolphin',
       'd / cs ?', 'sc \\ d \\ un', 'cs ?', 'un \\ d ?', 'sc ? un'],
      dtype=object)

In [9]:
df_all_annotations.Annotation.value_counts()

sc                                915
un                                449
d                                 320
s                                 142
?                                  46
sc ?                               14
un d/w                              7
d?                                  4
Dolphins?                           3
dolphin                             3
SC                                  2
un/d                                2
cs ?                                1
sc \ d \ un                         1
d / cs ?                            1
un- weird whale sound probably      1
un \ d ?                            1
song (s)                            1
un ?                                1
baby whale?                         1
!                                   1
sc?                                 1
w & un                              1
un !!! What ?                       1
un                                  1
few calls in this annotation        1
ד           

For labels that appear only once - find location

In [10]:
all_unique_labels = df_all_annotations.Annotation.value_counts()[
    df_all_annotations.Annotation.value_counts() == 1].index.values

In [11]:
all_unique_labels

array(['cs ?', 'sc \\ d \\ un', 'd / cs ?',
       'un- weird whale sound probably', 'un \\ d ?', 'song (s)', 'un ?',
       'baby whale?', '!', 'sc?', 'w & un', 'un !!! What ?', 'un ',
       'few calls in this annotation', 'ד', 'sc ? un'], dtype=object)

In [12]:
df_unique_labels = df_all_annotations[df_all_annotations.Annotation.isin(all_unique_labels)].copy()

In [14]:
df_unique_labels.columns

Index(['Selection', 'View', 'Channel', 'begin_time', 'end_time',
       'Low Freq (Hz)', 'High Freq (Hz)', 'Delta Time (s)', 'Delta Freq (Hz)',
       'Avg Power Density (dB FS/Hz)', 'Annotation', 'filename',
       'call_length'],
      dtype='object')

In [15]:
cols2keep = ['Selection', 'begin_time', 'end_time',
             'Low Freq (Hz)', 'High Freq (Hz)', 'Annotation', 'filename']

In [16]:
df_unique_labels = df_unique_labels[cols2keep]

In [18]:
df_unique_labels.to_csv('unique_labels_info.csv', index=False)

# label background

Methodology: all gaps between annotated segments are considered background

In [40]:
df_all_annotations.shape
df_all_annotations.filename.nunique()
df_all_annotations.head()

(14898, 6)

33

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
0,1,59.532117,60.647912,song (s),180913_081527 (1),1.115795
1,2,59.652743,60.937416,s,180913_081527 (1),1.284672
2,3,62.13765,63.609293,s,180913_081527 (1),1.471644
3,4,62.559843,63.434385,s,180913_081527 (1),0.874542
4,5,66.978572,68.263244,s,180913_081527 (1),1.284672


In [281]:
df = df_all_annotations


In [83]:
# for filename in df.filename.unique():
#     print(filename)

## merge overlapping calls

In [305]:

def merge_overlapping_calls_one_iteration(df: pd.DataFrame) -> pd.DataFrame:
    """
    Receives an annotation dataframe with (possibly) overlapping calls, and goes through one iteration of merging them.
    Depending on original input, returned dataframe may still have some (though less) overlapping calls.
    In order to merge all overlaps, this needs to be iterated.
    :param df: Pandas DataFrame with the following columns: ['filename', 'begin_time', 'end_time']
    :return: pd.DataFrame
    """
    df = df.sort_values(['filename', 'begin_time']).reset_index(drop=True)
    df['overlap'] = np.NaN

    while ~((df.overlap.nunique() == 1) & (df.overlap.unique()[0] == 0)):
        print('df.overlap.nunique: ', (df.overlap.nunique()))
        print(f'df.overlap.unique(): {df.overlap.unique()}')
        print(df.overlap.value_counts(dropna=False), '\n')
        # create cols and such
        df['overlap'] = np.NaN
        df['next_begin_time'] = df.groupby('filename').begin_time.shift(-1)
        df['next_end_time'] = df.groupby('filename').end_time.shift(-1)

        # mark overlap:
        df.loc[df.next_begin_time < df.end_time, 'overlap'] = 1
        df = merge_rows_marked_as_overlapping(df)
        df.loc[(df.begin_time > df.end_time.shift(1)) & (df.end_time < df.begin_time.shift(-1)), 'overlap'] = 0

    return df


def merge_rows_marked_as_overlapping(df):
    df.loc[df.overlap == 1, 'end_time'] = df[df.overlap == 1]['next_end_time']
    df = df.drop_duplicates(subset=['filename', 'end_time'], keep='first')
    return df


In [None]:
df_no_overlap = merge_overlapping_calls_one_iteration(df)

df.overlap.nunique:  0
df.overlap.unique()[0]: nan
Series([], Name: overlap, dtype: int64) 

df.overlap.nunique:  2
df.overlap.unique()[0]: 1.0
0.0    10195
1.0     2132
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique()[0]: nan
0.0    10658
1.0      888
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique()[0]: nan
0.0    10798
1.0      454
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique()[0]: nan
0.0    10865
1.0      243
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique()[0]: nan
0.0    10898
1.0      135
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique()[0]: nan
0.0    10920
1.0       72
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique()[0]: nan
0.0    10924
1.0       49
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique()[0]: nan
0.0    10931
1.0       27
Name: overlap, dtype: int64 

df.overlap.nunique:  2
df.overlap.unique()[0]: nan


df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10939
Name: overlap, dtype: int64 

df.overlap.nunique:  1
df.overlap.unique()[0]: nan
0.0    10

In [None]:
df.shape
df_no_overlap.shape

In [None]:
df.head()

In [286]:
df_no_overlap.head()

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length,overlap
0,1,59.532117,60.647912,song (s),180913_081527 (1),1.115795,
1,2,59.652743,60.937416,s,180913_081527 (1),1.284672,
2,3,62.13765,63.609293,s,180913_081527 (1),1.471644,
3,4,62.559843,63.434385,s,180913_081527 (1),0.874542,
4,178,64.068321,65.576152,s,180913_081527 (1),1.507831,


In [200]:
df_bg = df[['filename', 'begin_time', 'end_time']].sort_values(['filename', 'begin_time']).reset_index(drop=True).copy()


In [201]:
df_bg['next_begin_time'] = df_bg.groupby('filename').begin_time.shift(-1)
df_bg['next_end_time'] = df_bg.groupby('filename').end_time.shift(-1)
df_bg['overlap'] = np.NaN
df_bg['merged'] = 0

In [202]:
df_bg

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged
0,180913_081527 (1),59.532117,60.647912,59.652743,60.937416,,0
1,180913_081527 (1),59.652743,60.937416,62.137650,63.609293,,0
2,180913_081527 (1),62.137650,63.609293,62.559843,63.434385,,0
3,180913_081527 (1),62.559843,63.434385,64.068321,65.576152,,0
4,180913_081527 (1),64.068321,65.576152,66.978572,68.263244,,0
...,...,...,...,...,...,...,...
14893,25-115438_Tr2,579.833247,580.553433,580.662828,581.683851,,0
14894,25-115438_Tr2,580.662828,581.683851,581.838828,582.759572,,0
14895,25-115438_Tr2,581.838828,582.759572,583.115107,584.281991,,0
14896,25-115438_Tr2,583.115107,584.281991,584.327572,585.175386,,0


Where the next segment's begin_time is before this segment's end time - merge with next segment. Meaning, end_time will be replaced with next_end_time.

In [203]:
df_bg.head(10)

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged
0,180913_081527 (1),59.532117,60.647912,59.652743,60.937416,,0
1,180913_081527 (1),59.652743,60.937416,62.13765,63.609293,,0
2,180913_081527 (1),62.13765,63.609293,62.559843,63.434385,,0
3,180913_081527 (1),62.559843,63.434385,64.068321,65.576152,,0
4,180913_081527 (1),64.068321,65.576152,66.978572,68.263244,,0
5,180913_081527 (1),66.978572,68.263244,68.329589,69.511729,,0
6,180913_081527 (1),68.329589,69.511729,71.271167,73.158972,,0
7,180913_081527 (1),71.271167,73.158972,71.590827,72.754873,,0
8,180913_081527 (1),71.590827,72.754873,74.972095,76.081859,,0
9,180913_081527 (1),74.972095,76.081859,74.984157,76.172329,,0


Mark overlapping segments

In [204]:
df_bg.loc[df_bg.next_begin_time < df_bg.end_time, 'overlap'] = 1


In [205]:
df_bg.loc[(df_bg.begin_time > df_bg.end_time.shift(1)) & (df_bg.end_time < df_bg.begin_time.shift(-1)), 'overlap'] = 0

In [219]:
df_bg.overlap.value_counts(dropna=False)

0.0    8534
1.0    3799
NaN    2565
Name: overlap, dtype: int64

Merge

In [207]:
df_bg.loc[df_bg.overlap == 1, 'end_time'] = df_bg[df_bg.overlap == 1]['next_end_time']
df_bg.loc[df_bg.overlap == 1, 'merged'] = 1
df_bg

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,overlap,merged
0,180913_081527 (1),59.532117,60.937416,59.652743,60.937416,1.0,1
1,180913_081527 (1),59.652743,60.937416,62.137650,63.609293,,0
2,180913_081527 (1),62.137650,63.434385,62.559843,63.434385,1.0,1
3,180913_081527 (1),62.559843,63.434385,64.068321,65.576152,,0
4,180913_081527 (1),64.068321,65.576152,66.978572,68.263244,0.0,0
...,...,...,...,...,...,...,...
14893,25-115438_Tr2,579.833247,580.553433,580.662828,581.683851,,0
14894,25-115438_Tr2,580.662828,581.683851,581.838828,582.759572,0.0,0
14895,25-115438_Tr2,581.838828,582.759572,583.115107,584.281991,0.0,0
14896,25-115438_Tr2,583.115107,584.281991,584.327572,585.175386,0.0,0


In [248]:
df_cln = df_bg.drop_duplicates(subset=['filename', 'end_time'], keep='first')

In [250]:
df_bg.shape
df_cln.shape
df_bg.shape[0] - df_cln.shape[0]

print('\noverlap:')
df_bg.overlap.value_counts(dropna=False)
df_cln.overlap.value_counts(dropna=False)

print('\nmerged:')
df_bg.merged.value_counts(dropna=False)
df_cln.merged.value_counts(dropna=False)


(14898, 7)

(12450, 7)

2448


overlap:


0.0    8534
1.0    3799
NaN    2565
Name: overlap, dtype: int64

0.0    8534
1.0    3793
NaN     123
Name: overlap, dtype: int64


merged:


0    11099
1     3799
Name: merged, dtype: int64

0    8657
1    3793
Name: merged, dtype: int64

### sanity: look for overlap==0 rows that were dropped

In [251]:
dropped_overlap_0s = [x for x in df_bg[df_bg.overlap == 0].index.values if
                      x not in df_cln[df_cln.overlap == 0].index.values]

In [252]:
len(dropped_overlap_0s)

0

Now check for overlaps again:

In [254]:
df_bg = df_cln

In [256]:
df_bg.shape

(12450, 7)

In [255]:
df_bg.loc[(df_bg.begin_time > df_bg.end_time.shift(1)) & (df_bg.end_time < df_bg.begin_time.shift(-1))].shape

(10195, 7)

In [182]:

df_bg.loc[df_bg.next_begin_time < df_bg.end_time, 'end_time'] = df_bg[df_bg.next_begin_time < df_bg.end_time][
    'next_end_time']
df_bg.loc[df_bg.next_begin_time < df_bg.end_time, 'merged'] = 1
# df_bg.loc[df_bg.merged.shift(1)==1, 'merged'] = -1

In [175]:
df_bg[df_bg.next_begin_time < df_bg.end_time].head()

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,merged
0,180913_081527 (1),59.532117,60.937416,59.652743,60.937416,1
2,180913_081527 (1),62.13765,63.434385,62.559843,63.434385,1
7,180913_081527 (1),71.271167,72.754873,71.590827,72.754873,1
9,180913_081527 (1),74.972095,76.172329,74.984157,76.172329,1
11,180913_081527 (1),77.059941,78.061142,77.494197,78.061142,1


In [176]:
df_bg.head(10)

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,merged
0,180913_081527 (1),59.532117,60.937416,59.652743,60.937416,1
1,180913_081527 (1),59.652743,60.937416,62.13765,63.609293,-1
2,180913_081527 (1),62.13765,63.434385,62.559843,63.434385,1
3,180913_081527 (1),62.559843,63.434385,64.068321,65.576152,-1
4,180913_081527 (1),64.068321,65.576152,66.978572,68.263244,0
5,180913_081527 (1),66.978572,68.263244,68.329589,69.511729,0
6,180913_081527 (1),68.329589,69.511729,71.271167,73.158972,0
7,180913_081527 (1),71.271167,72.754873,71.590827,72.754873,1
8,180913_081527 (1),71.590827,72.754873,74.972095,76.081859,-1
9,180913_081527 (1),74.972095,76.172329,74.984157,76.172329,1


Make sure only the right rows are marked as merged==-1 (=double):

In [181]:
df_bg.iloc[35:42]

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,merged
35,180913_081527 (1),111.631803,113.491253,115.759152,116.799761,0
36,180913_081527 (1),115.759152,116.987412,115.963862,116.987412,1
37,180913_081527 (1),115.963862,116.987412,118.539796,120.450423,-1
38,180913_081527 (1),118.539796,119.557383,118.754188,119.557383,1
39,180913_081527 (1),118.754188,120.228654,119.119808,120.228654,-1
40,180913_081527 (1),119.119808,120.228654,120.62782,121.44666,-1
41,180913_081527 (1),120.62782,121.276068,120.969003,121.276068,1


In [178]:
df_bg[(df_bg.merged == -1) & (df_bg.end_time != df_bg.end_time.shift(1))]

Unnamed: 0,filename,begin_time,end_time,next_begin_time,next_end_time,merged
22,180913_081527 (1),90.755800,91.588123,91.057366,91.588123,-1
39,180913_081527 (1),118.754188,120.228654,119.119808,120.228654,-1
44,180913_081527 (1),123.016103,123.834943,123.067280,123.834943,-1
58,180913_081527 (1),145.785627,147.697136,145.803569,147.697136,-1
64,180913_081527 (1),152.567436,153.741957,152.905563,153.741957,-1
...,...,...,...,...,...,...
14849,25-115438_Tr2,504.076445,505.498585,504.213189,505.498585,-1
14876,25-115438_Tr2,552.679678,554.776422,553.737166,554.776422,-1
14880,25-115438_Tr2,560.449006,561.734401,560.458122,561.734401,-1
14886,25-115438_Tr2,570.815302,573.369261,572.029168,573.369261,-1


In [135]:
df_one_file = df[df.filename == filename]

In [125]:
df_one_file.shape
df_one_file.filename.unique()

(249, 7)

array(['25-115438_Tr2'], dtype=object)

In [45]:
df_one_file.head()

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
0,1,14.210034,14.976796,,25-115438_Tr2,0.766761
1,2,17.372924,18.091763,,25-115438_Tr2,0.718839
2,3,21.126465,21.51943,,25-115438_Tr2,0.392965
3,4,17.476111,18.377055,,25-115438_Tr2,0.900944
4,5,23.514355,24.376962,,25-115438_Tr2,0.862606


In [48]:
df_one_file.sort_values('begin_time')

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length
0,1,14.210034,14.976796,,25-115438_Tr2,0.766761
1,2,17.372924,18.091763,,25-115438_Tr2,0.718839
3,4,17.476111,18.377055,,25-115438_Tr2,0.900944
2,3,21.126465,21.51943,,25-115438_Tr2,0.392965
4,5,23.514355,24.376962,,25-115438_Tr2,0.862606
5,6,27.410318,28.004558,,25-115438_Tr2,0.59424
6,7,33.253732,33.809634,,25-115438_Tr2,0.555902
7,8,37.063434,37.274293,,25-115438_Tr2,0.210859
8,9,42.320398,42.972145,,25-115438_Tr2,0.651747
9,10,46.353684,46.871248,,25-115438_Tr2,0.517564


In [63]:
df.columns

Index(['Selection', 'begin_time', 'end_time', 'Annotation', 'filename',
       'call_length', 'bg_end_time'],
      dtype='object')

In [77]:
df_bg.shape

(14898, 4)

In [82]:
df_bg['bg_end_time'] = df_bg.groupby('filename').begin_time.shift(-1)
# df_bg = df_bg.drop('begin_time', axis=1)
# df_bg = df_bg.rename({'end_time': 'bg_begin_time'}, axis=1)
# df_bg['call_length']
x
#get rid of overlap:
df_bg = df_bg[df_bg.bg_end_time > df_bg.end_time]

df_bg.shape
df_bg.head(10)
# df_bg.tail(10)

(11019, 4)

Unnamed: 0,filename,begin_time,end_time,bg_end_time
1,180913_081527 (1),59.652743,60.937416,62.13765
3,180913_081527 (1),62.559843,63.434385,64.068321
165,180913_081527 (1),64.068321,65.576152,66.978572
4,180913_081527 (1),66.978572,68.263244,68.329589
5,180913_081527 (1),68.329589,69.511729,71.271167
7,180913_081527 (1),71.590827,72.754873,74.972095
8,180913_081527 (1),74.984157,76.172329,77.059941
11,180913_081527 (1),77.494197,78.061142,80.034604
12,180913_081527 (1),80.034604,81.650999,82.201279
13,180913_081527 (1),82.201279,83.57039,84.318814


In [79]:
df_bg = df[['filename', 'begin_time', 'end_time']].sort_values(['filename', 'begin_time']).copy()
df_bg['bg_end_time'] = df_bg.groupby('filename').begin_time.shift(-1)
# df_bg = df_bg.drop('begin_time', axis=1)
# df_bg = df_bg.rename({'end_time': 'bg_begin_time'}, axis=1)
# df_bg['call_length']

#get rid of overlap:
# df_bg = df_bg[df_bg.bg_end_time > df_bg.end_time]

df_bg.shape
df_bg.head(10)
df_bg.tail(10)

(14898, 4)

Unnamed: 0,filename,begin_time,end_time,bg_end_time
0,180913_081527 (1),59.532117,60.647912,59.652743
1,180913_081527 (1),59.652743,60.937416,62.13765
2,180913_081527 (1),62.13765,63.609293,62.559843
3,180913_081527 (1),62.559843,63.434385,64.068321
165,180913_081527 (1),64.068321,65.576152,66.978572
4,180913_081527 (1),66.978572,68.263244,68.329589
5,180913_081527 (1),68.329589,69.511729,71.271167
6,180913_081527 (1),71.271167,73.158972,71.590827
7,180913_081527 (1),71.590827,72.754873,74.972095
9,180913_081527 (1),74.972095,76.081859,74.984157


Unnamed: 0,filename,begin_time,end_time,bg_end_time
239,25-115438_Tr2,572.175029,573.815959,574.472331
240,25-115438_Tr2,574.472331,575.35661,575.566285
241,25-115438_Tr2,575.566285,576.377633,575.775959
242,25-115438_Tr2,575.775959,577.526285,579.313619
243,25-115438_Tr2,579.313619,580.243479,579.833247
244,25-115438_Tr2,579.833247,580.553433,580.662828
245,25-115438_Tr2,580.662828,581.683851,581.838828
246,25-115438_Tr2,581.838828,582.759572,583.115107
247,25-115438_Tr2,583.115107,584.281991,584.327572
248,25-115438_Tr2,584.327572,585.175386,


Unnamed: 0,filename,begin_time,bg_begin_time,bg_end_time
0,180913_081527 (1),59.532117,60.647912,59.652743
1,180913_081527 (1),59.652743,60.937416,62.13765
2,180913_081527 (1),62.13765,63.609293,62.559843
3,180913_081527 (1),62.559843,63.434385,64.068321
165,180913_081527 (1),64.068321,65.576152,66.978572
4,180913_081527 (1),66.978572,68.263244,68.329589
5,180913_081527 (1),68.329589,69.511729,71.271167
6,180913_081527 (1),71.271167,73.158972,71.590827
7,180913_081527 (1),71.590827,72.754873,74.972095
9,180913_081527 (1),74.972095,76.081859,74.984157


In [50]:
df_one_file['bg_time'] = df_one_file['begin_time'].shift(-1) - df_one_file['end_time']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_one_file['bg_time'] = df_one_file['begin_time'].shift(-1) - df_one_file['end_time']


In [51]:
df_one_file

Unnamed: 0,Selection,begin_time,end_time,Annotation,filename,call_length,bg_time
0,1,14.210034,14.976796,,25-115438_Tr2,0.766761,2.396129
1,2,17.372924,18.091763,,25-115438_Tr2,0.718839,3.034702
2,3,21.126465,21.51943,,25-115438_Tr2,0.392965,-4.04332
3,4,17.476111,18.377055,,25-115438_Tr2,0.900944,5.1373
4,5,23.514355,24.376962,,25-115438_Tr2,0.862606,3.033356
5,6,27.410318,28.004558,,25-115438_Tr2,0.59424,5.249174
6,7,33.253732,33.809634,,25-115438_Tr2,0.555902,3.2538
7,8,37.063434,37.274293,,25-115438_Tr2,0.210859,5.046105
8,9,42.320398,42.972145,,25-115438_Tr2,0.651747,3.381539
9,10,46.353684,46.871248,,25-115438_Tr2,0.517564,4.845727


In [None]:
bg_segments = []
for file in (unique_files):
    file_df = non_overlap_all[non_overlap_all['filename'] == file]
    begin = np.array(file_df['begin_time'])
    end = np.array(file_df['end_time'])
    for item in end:
        next_beginning = begin[begin > item]
        if next_beginning.size == 0:
            break
        next_beginning = np.min(next_beginning)
        bg_segments.append([item, next_beginning, file])
bg_segments = pd.DataFrame(bg_segments, columns=['begin_time', 'end_time', 'filename'])
bg_segments = filter_df(bg_segments, out_of_water_dict)
bg_segments['call_length'] = bg_segments['end_time'] - bg_segments['begin_time']