In [None]:
# OpenSoundscape imports
from opensoundscape import Audio, Spectrogram
from opensoundscape.annotations import BoxedAnnotations

# General-purpose packages
import os
import numpy as np
import pandas as pd
import glob
from pathlib import Path
from dotenv import load_dotenv
import matplotlib.pyplot as plt

In [None]:
# Locate the selection tables
# Read the local .env file, then pick up the directory list it defines
load_dotenv()

all_sample_dirs = os.environ.get('all_sample_dirs')

In [None]:
# Define function to get .txt file names from comma-separated list of directories
def list_files(dirs):
    """Collect selection table (.txt) file paths from one or more directories.

    Parameters
    ----------
    dirs : str
        Comma-separated list of directory paths. Whitespace around each entry
        is ignored and empty entries (e.g. from a trailing comma) are skipped.

    Returns
    -------
    list of str
        Paths of the .txt files found directly inside each directory, grouped
        in the order the directories were given and sorted within each
        directory.

    Raises
    ------
    ValueError
        If ``dirs`` is None, e.g. because the environment variable holding the
        directory list is not set.
    """
    if dirs is None:
        raise ValueError(
            "dirs must be a comma-separated string of directory paths, got None "
            "(is the environment variable set?)"
        )

    # Strip each entry so "a, b" works, and drop blanks: an empty entry would
    # otherwise become the pattern "/*.txt" and search the filesystem root
    dirs_list = [entry.strip() for entry in dirs.split(",") if entry.strip()]

    # glob returns matches in arbitrary order; sort so reruns give the same list
    file_paths = []
    for directory in dirs_list:
        file_paths.extend(sorted(glob.glob(os.path.join(directory, "*.txt"))))

    return file_paths

In [None]:
# Gather the selection table paths from every configured sample directory
file_list = list_files(dirs=all_sample_dirs)

In [None]:
# Read all selection tables, taking the labels from the "Annotation" column
annotations = BoxedAnnotations.from_raven_files(file_list, annotation_column="Annotation")

# Work with the table held in the .df attribute from here on
annotations_df = annotations.df

In [None]:
# How many box annotations (rows) the data holds in total
len(annotations_df)

In [None]:
# Tally of box annotations per label
annotations_df['annotation'].value_counts()

In [None]:
# Filter annotations table to dormouse annotations (labels starting with 'hdor').
# na=False treats rows with a missing label as non-matching; without it the
# mask holds NaN for those rows and boolean indexing raises a ValueError.
is_hdor = annotations_df['annotation'].str.startswith('hdor', na=False)
annotations_hdor_df = annotations_df[is_hdor].copy()

In [None]:
# Split cam/ncam annotation (the text after the last underscore) into a
# separate column. The vectorised .str accessor yields NaN for a label that
# has no underscore, where indexing the split list with [1] raised IndexError
# and aborted the cell; check annotations_hdor_df['cam'].isna().sum() to spot
# such labels.
annotations_hdor_df['cam'] = annotations_hdor_df['annotation'].str.rsplit('_', n=1).str[1]

In [None]:
# Call type is everything before the last underscore of the label; store it
# in its own column
annotations_hdor_df['call_type'] = annotations_hdor_df['annotation'].str.rsplit('_', n=1).str[0]

In [None]:
# Number of annotations for each call type
annotations_hdor_df['call_type'].value_counts()

In [None]:
# Show the column names of the filtered annotations table
annotations_hdor_df.columns

In [None]:
# Mean of the 'Delta Time (s)' column, pooled over every call type
call_durations = annotations_hdor_df['Delta Time (s)']
call_durations.mean()

In [None]:
# Histogram of call length across types.
# An explicit Axes is used so the figure carries axis labels and a title and
# can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.hist(annotations_hdor_df['Delta Time (s)'])
ax.set(
    xlabel='Delta Time (s)',
    ylabel='Number of annotations',
    title='Call length, all call types',
)
plt.show()

In [None]:
# Mean of 'Delta Time (s)' within each call type
(
    annotations_hdor_df
    .groupby(by='call_type')['Delta Time (s)']
    .mean()
)

In [None]:
# One call-length histogram per call type, all drawn against a shared x axis
tick_positions = np.arange(0, 1.25, 0.1)
annotations_hdor_df.hist(
    column='Delta Time (s)',
    by='call_type',
    sharex=True,
    figsize=(12, 9),
)
plt.xticks(tick_positions)
plt.show()