In [None]:
from dotenv import load_dotenv
import os
import pandas as pd
import glob
from opensoundscape.annotations import BoxedAnnotations
from itertools import compress
import statsmodels.formula.api as smf
import numpy as np

In [None]:
# Locate the folder(s) holding the Raven selection tables.
# load_dotenv() reads variables from a local .env file into the environment.
load_dotenv()

# The environment lookup is currently disabled; the folder is set directly on the next line.
# all_sample_dirs = os.getenv('all_sample_dirs')
all_sample_dirs = '/mnt/c/Users/isobe/Raven Lite 2/Selections/calke_abbey_sample1'

In [None]:
# Define function to get file names from a comma-separated list of directories
def list_files(dirs, ext):
    """Return the files ending in ``ext`` that sit directly inside each directory.

    Parameters
    ----------
    dirs : str
        A single directory, or several separated by commas. Whitespace around
        each entry is ignored and empty entries (e.g. from a trailing comma)
        are skipped.
    ext : str
        File-name suffix to match, e.g. ``'.txt'``.

    Returns
    -------
    list of str
        Matching paths, grouped in the order the directories were given and
        sorted by name within each directory so the result is reproducible.
    """
    file_paths = []

    for directory in dirs.split(","):
        directory = directory.strip()

        # An empty entry would turn the pattern into "/*<ext>" and search the
        # filesystem root, so skip it
        if not directory:
            continue

        # glob returns matches in arbitrary order; sort for a stable file list
        file_paths.extend(sorted(glob.glob(f"{directory}/*{ext}")))

    return file_paths

In [None]:
# Collect every Raven selection table (.txt) found in the configured folder(s)
file_list = list_files(dirs=all_sample_dirs, ext='.txt')

In [None]:
# Build BoxedAnnotations from the Raven selection tables; no audio files are attached
annotations = BoxedAnnotations.from_raven_files(file_list, annotation_column="Annotation")

In [None]:
# Get dataframe from boxed annotations.
# Take a copy: columns are added to this frame later on, and without the copy
# those assignments would also modify the frame held inside `annotations`.
annotations_df = annotations.df.copy()

In [None]:
# Add columns to annotations showing the recording date and whether the door was open.
# The date is the 8 characters at positions 32-39 of the file NAME. Slicing the base
# name instead of the full path gives the same value for the current folder, but no
# longer depends on the length of the directory the selection tables are stored in.
annotation_names = annotations_df['annotation_file'].map(os.path.basename)
annotations_df['date'] = annotation_names.str.slice(32, 40)

# Dates are compared as strings, which orders them chronologically as long as they are
# zero-padded like the '20230624' cut-off. From that date onwards the door counts as open.
annotations_df['door_open'] = annotations_df['date'] >= '20230624'

In [None]:
# Get list of files where cage door is open vs closed.
# The date is the 8 characters at positions 32-39 of the file NAME. Slicing the base
# name instead of the full path gives the same value for the current folder, but no
# longer depends on the length of the directory the selection tables are stored in.
file_df = pd.DataFrame({'filename': file_list})
file_df['date'] = file_df['filename'].map(os.path.basename).str.slice(32, 40)

# Dates are compared as strings, which orders them chronologically as long as they are
# zero-padded like the '20230624' cut-off. From that date onwards the door counts as open.
file_df['door_open'] = file_df['date'] >= '20230624'

door_open_files = file_df.loc[file_df['door_open'], 'filename'].tolist()
door_closed_files = file_df.loc[~file_df['door_open'], 'filename'].tolist()

In [None]:
# Filter to dormouse calls only (labels starting with 'hdor').
# na=False treats rows with a missing label as "not a dormouse call"; without it a
# single missing label puts NaN in the mask and the boolean selection fails.
is_dormouse_call = annotations_df['annotation'].str.startswith('hdor', na=False)
annotations_hdor = annotations_df[is_dormouse_call]

In [None]:
# Function to print statistics about vocalisation rate
def summarise_annotations(annotations_df, file_list, recording_duration_s=55):
    """Print summary statistics about call rate for a set of recordings.

    Parameters
    ----------
    annotations_df : pandas.DataFrame
        One row per call. Must contain the columns 'annotation_file' (the
        recording each call belongs to) and 'Delta Time (s)' (call duration).
    file_list : list
        All recordings that were checked, including those without calls.
        Only its length is used.
    recording_duration_s : int or float, default 55
        Length of each recording in seconds.

    Returns
    -------
    None
        Results are printed as alternating label / value lines. Rates that
        cannot be computed because there are no recordings (or no recordings
        containing calls) are printed as ``nan`` instead of raising
        ZeroDivisionError.
    """
    def _ratio(numerator, denominator):
        # nan rather than an exception when there is nothing to divide by
        return numerator / denominator if denominator else float('nan')

    n_calls = len(annotations_df)
    n_files = len(file_list)
    n_files_with_calls = len(annotations_df['annotation_file'].unique())

    # Recording time in minutes: overall, and for recordings that contain calls
    total_minutes = n_files * recording_duration_s / 60
    calling_minutes = n_files_with_calls * recording_duration_s / 60
    calls_per_minute = _ratio(n_calls, total_minutes)

    print(f"{recording_duration_s}s recordings containing dormouse calls")
    print(n_files_with_calls)
    print(f"Total {recording_duration_s}s recordings")
    print(n_files)
    print(f"{recording_duration_s}s recordings containing calls (%)")
    print(_ratio(n_files_with_calls, n_files) * 100)
    print("Total dormouse calls duration (s)")
    print(sum(annotations_df['Delta Time (s)']))
    print("Dormouse calls (count)")
    print(n_calls)
    print("Total recording duration (s)")
    print(n_files * recording_duration_s)
    print("Calls per minute")
    print(calls_per_minute)
    print("Calls per hour")
    print(calls_per_minute * 60)
    print("Calls per minute (among recordings containing calls)")
    print(_ratio(n_calls, calling_minutes))

In [None]:
# Get list of files where cage door is open vs closed.
# The date is the 8 characters at positions 32-39 of the file NAME. Slicing the base
# name instead of the full path gives the same value for the current folder, but no
# longer depends on the length of the directory the selection tables are stored in.
file_df = pd.DataFrame({'filename': file_list})
file_df['date'] = file_df['filename'].map(os.path.basename).str.slice(32, 40)

# Dates are compared as strings, which orders them chronologically as long as they are
# zero-padded like the '20230624' cut-off. From that date onwards the door counts as open.
file_df['door_open'] = file_df['date'] >= '20230624'

door_open_files = file_df.loc[file_df['door_open'], 'filename'].tolist()
door_closed_files = file_df.loc[~file_df['door_open'], 'filename'].tolist()

# Files whose path mentions 'sample1'
sample1_files = file_df.loc[file_df['filename'].str.contains('sample1'), 'filename'].tolist()

In [None]:
# Dormouse call statistics for recordings made while the cage door was closed
is_door_closed = annotations_hdor['door_open'] == False
annotations_hdor_closed = annotations_hdor[is_door_closed]

print("Door closed (Sample 1)")
summarise_annotations(annotations_hdor_closed, door_closed_files)

In [None]:
# Dormouse call statistics for recordings made while the cage door was open
is_door_open = annotations_hdor['door_open'] == True
annotations_hdor_open = annotations_hdor[is_door_open]

print("Door open (Sample 1 )")
summarise_annotations(annotations_hdor_open, door_open_files)

In [None]:
# Dormouse call statistics across every recording, regardless of door state
print("Door open or closed (i.e. all of Sample 1)")
summarise_annotations(annotations_df=annotations_hdor, file_list=file_list)

In [None]:
# Display the dormouse-only annotations for inspection
annotations_hdor

In [None]:
# Ascending calls only (labels starting 'hdor_asc'), across all recordings
is_ascending = annotations_hdor['annotation'].str.startswith('hdor_asc')
annotations_hdor_asc = annotations_hdor[is_ascending]

print("Ascending calls (door open or closed)")
summarise_annotations(annotations_hdor_asc, file_list)

In [None]:
# Arch calls only (labels starting 'hdor_arch'), across all recordings
is_arch = annotations_hdor['annotation'].str.startswith('hdor_arch')
annotations_hdor_arch = annotations_hdor[is_arch]

print("Arch calls (door open or closed)")
summarise_annotations(annotations_hdor_arch, file_list)

In [None]:
# Number of ascending calls (one row per call)
annotations_hdor_asc.shape[0]

In [None]:
# Get count of calls in each recording (only recordings with at least one call appear)
file_calls_nonzero = (
    annotations_hdor
    .groupby('annotation_file')['annotation']
    .count()
    .rename('call_count')
    .reset_index()
)

# Left-merge onto the full file list so recordings with 0 calls are kept.
# The merge leaves NaN for those recordings, which turns the counts into floats,
# so fill with 0 and cast back to whole numbers.
file_calls = (
    file_df
    .merge(file_calls_nonzero, how='left', left_on='filename', right_on='annotation_file')
    .drop(columns=['annotation_file'])
    .fillna({'call_count': 0})
    .astype({'call_count': int})
)

# Add binary column showing whether calls are present or not
file_calls['call_binary'] = file_calls['call_count'] > 0
file_calls['call_binary_num'] = file_calls['call_binary'].astype(int)

# Add column for cage number: the first two characters of the file NAME.
# Slicing the base name rather than the full path gives the same value for the
# current folder without depending on the length of the directory path.
file_calls['cage'] = file_calls['filename'].map(os.path.basename).str.slice(0, 2)

In [None]:
# Logistic regression of call presence on door state, with cage as a categorical term
door_formula = "call_binary_num ~ door_open + C(cage)"
model = smf.logit(door_formula, data=file_calls).fit()

In [None]:
# Total number of calls for each cage / door-state combination
calls_by_cage_and_door = file_calls.groupby(['cage', 'door_open'])['call_count'].sum()
calls_by_cage_and_door

In [None]:
# Show the regression results held in `model`.
# NOTE(review): `model` is assigned by more than one cell in this notebook, so this
# displays whichever fit was run most recently.
model.summary()

In [None]:
# Add call type column (work on a copy so the new column is not written to a slice
# of the original annotations frame)
annotations_hdor = annotations_hdor.copy()

annotations_hdor['call_type'] = np.select(
    [
        annotations_hdor['annotation'].str.startswith('hdor_asc'),
        annotations_hdor['annotation'].str.startswith('hdor_arch'),
    ],
    ['asc', 'arch'],
    default='other'
)

# Get count of calls in each recording, split by call type
# (only recording / call-type combinations that actually occur appear here)
file_calls_nonzero_call_type = (
    annotations_hdor
    .groupby(['annotation_file', 'call_type'])['annotation']
    .count()
    .rename('call_count')
    .reset_index()
)

# Split file list rows into call types: one row per (recording, call type)
call_types_df = pd.DataFrame({'call_type': ['asc', 'arch', 'other']})
file_df_call_type = file_df.merge(call_types_df, how="cross")

# Left-merge onto the expanded file list so combinations with 0 calls are kept.
# The merge leaves NaN for those rows, which turns the counts into floats,
# so fill with 0 and cast back to whole numbers.
file_calls_call_type = (
    file_df_call_type
    .merge(
        file_calls_nonzero_call_type,
        how='left',
        left_on=['filename', 'call_type'],
        right_on=['annotation_file', 'call_type'],
    )
    .drop(columns=['annotation_file'])
    .fillna({'call_count': 0})
    .astype({'call_count': int})
)

# Add binary column showing whether calls are present or not
file_calls_call_type['call_binary'] = file_calls_call_type['call_count'] > 0
file_calls_call_type['call_binary_num'] = file_calls_call_type['call_binary'].astype(int)

# Add column for cage number: the first two characters of the file NAME.
# Slicing the base name rather than the full path gives the same value for the
# current folder without depending on the length of the directory path.
file_calls_call_type['cage'] = (
    file_calls_call_type['filename'].map(os.path.basename).str.slice(0, 2)
)

In [None]:
# Logistic regression of call presence on call type, with cage as a categorical term
call_type_formula = "call_binary_num ~ call_type + C(cage)"
model = smf.logit(call_type_formula, data=file_calls_call_type).fit()

In [None]:
# Total number of calls for each cage / call-type combination
calls_by_cage_and_type = file_calls_call_type.groupby(['cage', 'call_type'])['call_count'].sum()
calls_by_cage_and_type

In [None]:
# Show the regression results held in `model`.
# NOTE(review): `model` is assigned by more than one cell in this notebook, so this
# displays whichever fit was run most recently.
model.summary()