In [None]:
# Load packages
import os
import glob
from pathlib import Path
import shutil
import pandas as pd

In [None]:
# Work from the external drive that holds the recordings, so that relative
# paths used from here on resolve against it
data_root = '/mnt/e/'
os.chdir(data_root)

In [None]:
# Get list of .WAV audio files from 3 selected cages.
# The patterns are relative to the current working directory; the trailing '*'
# matches whatever follows the 'M' in each cage's folder name.
cage_dir_patterns = ["01_NR23_CalkeAbbey_Dormouse_M*",
                     "03_NR23_CalkeAbbey_Dormouse_M*",
                     "11_NR23_CalkeAbbey_Dormouse_M*"]

# glob returns matches in arbitrary, filesystem-dependent order. Sort so that
# the row order of anything built from this list -- and therefore any seeded
# random sample drawn from it -- is the same on every run and every machine.
file_list = sorted(
    wav_path
    for pattern in cage_dir_patterns
    for wav_path in glob.glob(f"{pattern}/*.WAV")
)

In [None]:
# One row per audio file, with the file's path as the only column so far
file_df = pd.DataFrame(file_list, columns=['file_path'])

In [None]:
# Prep variables to stratify by.
# Cage id: the first two characters of the path (the cage folder's prefix).
file_df['cage'] = file_df['file_path'].str.slice(0, 2)

# Read the recording timestamp from the file's own name rather than from fixed
# character offsets into the whole path: the cage folders are matched with a
# wildcard, so a folder name of a different length would shift fixed offsets
# and silently produce wrong dates and times.
# Assumes the file name contains an 8-digit date, one separator character and
# a 6-digit time (e.g. YYYYMMDD_HHMMSS) -- TODO confirm against the recorder's
# naming scheme.
recording_name = file_df['file_path'].str.rsplit('/', n=1).str[-1]
timestamp = recording_name.str.extract(r'(?P<date>\d{8})\D(?P<start_time>\d{6})')
file_df['date'] = timestamp['date']
file_df['start_time'] = timestamp['start_time']  # NaN when no timestamp is found
file_df['start_hour'] = file_df['start_time'].str.slice(0, 2)

# Flattened version of the path (folder and file joined by "_"), usable as a
# single file name
file_df['file_name'] = file_df['file_path'].str.replace("/", "_")

In [None]:
# Split start times into 3 equal bins of three start hours each
time_bin_hours = {
    'early': ['20', '21', '22'],
    'mid': ['23', '00', '01'],
    'late': ['02', '03', '04'],
}
hour_to_bin = {
    hour: label
    for label, hours in time_bin_hours.items()
    for hour in hours
}

# Series.case_when keeps the original value wherever no condition matches, so
# a start hour outside 20-04 would keep its hour string (e.g. '05') as a bin
# label and become an extra stratum of its own. Mapping through a lookup
# leaves such rows as NaN instead; groupby skips NaN keys by default.
file_df['time_bin'] = file_df['start_hour'].map(hour_to_bin)

In [None]:
# Check whether door was open or closed based on date: recordings dated on or
# after the cutoff are flagged as door open, earlier ones as closed.
# The dates are compared as strings, which orders them chronologically as long
# as they are zero-padded digit strings of the same form as the cutoff.
door_open_from = '20230624'

# The comparison already yields the boolean flag, so use it directly; this
# gives a proper bool column instead of an object column of True/False.
file_df['door_open'] = file_df['date'] >= door_open_from

In [None]:
# Stratified random sample: 20 files from every combination of cage, time bin
# and door state. The fixed random_state seeds the draw.
strata = ['cage', 'time_bin', 'door_open']
sample_df = file_df.groupby(strata).sample(n=20, random_state=1)

In [None]:
# Paths of the sampled files as a plain Python list
sample = sample_df['file_path'].to_list()

In [None]:
# Copy the sampled files into a single flat folder, creating it if needed
save_dir = '/mnt/c/Users/isobe/Documents/Hazel dormouse data/calke_abbey_sample1'
Path(save_dir).mkdir(parents=True, exist_ok=True)

for src_path in sample:
    # Fold the folder part of the path into the file name, so files with the
    # same name in different folders get distinct destinations
    flat_name = src_path.replace("/", "_")
    shutil.copyfile(src_path, f"{save_dir}/{flat_name}")