# In [1]:
import pandas as pd
from random import sample

# In [2]:
def create_stratified_sample(input_csv_path, output_csv_path='output.csv'):
    """Write a per-device, per-hour random sample of recordings to a CSV file.

    Processing steps:

    1. Keep rows whose ``Duration`` is at least 60 and whose
       ``StartDateTime`` is not missing.
    2. Keep devices (``AudioMothCode``) that have at least 24 such rows and
       are not on the exclusion list.
    3. Parse the recording hour from the first ``HH:`` pattern found in
       ``Comment`` and store it in a new ``Hour`` column.  Rows without a
       parseable hour are skipped instead of aborting the whole run.
    4. For every kept device and every hour 0-23, draw up to 24 rows at
       random (without replacement) and write all drawn rows to
       ``output_csv_path``.

    Args:
        input_csv_path: Path of the CSV to read.  It must provide the
            ``Duration``, ``StartDateTime``, ``AudioMothCode`` and
            ``Comment`` columns.
        output_csv_path: Path of the CSV to write.  Defaults to
            ``'output.csv'``.

    Returns:
        True when the sample was written, False when any step raised (the
        error message is printed).
    """
    min_duration = 60
    min_files_per_device = 24
    max_files_per_hour = 24
    # Devices the original author flagged as having known issues.
    excluded_devices = ['AM-21', 'AM-19', 'AM-8', 'AM-28']

    try:
        audio_files = pd.read_csv(input_csv_path, low_memory=False)

        # Filter by duration and by presence of a start time.
        usable = (
            (audio_files['Duration'] >= min_duration)
            & audio_files['StartDateTime'].notna()
        )
        filtered = audio_files[usable].copy()

        # Devices with enough usable files that are not excluded.  The order
        # of value_counts() is kept so the output row order is unchanged.
        device_counts = filtered['AudioMothCode'].value_counts()
        eligible = (
            (device_counts >= min_files_per_device)
            & ~device_counts.index.isin(excluded_devices)
        )
        valid_audio_moths = device_counts[eligible].index.tolist()

        # Hour the file was recorded.  A comment without an "HH:" pattern
        # yields NaN, which cannot be cast to int, so those rows are dropped
        # before the cast.
        hours = filtered['Comment'].str.extract(r'(\d{2}):')[0]
        has_hour = hours.notna()
        skipped = int((~has_hour).sum())
        if skipped:
            print(f"Skipped {skipped} row(s) with no parseable hour in 'Comment'")
        filtered = filtered[has_hour].copy()
        filtered['Hour'] = hours[has_hour].astype(int)

        # Collect the sampled pieces and concatenate once at the end;
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # accumulated frame on every call.
        pieces = []
        for audio_moth in valid_audio_moths:
            device_rows = filtered[filtered['AudioMothCode'] == audio_moth]
            for hour in range(24):
                hour_rows = device_rows[device_rows['Hour'] == hour]
                if hour_rows.empty:
                    continue
                chosen = sample(
                    hour_rows.index.tolist(),
                    min(len(hour_rows), max_files_per_hour),
                )
                pieces.append(hour_rows.loc[chosen])

        if pieces:
            stratified_sample = pd.concat(pieces)
        else:
            # Nothing qualified: still write a header-only file.
            stratified_sample = filtered.iloc[0:0]

        # Save the output file.
        stratified_sample.to_csv(output_csv_path, index=False)

        return True
    except Exception as e:
        # Top-level boundary: callers rely on the boolean result.
        print(f"Error: {e}")
        return False

# Example usage: build the sample from the full 2019 Peru dataset and report
# whether the run succeeded.
input_csv = 'Peru_2019_AudioMoth_Data_Full.csv'
success = create_stratified_sample(input_csv)
print(success)

# Out: True
