In [None]:
# Packages
import pandas as pd
import numpy as np
import random
import torch

from src.utils import create_train_sizes, process_species_model, run_training

In [None]:
# Parameters
# Run configuration shared by the cells below.

# Root folders: per-species audio/label files are read from AUDIO_DIR and
# result CSVs are written under RESULTS_DIR.
AUDIO_DIR = "/workspaces/bird_new/data/non-avian_ML/audio"
RESULTS_DIR = "/workspaces/bird_new/data/non-avian_ML/results"

# Classes to process. Only 'human_vocal' is enabled; the other candidates are
#   'engine', 'woodhouses_toad', 'pacific_chorus_frog', 'bullfrog', 'field_cricket', 'coyote'
SPECIES_LIST = ["human_vocal"]

# Model names to run, and the batch size used when embedding audio.
MODEL_LIST = ["BirdNET", "Perch"]
BATCH_SIZE = 128

# String placeholders: no epoch count / learning rate is configured here, but
# both values are still passed along and appear in the results file name.
EPOCH = LEARNING_RATE = "NA"

In [None]:
# Seed
# Pin every random number generator used by this notebook to 0 so that
# re-running from a fresh kernel draws the same random numbers.
# Order matches the original cell: PyTorch, stdlib `random`, then NumPy.
for _seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    _seed_rng(0)

In [None]:
# Launch the sweep implemented in src.utils using the configuration defined in
# the parameters cell. Arguments are positional, so their order must be kept.
run_training(
    SPECIES_LIST,
    AUDIO_DIR,
    RESULTS_DIR,
    MODEL_LIST,
    BATCH_SIZE,
    EPOCH,
    LEARNING_RATE,
)

In [None]:
#### old code
# Legacy inline train/evaluate sweep, kept for reference. The `run_training(...)`
# cell is presumably its replacement -- confirm against src.utils.
#
# For every (species, model) pair this cell:
#   1. reads the species' train/test label tables,
#   2. embeds the audio with the pretrained model,
#   3. fits a one-class ('present') head on each training subset and scores it
#      with ROC AUC on the test set,
#   4. writes one CSV of scores per (species, model) into RESULTS_DIR.
#
# NOTE(review): `quick_fit` and `roc_auc_score` are not imported by any cell
# visible in this notebook, so this cell raises NameError on a fresh kernel
# until they are in scope (presumably
# `opensoundscape.ml.shallow_classifier.quick_fit` and
# `sklearn.metrics.roc_auc_score` -- confirm before adding the imports).

for SPECIES in SPECIES_LIST:
    for MODEL in MODEL_LIST:
        # Label tables: a 'file' column of audio paths; the remaining columns
        # are handed to quick_fit / roc_auc_score as the labels.
        train_df = pd.read_csv(f"{AUDIO_DIR}/{SPECIES}/train_files.csv")
        test_df = pd.read_csv(f"{AUDIO_DIR}/{SPECIES}/test_files.csv")

        if MODEL == 'Perch':
            # Perch runs read from the 'data_5s' variant of each path.
            # NOTE(review): this rewrites EVERY occurrence of 'data' in the
            # path, not just one directory component -- verify against the
            # actual paths stored in the CSVs.
            train_df['file'] = train_df['file'].str.replace('data', 'data_5s', regex=False)
            test_df['file'] = test_df['file'].str.replace('data', 'data_5s', regex=False)
            # Scoped display option + head(): show enough of the rewritten
            # paths to eyeball them without changing global pandas settings.
            with pd.option_context('display.max_colwidth', 100):
                print(train_df.head())

        # The file path becomes the index, leaving only label columns as values.
        train_df = train_df.astype({'file': str}).set_index("file")
        test_df = test_df.astype({'file': str}).set_index("file")

        # Mapping of train-size label -> training subset (see src.utils).
        train_subsets = create_train_sizes(train_df)

        # Load model
        model = torch.hub.load('kitzeslab/bioacoustics-model-zoo', MODEL, trust_repo=True)

        # The test set is the same for every train size, so embed it once.
        # assumes embed() output does not depend on the classifier head
        # (model.network) -- TODO confirm for both models.
        emb_val = model.embed(test_df, return_dfs=False, batch_size=BATCH_SIZE, num_workers=0)
        val_features = torch.tensor(emb_val).float()
        val_labels = test_df.values

        # Train size 0: score the head BEFORE any fitting. Evaluating after the
        # loop would reuse the head fitted on the last train size and simply
        # duplicate that score.
        # assumes change_classes() installs a freshly initialised head -- TODO confirm.
        model.change_classes(['present'])
        predictions0 = model.network(val_features).detach().numpy()
        score0 = roc_auc_score(val_labels, predictions0, average=None)

        results = []

        # Iterate through train sizes and train/test
        for train_size, subset in train_subsets.items():
            train_subset = pd.DataFrame(subset)

            emb_train = model.embed(train_subset, return_dfs=False, batch_size=BATCH_SIZE, num_workers=0)

            # Reset the head so each train size is fitted independently.
            model.change_classes(['present'])

            quick_fit(model.network, emb_train, train_subset.values, emb_val, val_labels, steps=1000)

            predictions = model.network(val_features).detach().numpy()
            score = roc_auc_score(val_labels, predictions, average=None)

            results.append({'train_size': train_size, MODEL: score})

        # Keep the zero-training row last, matching the original row order.
        results.append({'train_size': 'train_size_0', MODEL: score0})

        # One CSV per (species, model); the name is deterministic, so a re-run
        # with the same parameters overwrites the previous file.
        results_df = pd.DataFrame(results)
        results_df.to_csv(f"{RESULTS_DIR}/{SPECIES}-{MODEL}-{BATCH_SIZE}-{EPOCH}-{LEARNING_RATE}.csv", index=False)