In [None]:
import numpy as np
import pandas as pd
import os
import random
import tensorflow as tf
from CNN.utility import train_routine
from ds_creation.ds_utility import get_file_count, get_other_class
from ds_creation.split_config import phisical_split
from ds_creation.plot_utility import process_metrics
from prototypical.train.train_setup import train
from prototypical.model.loader import get_samples



# --- Split ratios ------------------------------------------------------------
# Train/validation fractions: handed to the CNN training routine and also used
# to split the prototypical-network training species into train and val.
SPLIT_PERC = {'train': 0.8, 'val': 0.2}
# Fraction of the files reserved for testing by the one-off physical split below.
TEST_SPLIT = 0.2

# --- Paths (all relative to the notebook's working directory) ----------------
_MAMMALS_ROOT = os.path.join('data', 'mammals_calls')
DATA_DIR = os.path.join(_MAMMALS_ROOT, 'data')
SPLIT_DIR = os.path.join(_MAMMALS_ROOT, 'splits', 'custom')
TEST_DIR = os.path.join('data', 'mammals_calls_test')
AUDIO_DIR = os.path.join('data', 'audio')
MODELS_METRICS_DIR = "models_metrics"
CNN_CACHE_DIR = os.path.join("data_cache", "CNN")

# Output directories written to by this notebook must exist up front.
for _required_dir in (MODELS_METRICS_DIR, SPLIT_DIR):
    os.makedirs(_required_dir, exist_ok=True)

# --- Training switches -------------------------------------------------------
# PATIENCE, FROM_START and TO_TRAIN are forwarded to the CNN training routine.
# NOTE(review): their exact semantics live in CNN.utility.train_routine — confirm there.
PATIENCE = 5
TO_TRAIN = False
FROM_START = True

# --- Input size --------------------------------------------------------------
# Height and width of the model inputs; used together with 3 channels further down.
h = 164
w = 397

# --- Reproducibility ---------------------------------------------------------
# Seed every random number generator used in the notebook with the same value.
seed = 2025
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(seed)

# --- One-off physical split --------------------------------------------------
# Run the physical file split only the first time, to divide the files into
# train and test. Flip FIRST_RUN to True for that initial run only.
FIRST_RUN = False
if FIRST_RUN:
    # Complement of the test fraction.
    # NOTE(review): presumably the share of files kept for training — confirm
    # against ds_creation.split_config.phisical_split.
    perc = 1 - TEST_SPLIT
    phisical_split(DATA_DIR, perc)

Le specie vengono divise in tre gruppi:
* Quelle per il training della CNN: hanno almeno 1000 sample; le restanti classi formeranno la classe other
* Quelle per il training della Prototypical: hanno tra i 100 ed i 1000 sample
* Quelle per il test della Prototypical: hanno meno di 100 sample

Così vengono creati i file che serviranno alla prototypical per splittare le classi tra train, validation e test

In [None]:
# Per-species file counts; the filters below read its 'species' and
# 'file_count' columns.
count_df = get_file_count(DATA_DIR)

# Partition the species into three disjoint groups by number of files:
#   >= 1000     -> CNN training classes
#   100 .. 999  -> prototypical-network training/validation classes
#   < 100       -> prototypical-network test classes
# The middle group's upper bound is exclusive so that a species with exactly
# 1000 files is not assigned to both the CNN group and the prototypical group.
file_counts = count_df['file_count']
CNN_training = count_df[file_counts >= 1000]['species'].tolist()
proto_training = count_df[(file_counts >= 100) & (file_counts < 1000)]['species'].tolist()
proto_test = count_df[file_counts < 100]['species'].tolist()

# Shuffle in place, then cut the list into train/val according to SPLIT_PERC.
# NOTE(review): this draws from the global `random` state, so re-running this
# cell without re-running the seeding cell produces a different split and
# overwrites the split files below.
random.shuffle(proto_training)
split_idx = int(len(proto_training) * SPLIT_PERC['train'])
proto_train = proto_training[:split_idx]
proto_val = proto_training[split_idx:]
print(len(proto_train), len(proto_val), len(proto_test))

# Write one '<split>.txt' file per split into SPLIT_DIR, one species per line.
for split_name, split_species in (
    ('test', proto_test),
    ('train', proto_train),
    ('val', proto_val),
):
    with open(os.path.join(SPLIT_DIR, f'{split_name}.txt'), 'w') as f:
        f.writelines(f"{species}\n" for species in split_species)



Viene fisicamente generata la classe other, contenente i file di tutte le altre classi che hanno meno di 1000 sample.
Viene quindi avviato il training della CNN su queste classi con uno split train/val 80/20 per stabilire il numero di epoche ottimale per l'addestramento.

In [None]:
# Species below the CNN threshold are merged into the "other" class.
# The bound is exclusive and mirrors the `>= 1000` rule used to pick the CNN
# classes, so every species is either a CNN class or part of "other"
# (the previous `< 999` left a species with exactly 999 files in neither).
OTHER_MAX_FILES = 1000
count_df_truncated = get_file_count(DATA_DIR)
count_df_truncated = count_df_truncated[count_df_truncated['file_count'] < OTHER_MAX_FILES]

other_species_list = count_df_truncated['species'].tolist()
# Report the number of species (row count), not the DataFrame's shape tuple.
print(f'Other total files: {count_df_truncated["file_count"].sum()}, species count: {len(count_df_truncated)}, species: {other_species_list}')
# NOTE(review): per the notebook text this physically builds the "other" class
# from the listed species; the exact file operations are in
# ds_creation.ds_utility.get_other_class — confirm there.
get_other_class(DATA_DIR, other_species_list)

# Recount after the "other" class has been generated, so later cells see the
# updated directory contents.
count_df = get_file_count(DATA_DIR)

In [None]:
# CNN training run on the classes counted in `count_df`, using the train/val
# fractions in SPLIT_PERC and early-stopping patience PATIENCE.
# NOTE(review): the meaning of the (0, 0) pair and of `cardinality` is defined
# in CNN.utility.train_routine and is not visible here — confirm there.
other_ds, history = train_routine(
    count_df,
    PATIENCE,
    SPLIT_PERC,
    DATA_DIR,
    (w, h),
    (0, 0),
    cardinality=1000,
    subfolder='31-10_training',
    from_start=FROM_START,
    to_train=TO_TRAIN,
)

In [None]:
# Post-process the metrics of one training run, identified by its date prefix.
# NOTE(review): the training cell writes to subfolder '31-10_training' while
# this cell reads '14-10_training' — confirm which run is meant to be analysed.
curr_training_date = '14-10'
metrics_run_dir = os.path.join(MODELS_METRICS_DIR, f'{curr_training_date}_training')

# NOTE(review): the literal 13 is passed through unchanged; its meaning is
# defined by ds_creation.plot_utility.process_metrics — confirm there.
process_metrics(count_df, 13, metrics_run_dir, MODELS_METRICS_DIR)

## Proto Training

In [12]:
# Episode layout shared by the training and test settings below.
way = 4
support = 5
query = 3

# Configuration handed to the prototypical-network trainer.
config = {
    # Dataset and the class split written to SPLIT_DIR by the split cell.
    "data.dataset": "mammals_calls",
    "data.split": "custom",
    # Same way/support/query values for training and testing episodes.
    "data.train_way": way,
    "data.train_support": support,
    "data.train_query": query,
    "data.test_way": way,
    "data.test_support": support,
    "data.test_query": query,
    "data.episodes": 10,
    "data.gpu": 0,
    "data.cuda":True,
    # Input dimensions as "height,width,channels". Built from the notebook-wide
    # `h` and `w` so it cannot drift from the size used by the other cells
    # (evaluates to "164,397,3" with the current config).
    "model.x_dim": f"{h},{w},3",
    "model.z_dim": 64,
    "train.epochs": 50,
    'train.optim_method': "Adam",
    "train.lr": 0.001,
    "train.patience": 5,
    # The sample-loading cell further down reads the model back from this path.
    "model.save_path": 'data_cache/proto/test_mammals_calls.keras'
}

train(config)

Loaded 2 splits with 4 classes each.
Training started.
Epoch 1 started.
support shape: (4, 5, 164, 397, 3), query shape: (4, 3, 164, 397, 3)
x shape: (12, 96), y shape: (4, 96)
support shape: (4, 5, 164, 397, 3), query shape: (4, 3, 164, 397, 3)
x shape: (12, 96), y shape: (4, 96)
support shape: (4, 5, 164, 397, 3), query shape: (4, 3, 164, 397, 3)
x shape: (12, 96), y shape: (4, 96)
Epoch 1 ended.
Epoch 2, Loss: 6.384324073791504, Accuracy: 0.6583332419395447, Val Loss: 1.345668911933899, Val Accuracy: 0.6305556297302246
Saving new best model with accuracy: 0.6305556
Epoch 2 started.
Epoch 2 ended.
Epoch 3, Loss: 1.0188957452774048, Accuracy: 0.5416666865348816, Val Loss: 1.065984845161438, Val Accuracy: 0.6000000238418579
Epoch 3 started.
Epoch 3 ended.
Epoch 4, Loss: 0.9542425274848938, Accuracy: 0.5749999284744263, Val Loss: 0.9351992011070251, Val Accuracy: 0.7092592716217041
Saving new best model with accuracy: 0.7092593
Epoch 4 started.
Epoch 4 ended.
Epoch 5, Loss: 0.8383702039

In [None]:
# Every species handled by the prototypical network, in the order
# validation, training, test.
classes = proto_val + proto_train + proto_test

# For each species, the number of entries in its directory under DATA_DIR.
n_support_dict = {
    curr_class: len(os.listdir(os.path.join(DATA_DIR, curr_class)))
    for curr_class in classes
}

# Input shape descriptor and the saved model file passed to the loader.
proto_input_shape = {'w':w, 'h':h, 'c':3}
proto_model_path = os.path.join('data_cache', 'proto', 'test_mammals_calls.keras')

# NOTE(review): the structure of the returned value is defined by
# prototypical.model.loader.get_samples and is not visible here.
results = get_samples(classes, n_support_dict, proto_input_shape, proto_model_path, DATA_DIR)