In [9]:
import os
import csv
import json

In [10]:
RESULTS_PATH = "/home/giacomoschiavo/finetuning-BirdNET/models/CustomCNN/"
# load all csv file sin the results path
def load_json_files(path):
    csv_files = [f for f in os.listdir(path) if f.endswith('.json') and 'best_model' not in f and '_0' in f]
    data = {}
    print(csv_files)
    for file_name in csv_files:
        with open(os.path.join(path, file_name), 'r') as f:
            data[file_name] = json.load(f)
    return data

all_results = load_json_files(RESULTS_PATH)

['model_ranking_config_9_0.json', 'model_ranking_config_4_0.json', 'model_ranking_config_3_0.json', 'model_ranking_config_7_0.json', 'model_ranking_config_8_0.json', 'model_ranking_config_5_0.json', 'model_ranking_config_10_0.json', 'model_ranking_config_2_0.json', 'model_ranking_config_6_0.json', 'model_ranking_config_1_0.json']


In [11]:
best_results = {file_name: config[0] for file_name, config in all_results.items()}
best_results

{'model_ranking_config_9_0.json': {'model_id': 3,
  'config': {'num_conv_layers': 4,
   'kernel_sizes': [2, 6, 6, 6],
   'channels': [16, 32, 64, 128],
   'batch_size': 32,
   'dropout': 0.5,
   'dense_hidden': 128},
  'micro_f1': 0.012686567164179102,
  'weighted_f1': 0.025733804970389563,
  'samples_f1': 0.005003695491500369,
  'mean_f1': 0.014474689208689677},
 'model_ranking_config_4_0.json': {'model_id': 9,
  'config': {'num_conv_layers': 4,
   'kernel_sizes': [2, 2, 5, 5],
   'channels': [16, 32, 64, 128],
   'batch_size': 64,
   'dropout': 0.5,
   'dense_hidden': 32},
  'micro_f1': 0.013167259786476869,
  'weighted_f1': 0.02426541886021572,
  'samples_f1': 0.004907612712490761,
  'mean_f1': 0.014113430453061114},
 'model_ranking_config_3_0.json': {'model_id': 8,
  'config': {'num_conv_layers': 4,
   'kernel_sizes': [3, 4, 5, 6],
   'channels': [16, 32, 64, 128],
   'batch_size': 32,
   'dropout': 0.5,
   'dense_hidden': 128},
  'micro_f1': 0.01276673354003283,
  'weighted_f1': 0

In [12]:
import pandas as pd
def analyze_model_results(best_results):
    # Lista per contenere i dati appiattiti per il DataFrame
    processed_data = []

    # Processa ogni elemento in all_results_data
    for file_name, model in best_results.items():
        row = {
            'file_name': file_name,
            'model_id': model.get('model_id'),
            'micro_f1': model.get('micro_f1'),
            'weighted_f1': model.get('weighted_f1'),
            'samples_f1': model.get('samples_f1'),
            'mean_f1': model.get('mean_f1')
        }

        config = model.get('config', {})
        row['num_conv_layers'] = config.get('num_conv_layers')
        row['kernel_sizes'] = str(config.get('kernel_sizes')) # Converti in stringa per display
        row['channels'] = str(config.get('channels'))       # Converti in stringa per display
        row['batch_size'] = config.get('batch_size')
        row['dropout'] = config.get('dropout')
        row['dense_hidden'] = config.get('dense_hidden')
        
        processed_data.append(row)

    # Crea il DataFrame di Pandas
    df = pd.DataFrame(processed_data)

    # Riorganizza le colonne per una migliore leggibilità
    df = df[['file_name', 'model_id', 'num_conv_layers', 'kernel_sizes', 'channels', 
             'batch_size', 'dropout', 'dense_hidden', 
             'micro_f1', 'weighted_f1', 'samples_f1', 'mean_f1']]

    return df, df['mean_f1'].idxmax() # Restituisce il DataFrame e il miglior modello

# Chiama la funzione per analizzare i risultati
df_results, index = analyze_model_results(best_results)


In [13]:
df_results.sort_values(by='mean_f1', ascending=False)

Unnamed: 0,file_name,model_id,num_conv_layers,kernel_sizes,channels,batch_size,dropout,dense_hidden,micro_f1,weighted_f1,samples_f1,mean_f1
8,model_ranking_config_6_0.json,19,4,"[4, 5, 6, 6]","[16, 32, 64, 128]",128,0.0,128,0.016786,0.02984,0.006456,0.017694
9,model_ranking_config_1_0.json,10,4,"[2, 2, 4, 4]","[16, 32, 64, 128]",64,0.0,128,0.016458,0.029512,0.006408,0.017459
5,model_ranking_config_5_0.json,9,4,"[4, 4, 6, 6]","[16, 32, 64, 128]",64,0.0,128,0.015046,0.027151,0.005617,0.015938
4,model_ranking_config_8_0.json,16,3,"[2, 4, 5]","[16, 32, 64]",128,0.0,128,0.014596,0.025748,0.005661,0.015335
6,model_ranking_config_10_0.json,7,3,"[2, 2, 5]","[16, 32, 64]",64,0.5,128,0.013686,0.025075,0.005122,0.014628
0,model_ranking_config_9_0.json,3,4,"[2, 6, 6, 6]","[16, 32, 64, 128]",32,0.5,128,0.012687,0.025734,0.005004,0.014475
2,model_ranking_config_3_0.json,8,4,"[3, 4, 5, 6]","[16, 32, 64, 128]",32,0.5,128,0.012767,0.024931,0.005277,0.014325
3,model_ranking_config_7_0.json,8,4,"[2, 2, 3, 6]","[16, 32, 64, 128]",64,0.5,32,0.013691,0.023403,0.005425,0.014173
1,model_ranking_config_4_0.json,9,4,"[2, 2, 5, 5]","[16, 32, 64, 128]",64,0.5,32,0.013167,0.024265,0.004908,0.014113
7,model_ranking_config_2_0.json,3,3,"[3, 6, 6]","[16, 32, 64]",32,0.5,128,0.012458,0.023739,0.004834,0.013677


In [14]:
best_model_info = df_results.sort_values(by='mean_f1', ascending=False).iloc[0]

In [15]:
best_model_info.to_json('models/CustomCNN/best_model_config.json')

In [16]:
best_model_info

file_name          model_ranking_config_6_0.json
model_id                                      19
num_conv_layers                                4
kernel_sizes                        [4, 5, 6, 6]
channels                       [16, 32, 64, 128]
batch_size                                   128
dropout                                      0.0
dense_hidden                                 128
micro_f1                                0.016786
weighted_f1                              0.02984
samples_f1                              0.006456
mean_f1                                 0.017694
Name: 8, dtype: object