In [11]:
import pandas as pd
import glob

import pandas as pd
import re
import os

def parse_filename(file_name):
    """
    Extracts the dataset name, sample rate and feature type from a results
    file name.

    Only the final path component is inspected, so directory names (even
    ones containing underscores or the text "MFCC") cannot shift the fields
    or change the detected feature type.

    The base name is split on "_" and read positionally:
      * field index 3 is the dataset name,
      * the third field from the end is the sample rate (the two fields
        after it are ignored; presumably a date/time stamp - confirm
        against the files actually produced),
      * the feature type is "MFCC" when that text occurs anywhere in the
        base name, otherwise "STFT".

    Illustrative shape:
        lstm_grid_DS_ESC_MFCC_16000_<DATE>_<TIME>.csv -> ("ESC", 16000, "MFCC")

    Parameters:
        file_name (str): File name or path of a results CSV.

    Returns:
        tuple: (dataset, sample_rate, feature_type) as (str, int, str).

    Raises:
        IndexError: If the base name has fewer than four "_"-separated fields.
        ValueError: If the sample-rate field is not an integer.
    """
    # Parse the base name only: glob returns paths such as "./name.csv", and
    # any "_" in a parent directory would otherwise offset the field indices.
    base_name = os.path.basename(file_name)
    fields = base_name.split("_")

    ds = fields[3]
    ds_type = "MFCC" if "MFCC" in base_name else "STFT"
    sr = int(fields[-3])

    return ds, sr, ds_type

def combine_top_models(file_list):
    """
    Builds a summary table with the best-scoring row of every results CSV.

    For each file, the row with the largest 'f1_score' is selected and
    tagged with metadata parsed from the file name ('dataset', 'file_type',
    'sample_rate') plus the originating path ('source_file'). A column of
    the same name already present in the CSV is overwritten.

    Each selected row keeps the index label it had in its own file, so the
    returned frame's index may contain repeated labels.

    Parameters:
        file_list (list of str): Paths of the CSV files to summarise.

    Returns:
        pd.DataFrame: One row per input file; empty when file_list is empty.
    """
    best_rows = []

    for path in file_list:
        results = pd.read_csv(path)
        dataset, sample_rate, ds_type = parse_filename(path)

        # Copy so the metadata assignments below never write into `results`.
        best_label = results['f1_score'].idxmax()
        best = results.loc[best_label].copy()

        # Insertion order here determines the order of any newly added columns.
        metadata = {
            'dataset': dataset,
            'file_type': ds_type,
            'sample_rate': sample_rate,
            'source_file': path,
        }
        for column, value in metadata.items():
            best[column] = value

        best_rows.append(best)

    return pd.DataFrame(best_rows)

def get_csv_filenames_in_current_dir(pattern="lstm_grid_DS_*.csv"):
    """
    Returns the CSV file paths in the current directory matching the pattern.

    The paths are returned in sorted order. glob itself yields matches in an
    arbitrary, platform-dependent order, so sorting keeps any table built
    from this list reproducible between runs and machines.

    Parameters:
        pattern (str): Glob pattern to match filenames. Default targets 'lstm_grid_DS_*.csv'.

    Returns:
        list of str: Sorted matching paths, each prefixed with the current
        directory (e.g. './name.csv' on POSIX). Empty if nothing matches.
    """
    current_dir = "."
    search_pattern = os.path.join(current_dir, pattern)
    return sorted(glob.glob(search_pattern))

# Example usage
# Finds the matching result files in the current directory, builds the
# best-model summary from them, prints it, and writes it to disk.
if __name__ == "__main__":
    file_paths = get_csv_filenames_in_current_dir()
    best_models = combine_top_models(file_paths)
    print(best_models)
    # NOTE: this save runs unconditionally; comment the line out to skip it.
    # index=False leaves the row labels out of the written file.
    best_models.to_csv("top_models_summary.csv", index=False)

   sample_rate  hidden_dim  fc_dim  f1_score dataset file_type  \
0         1000        64.0    32.0  0.342908     ESC      STFT   
1        16000        64.0    64.0  0.572695     ESC      STFT   
0         2000        64.0    32.0  0.338211     ESC      STFT   
0        22000        64.0    32.0  0.614090     ESC      STFT   
1         4000        64.0    64.0  0.598489     ESC      STFT   
2         8000       128.0    32.0  0.521173     ESC      STFT   
1         1000        64.0    64.0  0.590669     ESC      MFCC   
1        16000        64.0    64.0  0.630564     ESC      MFCC   
2         2000       128.0    32.0  0.642691     ESC      MFCC   
3        22000       128.0    64.0  0.612432     ESC      MFCC   
3         4000       128.0    64.0  0.673003     ESC      MFCC   
1         8000        64.0    64.0  0.628266     ESC      MFCC   
3         1000       128.0    64.0  0.339196     U8K      STFT   
1        16000        64.0    64.0  0.627251     U8K      STFT   
2         