# In [None]:
import os
import pandas as pd

# Base path for the experiments
results_dir = "/ceph01/projects/AGRamirez_misc/carpeta_alberto_moreno/GenNet/results"

# List to store the hyperparameter records
hyperparameter_records = []

# (marker searched for in a line, key stored in the record, value converter).
# Order matters: the first marker found in a line wins.
SUMMARY_SCALAR_FIELDS = (
    ("Jobid:", "Jobid", str),
    ("Batchsize:", "Batchsize", int),
    ("Learning rate:", "Learning rate", float),
    ("L1 value:", "L1 value", float),
    ("AUC validation:", "AUC validation", float),
    ("AUC test:", "AUC test", float),
    ("patience:", "Patience", int),
)

# Confusion-matrix fields; each one is collected as a header row plus at most
# one following row, and stored as the rows joined by a single space.
SUMMARY_MATRIX_FIELDS = ("confusionmatrix_val", "confusionmatrix_test")


def parse_results_summary(file_path):
    """Parse one ``results_summary.txt`` file into a dict.

    Scalar fields are recognised by the markers in ``SUMMARY_SCALAR_FIELDS``;
    the value is the text between the first and second ``:`` of the line,
    converted with the field's converter. Scalar keys appear in the result
    only if their marker was found in the file.

    The two confusion-matrix keys are always present (empty string when the
    header never appears). A matrix header is always honoured, even while the
    other matrix is still waiting for its second row, so a matrix written on
    a single line does not swallow the next header.

    Raises:
        ValueError: if a numeric field cannot be converted.
        OSError: if the file cannot be opened.
    """
    record = {}
    matrix_rows = {name: [] for name in SUMMARY_MATRIX_FIELDS}
    pending = None  # name of the matrix still waiting for its second row

    # Only ASCII markers are parsed, so undecodable bytes elsewhere in the
    # file are replaced instead of aborting the whole scan.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        for raw_line in f:
            line = raw_line.strip()  # Remove whitespace and newline characters

            scalar = next(
                (field for field in SUMMARY_SCALAR_FIELDS if field[0] in line),
                None,
            )
            if scalar is not None:
                _, key, convert = scalar
                record[key] = convert(line.split(":")[1].strip())
                continue

            header = next(
                (name for name in SUMMARY_MATRIX_FIELDS if f"{name}:" in line),
                None,
            )
            if header is not None:
                # First row of a matrix: keep what follows the header text
                matrix_rows[header].append(line.replace(f"{header}:", "").strip())
                pending = header
            elif pending is not None and len(matrix_rows[pending]) < 2:
                # Second row of the matrix whose header was seen last
                matrix_rows[pending].append(line)
                if len(matrix_rows[pending]) == 2:
                    pending = None

    # Join the rows of the confusion matrices
    for name in SUMMARY_MATRIX_FIELDS:
        record[name] = ' '.join(matrix_rows[name])
    return record


def detect_dropout(experiment_dir):
    """Return 0.5 if ``model_architecture.txt`` mentions "Dropout", else 0.

    The file is looked up inside *experiment_dir*; a missing file counts as
    "no dropout". The 0.5 is a fixed value assigned by this script; the file
    content is only searched for the word, no rate is read from it.
    """
    architecture_file = os.path.join(experiment_dir, 'model_architecture.txt')
    try:
        with open(architecture_file, 'r', encoding='utf-8', errors='replace') as arch_file:
            return 0.5 if "Dropout" in arch_file.read() else 0
    except FileNotFoundError:
        return 0


# Traverse all directories and collect one record per results_summary.txt
for root, dirs, files in os.walk(results_dir):
    if "results_summary.txt" not in files:
        continue
    # The experiment ID is the second-to-last '_'-separated token of the path
    # (assumes folders are named like "<name>_<id>_<suffix>" - TODO confirm).
    record = {'experiment_id': root.split('_')[-2]}
    record.update(parse_results_summary(os.path.join(root, "results_summary.txt")))
    record['dropout'] = detect_dropout(root)
    hyperparameter_records.append(record)

# Verify and add records for experiments without results file.
# IDs seen so far are kept in a set that is built once and updated as
# placeholders are added, instead of being rebuilt for every directory.
known_ids = {record['experiment_id'] for record in hyperparameter_records}
for root, dirs, files in os.walk(results_dir):
    for dir_name in dirs:
        name_parts = dir_name.split('_')
        if len(name_parts) < 2:
            # A name without '_' has no second-to-last token to use as an ID
            continue
        experiment_id = name_parts[-2]
        if experiment_id in known_ids:
            continue
        known_ids.add(experiment_id)
        # Zero-filled placeholder so the experiment still shows up in the summary
        hyperparameter_records.append({
            'experiment_id': experiment_id,
            'Jobid': 0,
            'Batchsize': 0,
            'Learning rate': 0.0,
            'L1 value': 0.0,
            'AUC validation': 0.0,
            'AUC test': 0.0,
            'Patience': 0,
            'confusionmatrix_val': "0 0 0 0",
            'confusionmatrix_test': "0 0 0 0",
            'dropout': 0,
        })

# Convert the list of records into a Pandas DataFrame
df_hyperparameters = pd.DataFrame(hyperparameter_records)

# Column order used in the exported summaries
summary_columns = ['experiment_id', 'Jobid', 'Batchsize', 'Learning rate',
                   'L1 value', 'Patience', 'AUC validation', 'AUC test',
                   'confusionmatrix_val', 'confusionmatrix_test', 'dropout']

# reindex (rather than plain column selection) creates any column that no
# record provided, filled with NaN, so a missing key or an empty record list
# does not raise KeyError below.
df_summary = df_hyperparameters.reindex(columns=summary_columns)
experiment_ids = df_summary['experiment_id'].astype(str)

# Filter and save results for experiments starting with these digits
for prefix in ['11', '12', '13', '14']:
    # Explicit bool dtype + .loc keeps this a row mask even when there are no rows
    matches_prefix = experiment_ids.str.startswith(prefix).astype(bool)
    df_filtered = df_summary.loc[matches_prefix]

    # Sort by 'AUC test' from highest to lowest
    df_filtered = df_filtered.sort_values(by='AUC test', ascending=False)

    # Save to a CSV file with comma delimiter
    output_file = os.path.join(results_dir, f"summary_results_{prefix}.csv")
    df_filtered.to_csv(output_file, index=False, sep=',')
    print(f"Summary of results saved to: {output_file}")
