In [1]:
import pandas as pd
import os

def combine_csvs(input_directory, output_file):
    """
    Combines all single-row CSV files from a directory into a single CSV file.

    Files are read in sorted filename order so that the row order of the
    combined file is reproducible (``os.listdir`` returns entries in arbitrary
    order). If ``output_file`` itself lives inside ``input_directory`` it is
    skipped, so re-running the function does not fold the previous output back
    into the new one.

    Args:
        input_directory (str): The path to the directory containing the CSV files.
        output_file (str): The path to the output CSV file.

    Returns:
        None. Progress and errors are reported via ``print``; when the
        directory is missing or no CSV could be read, nothing is written.
    """
    # Sort the listing: os.listdir gives no ordering guarantee.
    try:
        files = sorted(os.listdir(input_directory))
    except FileNotFoundError:
        print(f"Error: The directory '{input_directory}' was not found.")
        return

    # Normalised absolute path of the output, used to detect it in the input listing.
    output_path = os.path.normcase(os.path.abspath(output_file))

    all_dataframes = []
    csv_seen = 0  # number of candidate CSV files encountered (readable or not)

    for file in files:
        if not file.endswith('.csv'):
            continue

        file_path = os.path.join(input_directory, file)

        # Never ingest a previous run's output as if it were an input file.
        if os.path.normcase(os.path.abspath(file_path)) == output_path:
            print(f"Skipping {file_path} (this is the output file).")
            continue

        csv_seen += 1
        print(f"Reading {file_path}...")

        try:
            all_dataframes.append(pd.read_csv(file_path))
        except Exception as e:
            # Best effort: report the unreadable file and keep going with the rest.
            print(f"Error reading {file_path}: {e}")

    # Distinguish "nothing to read" from "everything failed to read".
    if not all_dataframes:
        if csv_seen:
            print(f"None of the {csv_seen} CSV files could be read. The output file will not be created.")
        else:
            print("No CSV files found in the directory. The output file will not be created.")
        return

    # ignore_index=True gives the combined frame a fresh 0..n-1 index.
    combined_df = pd.concat(all_dataframes, ignore_index=True)

    # index=False keeps the DataFrame index out of the written file.
    combined_df.to_csv(output_file, index=False)
    print(f"\nSuccessfully combined {len(all_dataframes)} files into {output_file}")

# Build the path with os.path.join instead of a literal backslash: '\d' is an
# invalid escape sequence (a warning on modern Python) and only works as a
# separator on Windows.
combine_csvs(os.path.join('results', 'decoder'), 'results.csv')

Reading results\decoder\run_20250812_174450_113eaef29e7946c1ae3398cf87480c04.csv...
Reading results\decoder\run_20250812_174729_9bbf31e7177b42248ad542f57f681562.csv...
Reading results\decoder\run_20250812_174935_2b4b2ac4fa94415aba6edc9e0c83823c.csv...
Reading results\decoder\run_20250812_175831_f54967df664340b8984b5c46487bf201.csv...
Reading results\decoder\run_20250812_175950_62ef8730594841b997ce39b7308ea946.csv...
Reading results\decoder\run_20250812_180019_7d6156481a9446f9af0928ef6837c15d.csv...
Reading results\decoder\run_20250812_180054_5e007c6cc5254c29a8e91470feec737d.csv...
Reading results\decoder\run_20250812_180112_866d6485511b40acbe3fafa7bff8d687.csv...
Reading results\decoder\run_20250812_180325_020684e3686b4afd82373b29444dddb3.csv...
Reading results\decoder\run_20250812_180620_54e1a180f4c346fcac39392596d4025c.csv...
Reading results\decoder\run_20250812_180912_45e288b0627b4ecea3b28a6fb0a0e494.csv...
Reading results\decoder\run_20250812_181433_8746e4b1fab24e1592f2257f3e492d7f

In [5]:
# Load the combined results and order the runs by avg_mse, lowest first.
# The original row labels are kept, so runs can still be looked up by label.
df = pd.read_csv('results.csv').sort_values('avg_mse')
print(df.columns)
print(df.head())

Index(['timestamp', 'pretrained_model_path', 'spange_path', 'nn_size',
       'dropout_fp', 'dropout_nn', 'epochs', 'hidden_factor', 'val_percent',
       'avg_mse', 'mse_per_solvent'],
      dtype='object')
           timestamp                              pretrained_model_path  \
835  20250813_001113  val_loss0.1074_DPR_0.1_MP_0.3_DM_64_TL_5_heads...   
787  20250813_000240  val_loss0.1074_DPR_0.1_MP_0.3_DM_64_TL_5_heads...   
970  20250813_003138  val_loss0.1074_DPR_0.1_MP_0.3_DM_64_TL_5_heads...   
858  20250813_001629  val_loss0.1074_DPR_0.1_MP_0.3_DM_64_TL_5_heads...   
106  20250812_195548  val_loss0.1074_DPR_0.1_MP_0.3_DM_64_TL_5_heads...   

         spange_path  nn_size  dropout_fp  dropout_nn  epochs  hidden_factor  \
835  spange_melt.csv       64         0.1         0.1      40              4   
787  spange_melt.csv       64         0.1         0.1      40              4   
970  spange_melt.csv       64         0.1         0.1      40              4   
858  spange_melt.csv 

In [16]:
import json

# Solvent to leave out when recomputing the average.
EXCLUDED_SOLVENT = 'Acetonitrile.Acetic Acid'

# Select the run with the lowest avg_mse by lookup rather than a hard-coded
# row label, so this cell stays correct when results.csv is regenerated.
best_idx = df['avg_mse'].idxmin()
avg_mse = df.loc[best_idx, 'avg_mse']

# The per-solvent MSEs are stored in the DataFrame as a JSON string.
best_result_string = df.loc[best_idx, 'mse_per_solvent']

# Convert the string to a dictionary mapping solvent name -> MSE
best_result = json.loads(best_result_string)

# Print every solvent's MSE while accumulating the sum
total = 0
for key, value in best_result.items():
    print('SOLVENT: ', key, " MSE ", value)
    total += value

# Unweighted mean of the per-solvent MSEs with the excluded solvent dropped
total -= best_result[EXCLUDED_SOLVENT]
new_mse = total / (len(best_result) - 1)
print('MSE with all solvents: ', avg_mse)
print(f'MSE with {EXCLUDED_SOLVENT} removed: ', new_mse)

SOLVENT:  1,1,1,3,3,3-Hexafluoropropan-2-ol  MSE  0.03080253863775014
SOLVENT:  2,2,2-Trifluoroethanol  MSE  0.018216528789509766
SOLVENT:  2-Methyltetrahydrofuran [2-MeTHF]  MSE  0.0011907749498236495
SOLVENT:  Acetonitrile  MSE  0.005377167506390707
SOLVENT:  Acetonitrile.Acetic Acid  MSE  0.01860430733058326
SOLVENT:  Butanone [MEK]  MSE  0.00240828060254048
SOLVENT:  Cyclohexane  MSE  0.00501082238966579
SOLVENT:  DMA [N,N-Dimethylacetamide]  MSE  0.0015139932707898818
SOLVENT:  Decanol  MSE  0.00452791123491232
SOLVENT:  Diethyl Ether [Ether]  MSE  0.00505641332234017
SOLVENT:  Dihydrolevoglucosenone (Cyrene)  MSE  0.0029882592831882893
SOLVENT:  Dimethyl Carbonate  MSE  0.004849278559091872
SOLVENT:  Ethanol  MSE  0.0035202915601509283
SOLVENT:  Ethyl Acetate  MSE  0.0017182933827203637
SOLVENT:  Ethyl Lactate  MSE  0.0026543763028039477
SOLVENT:  Ethylene Glycol [1,2-Ethanediol]  MSE  0.0005182108506159672
SOLVENT:  IPA [Propan-2-ol]  MSE  0.009248542678373594
SOLVENT:  MTBE [te