# Trials

In [None]:
import pandas as pd
import re

# Base name of the hyperopt run; reused for the input log and the output CSV
filename = 'gramacy_Hyperopt_11Jan25_7704032'
filename_hyperopt = f"../../../raw/{filename}.log"

# One dict per parsed trial line
data = []
# Iteration the parser is currently inside; stays None until a header is seen
current_iteration = None

# Numeric literal: optional sign, decimal digits, optional exponent.
# A plain [\d.]+ stops at the "e" of "3.2e-05" and would silently record 3.2.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

# Header line that opens an iteration; group 1 is the iteration number
iteration_pattern = re.compile(r"===== Running iteration (\d+) of \d+ =====")

# Trial line; group 1 is the trial number, group 2 the MSE value
trial_pattern = re.compile(
    r"Trial\s+(\d+)/\d+\s+=>\s+MSE:\s+(" + _NUMBER + r")"
)

# Stream the log line by line so the whole file is never held in memory
with open(filename_hyperopt, 'r') as file:
    for line in file:
        iter_match = iteration_pattern.search(line)
        if iter_match:
            current_iteration = int(iter_match.group(1))
            continue

        # Trial lines seen before the first iteration header are ignored,
        # because they cannot be attributed to an iteration.
        trial_match = trial_pattern.search(line)
        if trial_match and current_iteration is not None:
            trial_num = int(trial_match.group(1))
            mse = float(trial_match.group(2))
            data.append({
                'Iteration': current_iteration,
                'Trial': trial_num,
                'MSE': mse
            })

# Naming the columns explicitly keeps them present when no trial was parsed,
# so the sort below works on an empty frame instead of raising KeyError.
df_trials = pd.DataFrame(data, columns=['Iteration', 'Trial', 'MSE'])

# Order rows by iteration, then trial, and renumber the index from zero
df_trials.sort_values(by=['Iteration', 'Trial'], inplace=True)
df_trials.reset_index(drop=True, inplace=True)

# Write the parsed trials to '<filename>_TRIALS.csv' in the working directory
df_trials.to_csv(f'{filename}_TRIALS.csv', index=False)

# Display the DataFrame as the cell output
df_trials


# Iterations

In [None]:
import pandas as pd
import re


# Path of the log to parse; `filename` must already be defined (Trials cell)
filename_hyperopt = f"../../../raw/{filename}.log"

# One dict per completed iteration
iterations_data = []
# State of the iteration currently being assembled; None means "not seen yet"
current_iteration = None
current_time = None
current_best_mse = None

# Numeric literal: optional sign, decimal digits, optional exponent.
# A plain [\d.]+ stops at the "e" of "3.2e-05" and would silently record 3.2.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

# Header line that opens an iteration; group 1 is the iteration number
iteration_start_pattern = re.compile(r"===== Running iteration (\d+) of \d+ =====")

# Completion line; group 1 is the iteration number, group 2 the elapsed seconds
iteration_time_pattern = re.compile(
    r"\[Iteration\s+(\d+)\]\s+Completed\s+in\s+(" + _NUMBER + r")\s+seconds\."
)

# Best-MSE line; group 1 is the iteration number, group 2 the MSE value
iteration_best_mse_pattern = re.compile(
    r"\[Iteration\s+(\d+)\]\s+Best\s+MSE:\s+(" + _NUMBER + r")"
)

with open(filename_hyperopt, 'r') as file:
    for line in file:
        iter_start_match = iteration_start_pattern.search(line)
        if iter_start_match:
            current_iteration = int(iter_start_match.group(1))
            # Drop anything left over from an iteration that never completed,
            # so its time or MSE cannot be attributed to this one.
            current_time = None
            current_best_mse = None
            continue

        iter_time_match = iteration_time_pattern.search(line)
        if iter_time_match:
            iter_num = int(iter_time_match.group(1))
            time_taken = float(iter_time_match.group(2))
            # Only accept values tagged with the iteration being assembled
            if current_iteration == iter_num:
                current_time = time_taken
        else:
            iter_best_mse_match = iteration_best_mse_pattern.search(line)
            if not iter_best_mse_match:
                continue  # Line carries nothing of interest
            iter_num = int(iter_best_mse_match.group(1))
            best_mse = float(iter_best_mse_match.group(2))
            if current_iteration == iter_num:
                current_best_mse = best_mse

        # Emit the record as soon as both values are known. Checking after
        # either kind of line makes the result independent of the order in
        # which the time and best-MSE lines appear in the log.
        if (
            current_iteration is not None
            and current_time is not None
            and current_best_mse is not None
        ):
            iterations_data.append({
                'Iteration': current_iteration,
                'MSE': current_best_mse,
                'Time': current_time
            })
            # Reset for the next iteration
            current_iteration = None
            current_time = None
            current_best_mse = None

# Naming the columns explicitly keeps them present when no iteration was
# parsed, so the sort below works on an empty frame instead of raising KeyError.
df_iterations = pd.DataFrame(iterations_data, columns=['Iteration', 'MSE', 'Time'])

# Order rows by iteration and renumber the index from zero
df_iterations.sort_values(by='Iteration', inplace=True)
df_iterations.reset_index(drop=True, inplace=True)

# Write the per-iteration results to '<filename>_ITERATIONS.csv'
df_iterations.to_csv(f'{filename}_ITERATIONS.csv', index=False)

# Display the DataFrame as the cell output
df_iterations


# Final Summary

In [None]:
# Summarise the per-iteration results: best/worst/mean/spread of the MSE
# column and mean/spread of the Time column of df_iterations.
mse_series = df_iterations['MSE']
time_series = df_iterations['Time']

# Each value is a one-element list so the dict becomes a single-row DataFrame
mse_summary = {
    'Best MSE': [mse_series.min()],  # lower MSE is better
    'Worst MSE': [mse_series.max()],
    'Average MSE': [mse_series.mean()],
    'MSE StdDev': [mse_series.std()],
    'Average Time': [time_series.mean()],
    'Time StdDev': [time_series.std()],
}

mse_summary_df = pd.DataFrame(mse_summary)

# Write the summary row to '<filename>_FINALSUMMARY.csv'
mse_summary_df.to_csv(f'{filename}_FINALSUMMARY.csv', index=False)

# Display the summary as the cell output
mse_summary_df