In [1]:
import pandas as pd

# Ax MLP

In [3]:
import re
import pandas as pd

# Parse the Ax optimisation log into one row per completed trial.
# Path of the raw log file to read.
file_path = '../../../plotting_data/raw/mlp_AX_27Nov_Paper_150it_7496822.err'

# Parallel lists: position i in each list describes the same completed trial.
iterations, trials, f1_scores = [], [], []

# Regex patterns for the three log lines of interest: the start of an
# iteration, the start of a trial, and the trial-complete line carrying the
# average F1 score.
iteration_pattern = re.compile(r"Starting Ax optimization iteration (\d+) of \d+")
trial_pattern = re.compile(r"Running optimization trial (\d+)\.\.\.")
f1_score_pattern = re.compile(r"Trial \d+ complete - Average F1 Score: ([0-9.]+)")

# Read the log line by line, remembering the most recent iteration/trial
# numbers so each F1 score can be labelled with them.
with open(file_path, 'r') as file:
    current_iteration = None
    # Initialised up front: without this, an F1 line that appears before any
    # trial line would raise NameError when the trial number is recorded.
    current_trial = None
    for line in file:
        iteration_match = iteration_pattern.search(line)
        if iteration_match:
            current_iteration = int(iteration_match.group(1))

        # A trial number is only accepted once an iteration has started.
        trial_match = trial_pattern.search(line)
        if trial_match and current_iteration is not None:
            current_trial = int(trial_match.group(1))

        # Record a score only when both labels are known, so no row is ever
        # stored with a missing iteration or trial number.
        f1_score_match = f1_score_pattern.search(line)
        if f1_score_match and current_iteration is not None and current_trial is not None:
            f1_score = float(f1_score_match.group(1))
            iterations.append(current_iteration)
            trials.append(current_trial)
            f1_scores.append(f1_score)

# One row per completed trial: (Iteration, Trial, F1_Score).
df = pd.DataFrame({
    'Iteration': iterations,
    'Trial': trials,
    'F1_Score': f1_scores
})



In [4]:
# Show the per-trial DataFrame (columns: Iteration, Trial, F1_Score) as the cell output.
df

Unnamed: 0,Iteration,Trial,F1_Score
0,1,1,0.2615
1,1,2,0.4043
2,1,3,0.5128
3,1,4,0.4171
4,1,5,0.3903
...,...,...,...
14995,100,146,0.5777
14996,100,147,0.5767
14997,100,148,0.5761
14998,100,149,0.5807


In [8]:
import re
import pandas as pd

# Parse the Ax optimisation log into per-trial F1 scores plus the wall-clock
# time reported for each iteration, then reduce to one summary row per iteration.

# Raw log file to read.
file_path = '../../../plotting_data/raw/mlp_AX_27Nov_Paper_150it_7496822.err'

# Per-trial columns, filled in lock-step while scanning the log.
iterations, trials, f1_scores = [], [], []

# Seconds reported for each iteration, keyed by iteration number.
iteration_times = {}

# Patterns for the four kinds of log line that carry information.
iteration_pattern = re.compile(r"Starting Ax optimization iteration (\d+) of \d+")
trial_pattern = re.compile(r"Running optimization trial (\d+)\.\.\.")
f1_score_pattern = re.compile(r"Trial \d+ complete - Average F1 Score: ([0-9.]+)")
time_pattern = re.compile(r"Iteration (\d+) completed\. Time taken: ([0-9.]+) seconds")

with open(file_path, 'r') as file:
    current_iteration = None
    current_trial = None
    for line in file:
        # Try every pattern, then act on the first applicable one only;
        # the branch order below is the priority order.
        iteration_hit = iteration_pattern.search(line)
        trial_hit = trial_pattern.search(line)
        score_hit = f1_score_pattern.search(line)
        time_hit = time_pattern.search(line)

        if iteration_hit:
            # A new iteration begins.
            current_iteration = int(iteration_hit.group(1))
        elif trial_hit and current_iteration is not None:
            # A new trial begins inside a known iteration.
            current_trial = int(trial_hit.group(1))
        elif score_hit and current_iteration is not None and current_trial is not None:
            # A trial finished: store its score with the current labels.
            f1_score = float(score_hit.group(1))
            iterations.append(current_iteration)
            trials.append(current_trial)
            f1_scores.append(f1_score)
        elif time_hit:
            # An iteration finished: remember how long it took.
            iter_num = int(time_hit.group(1))
            time_taken = float(time_hit.group(2))
            iteration_times[iter_num] = time_taken

# One row per completed trial, with the iteration's reported time attached to
# each of its trials. Iterations without a recorded time get NaN here; they
# could be filled afterwards (e.g. with fillna) if a default were wanted.
df = pd.DataFrame({
    'Iteration': iterations,
    'Trial': trials,
    'F1_Score': f1_scores
})
df = df.assign(Time=df['Iteration'].map(iteration_times))

# Per-iteration summary: best F1 score, and the iteration's time ('min' is
# used because every trial of an iteration carries the same mapped value).
per_iteration_spec = {
    'F1_Score': 'max',
    'Time': 'min'
}
summary_df = df.groupby('Iteration', as_index=False).agg(per_iteration_spec)

# Show the summary as the cell output.
summary_df


Unnamed: 0,Iteration,F1_Score,Time
0,1,0.5899,335.04
1,2,0.5877,255.79
2,3,0.5879,245.51
3,4,0.5913,236.79
4,5,0.5875,241.53
...,...,...,...
95,96,0.5897,212.78
96,97,0.5893,266.24
97,98,0.5867,280.66
98,99,0.5879,261.58


In [3]:
# Write every per-trial row to CSV (row index omitted), then display the frame.
# NOTE(review): the output saved with this cell has no Time column, while the
# parsing cell above it adds one to df -- confirm which version of df this
# export is meant to contain when the notebook is run top to bottom.
all_trials_csv = 'mlp_AX_27Nov_Paper_150it_7496822_ALL_TRIALS.csv'
df.to_csv(all_trials_csv, index=False)
df

Unnamed: 0,Iteration,Trial,F1_Score
0,1,1,0.2615
1,1,2,0.4043
2,1,3,0.5128
3,1,4,0.4171
4,1,5,0.3903
...,...,...,...
14995,100,146,0.5777
14996,100,147,0.5767
14997,100,148,0.5761
14998,100,149,0.5807


In [9]:
# Write the per-iteration summary (best F1 score and time) to CSV without the
# row index, then display it.
summary_csv = 'mlp_AX_27Nov_Paper_150it_7496822_REPEAT_Summary_TIME.csv'
summary_df.to_csv(summary_csv, index=False)
summary_df

Unnamed: 0,Iteration,F1_Score,Time
0,1,0.5899,335.04
1,2,0.5877,255.79
2,3,0.5879,245.51
3,4,0.5913,236.79
4,5,0.5875,241.53
...,...,...,...
95,96,0.5897,212.78
96,97,0.5893,266.24
97,98,0.5867,280.66
98,99,0.5879,261.58


In [10]:
import pandas as pd

# Collapse the per-iteration summary into a single row of headline statistics.
best_f1_per_iteration = summary_df['F1_Score']
time_per_iteration = summary_df['Time']

# Spread of the per-iteration best F1 scores.
best_f1_score, worst_f1_score = best_f1_per_iteration.max(), best_f1_per_iteration.min()
avg_f1_score, std_dev_f1_score = best_f1_per_iteration.mean(), best_f1_per_iteration.std()

# Mean and standard deviation of the per-iteration time.
avg_time, std_dev_time = time_per_iteration.mean(), time_per_iteration.std()

# Column name -> value; each value becomes the only entry of its column.
headline_stats = {
    'Best F1 Score': best_f1_score,
    'Worst F1 Score': worst_f1_score,
    'Average F1 Score': avg_f1_score,
    'F1 Score Std Dev': std_dev_f1_score,
    'Average Time': avg_time,
    'Time Std Dev': std_dev_time,
}
final_summary = pd.DataFrame({label: [value] for label, value in headline_stats.items()})

# Persist the one-row summary (row index omitted) and display it.
final_summary.to_csv('mlp_AX_27Nov_Paper_150it_7496822_Final_Summary_TIME.csv', index=False)
final_summary


Unnamed: 0,Best F1 Score,Worst F1 Score,Average F1 Score,F1 Score Std Dev,Average Time,Time Std Dev
0,0.5917,0.5835,0.58798,0.001551,296.891,75.301909
