In [9]:
import re
import pandas as pd

def parse_log_file(file_path):
    """
    Parses an experiment log file to extract epoch-level metrics.

    Args:
        file_path (str): The path to the log file.

    Returns:
        pandas.DataFrame: A DataFrame where each row is an epoch and each
                          column is a metric. Returns None if parsing fails.
    """
    try:
        with open(file_path, 'r') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None
    except Exception as e:
        print(f"An error occurred while reading the file: {e}")
        return None

    # Each epoch's data is separated by a double line of dashes.
    # We split the content by this delimiter to process each epoch block.
    epoch_blocks = content.strip().split('--------------------------------------------------\n--------------------------------------------------')

    epochs_data = []

    # Regex patterns to find and extract the specific metrics from each block.
    patterns = {
        'Epoch': r"Epoch\s+(\d+)",
        'Time': r"Time\s+([\d.]+)\s+sec",
        'Valid Loss': r"Valid Loss\s+([\d.]+)",
        'Test Loss': r"Test Loss\s+([\d.]+)",
        'MAE': r"MAE:\s+([\d.]+)",
        'Corr': r"Correlation Coefficient:\s+([\d.]+)",
        'mult_acc_7': r"mult_acc_7:\s+([\d.]+)",
        'mult_acc_5': r"mult_acc_5:\s+([\d.]+)",
        'F1_all': r"F1 score all/non0:\s+([\d.]+)/",
        'F1_non0': r"F1 score all/non0:\s+[\d.]+/([\d.]+)",
        'Acc_all': r"Accuracy all/non0:\s+([\d.]+)/",
        'Acc_non0': r"Accuracy all/non0:\s+[\d.]+/([\d.]+)",
    }

    for block in epoch_blocks:
        if "Epoch" not in block:
            continue

        epoch_info = {}
        for key, pattern in patterns.items():
            match = re.search(pattern, block)
            if match:
                # Convert the extracted string value to a float.
                epoch_info[key] = float(match.group(1))
            else:
                epoch_info[key] = None # Store None if a metric isn't found

        if epoch_info.get('Epoch') is not None:
            epochs_data.append(epoch_info)

    if not epochs_data:
        print("No epoch data could be parsed from the file.")
        return None

    # Create a pandas DataFrame from the collected data.
    df = pd.DataFrame(epochs_data)
    
    # Set the 'Epoch' column as the index for clarity.
    df = df.set_index('Epoch')

    # Sort the DataFrame as requested: MAE ascending, Correlation descending.
    df_sorted = df.sort_values(by=['MAE', 'Corr'], ascending=[True, False])

    return df_sorted


In [16]:
file_path = "results/MMA_global.txt"  # Replace with your file path
df = parse_log_file(file_path)
print(df)

          Time  Valid Loss  Test Loss       MAE      Corr  mult_acc_7  \
Epoch                                                                   
11.0   37.6406      0.6996     0.7004  0.700439  0.796145    0.488338   
16.0   56.8401      0.6773     0.7019  0.701871  0.796768    0.479592   
24.0   37.4632      0.6787     0.7067  0.706673  0.794445    0.481050   
22.0   36.9988      0.6807     0.7067  0.706740  0.794355    0.478134   
25.0   37.7399      0.6781     0.7071  0.707124  0.794480    0.483965   
12.0   37.8700      0.6860     0.7073  0.707308  0.796730    0.491254   
13.0   37.5135      0.6822     0.7078  0.707765  0.796401    0.488338   
23.0   36.9700      0.6793     0.7079  0.707901  0.794328    0.481050   
21.0   37.0824      0.6798     0.7087  0.708671  0.793891    0.479592   
15.0   54.0896      0.6784     0.7096  0.709554  0.795181    0.478134   
20.0   37.6523      0.6797     0.7098  0.709788  0.794004    0.482507   
17.0   36.8245      0.6839     0.7116  0.711622  0.

In [17]:
df.head(5)

Unnamed: 0_level_0,Time,Valid Loss,Test Loss,MAE,Corr,mult_acc_7,mult_acc_5,F1_all,F1_non0,Acc_all,Acc_non0
Epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
11.0,37.6406,0.6996,0.7004,0.700439,0.796145,0.488338,0.55102,0.8233,0.8417,0.8236,0.8415
16.0,56.8401,0.6773,0.7019,0.701871,0.796768,0.479592,0.536443,0.8306,0.8493,0.8309,0.8491
24.0,37.4632,0.6787,0.7067,0.706673,0.794445,0.48105,0.542274,0.8306,0.8493,0.8309,0.8491
22.0,36.9988,0.6807,0.7067,0.70674,0.794355,0.478134,0.539359,0.8291,0.8478,0.8294,0.8476
25.0,37.7399,0.6781,0.7071,0.707124,0.79448,0.483965,0.54519,0.8291,0.8478,0.8294,0.8476
