In [25]:
import os
import re
import csv
import glob

# Directory where the log files are stored
log_dir = "/pasteur/appa/homes/bsow/ACSR/src/acsr/logs/grid_search"
# Output CSV file path
output_csv = "grid_search_results.csv"

# Regex patterns to extract configuration and training metrics
config_start_pattern = re.compile(r"CONFIG:")
config_param_pattern = re.compile(r"\s*(\w+)\s*:\s*([\S]+)")
# Training line example:
# Epoch 1/8000, Train Loss: 1.781, Val Loss: 0.908, Accuracy (1-PER): 0.137, Accuracy gestures (1-PER): 0.137, Time: 12.57 sec
epoch_line_pattern = re.compile(
    r"Epoch\s+(\d+)/\d+,\s+Train Loss:\s+([\d.]+),\s+Val Loss:\s+([\d.]+),\s+Accuracy \(1-PER\):\s+([\d.]+),\s+Accuracy gestures \(1-PER\):\s+([\d.]+),\s+Time:\s+([\d.]+)\s+sec"
)


def parse_config(lines):
    """Extract the ``key: value`` pairs that follow a ``CONFIG:`` marker line.

    Args:
        lines: Iterable of log lines (strings, with or without newlines).

    Returns:
        dict mapping each parameter name to its raw string value. Only the
        first whitespace-delimited token after the colon is kept. If a key
        appears more than once, the last occurrence wins.
    """
    config = {}
    in_config = False
    for line in lines:
        if config_start_pattern.search(line):
            in_config = True
            continue  # the marker line itself carries no parameter
        if not in_config:
            continue
        match = config_param_pattern.match(line)
        if match is None:
            # A blank line or any line that is not "key: value" ends the block.
            in_config = False
            continue
        key, value = match.groups()
        config[key] = value
    return config


def find_best_epoch(lines):
    """Find the epoch with the highest syllable accuracy ("Accuracy (1-PER)").

    Args:
        lines: Iterable of log lines (strings).

    Returns:
        dict with keys ``"epoch"``, ``"syllable_acc"`` and ``"gesture_acc"``.
        When no training line is found, ``"epoch"`` is ``None`` and both
        accuracies are ``0.0``. Ties keep the earliest epoch.
    """
    best_metrics = {
        "epoch": None,
        "syllable_acc": 0.0,  # higher is better
        "gesture_acc": 0.0,
    }
    for line in lines:
        match = epoch_line_pattern.search(line)
        if match is None:
            continue
        syllable_acc = float(match.group(4))
        # Accept the first parsed epoch unconditionally so that a run whose
        # accuracy never rises above 0.0 still reports a real epoch and its
        # gesture accuracy instead of the placeholder defaults.
        if best_metrics["epoch"] is None or syllable_acc > best_metrics["syllable_acc"]:
            best_metrics["epoch"] = int(match.group(1))
            best_metrics["syllable_acc"] = syllable_acc
            best_metrics["gesture_acc"] = float(match.group(5))
    return best_metrics


def collect_fieldnames(rows):
    """Return the union of all row keys in first-seen order.

    A dict is used instead of a set so the CSV column order is the same on
    every run (set iteration order over strings is not stable across runs).
    """
    ordered = {}
    for row in rows:
        ordered.update(dict.fromkeys(row))
    return list(ordered)


# For storing CSV rows
rows = []

# Get list of all .log files in directory; sorted so the row order is reproducible
log_files = sorted(glob.glob(os.path.join(log_dir, "*.log")))

for log_file in log_files:
    # errors="replace" keeps a stray undecodable byte from aborting the whole scan
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        lines = f.readlines()

    # Step 1: configuration block, Step 2: best epoch by syllable accuracy
    config = parse_config(lines)
    best_metrics = find_best_epoch(lines)

    # Prepare a row for the CSV: filename, then config values, then best metrics
    row = {"filename": os.path.basename(log_file)}
    row.update(config)
    row["best_epoch"] = best_metrics["epoch"]
    row["best_syllable_accuracy"] = best_metrics["syllable_acc"]
    row["best_gesture_accuracy"] = best_metrics["gesture_acc"]

    rows.append(row)

# Determine the fieldnames for CSV (all keys from all rows, stable order)
fieldnames = collect_fieldnames(rows)

# Write out the CSV; rows missing a column get an empty cell (DictWriter default)
with open(output_csv, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)

print(f"CSV file '{output_csv}' created with results from {len(log_files)} log files.")


CSV file 'grid_search_results.csv' created with results from 64 log files.


In [26]:
import pandas as pd

# Read the grid-search results CSV back from disk
df = pd.read_csv("grid_search_results.csv")

# Index label of the row with the highest best_syllable_accuracy
best_idx = df["best_syllable_accuracy"].idxmax()
best_row = df.loc[best_idx]

# Print every column of that row
print(best_row)


filename                  grid_search_array_10439930_2.log
optimizer                                             adam
epochs                                                8000
encoder_hidden_dim                                     128
best_epoch                                            1069
batch_size                                              16
best_gesture_accuracy                                0.877
n_layers_gru                                             1
device                                                cuda
alpha                                                  0.5
learning_rate                                        0.001
best_syllable_accuracy                               0.752
level                                            syllables
output_dim                                             329
Name: 5, dtype: object


In [16]:
import pandas as pd

# NOTE(review): this cell repeats the code of the previous cell; its lower
# execution counter suggests it was run earlier, against an older CSV - confirm
# whether it is still needed.
# Read the results table
df = pd.read_csv("grid_search_results.csv")

# Select the row whose best_syllable_accuracy is the maximum of the column
syllable_accuracy = df["best_syllable_accuracy"]
best_row = df.loc[syllable_accuracy.idxmax()]

# Print the selected row
print(best_row)


filename                  grid_search_array_10387325_9.log
optimizer                                             adam
epochs                                                8000
encoder_hidden_dim                                     256
best_epoch                                             326
batch_size                                              16
best_gesture_accuracy                                0.832
n_layers_gru                                             3
device                                                cuda
alpha                                                  1.0
learning_rate                                        0.001
best_syllable_accuracy                               0.679
level                                            syllables
output_dim                                             329
Name: 50, dtype: object
