# Analyze Kaggle Runtime

## Imports

In [None]:
import glob
import os

## Code

In [None]:
def parse_execution_times(log_lines, function_name):
    """Collect the durations logged for ``function_name``.

    A line is considered only when it contains
    ``INFO - wrapper - Executed <function_name> in``. The duration is the
    text between the last `` in `` separator and the following `` seconds``.

    Args:
        log_lines: Iterable of log lines (strings).
        function_name: Name of the function as it appears in the log message.

    Returns:
        List of durations as floats, in the order they appear in
        ``log_lines``. Lines whose duration cannot be parsed are skipped.
    """
    marker = f'INFO - wrapper - Executed {function_name} in'
    durations = []
    for entry in log_lines:
        if marker not in entry:
            continue
        chunks = entry.split(' in ')
        if len(chunks) <= 1:
            # The marker matched but nothing follows the " in " separator.
            continue
        raw_seconds = chunks[-1].partition(' seconds')[0]
        try:
            duration = float(raw_seconds)
        except ValueError:
            # Not a number: ignore this line rather than abort the parse.
            continue
        durations.append(duration)
    return durations

def analyze_folder(folder_path):
    """Aggregate logged execution times for every log file in a folder.

    Reads each ``*.log`` file under ``<folder_path>/logs``, extracts the
    durations logged for ``search``, ``learn`` and ``create_peft_model``,
    and prints the combined total.

    Args:
        folder_path: Directory that contains a ``logs`` sub-directory.

    Returns:
        Dict mapping each function name to its summed duration, converted
        from seconds to hours and divided by 3.
    """
    function_names = ['search', 'learn', 'create_peft_model']
    log_pattern = os.path.join(folder_path, 'logs', '*.log')
    durations = {name: [] for name in function_names}
    for log_path in glob.glob(log_pattern):
        with open(log_path, 'r') as handle:
            lines = handle.readlines()
        for name in function_names:
            durations[name] += parse_execution_times(lines, name)
    sum_times = {}
    for name, values in durations.items():
        # seconds -> hours, then divided by 3.
        # NOTE(review): the meaning of the constant 3 is not visible here
        # (presumably a number of runs or workers) — confirm.
        sum_times[name] = sum(values) / 3600 / 3
    print(f'Total times in hours for folder {folder_path}: {sum(sum_times.values()):.2f}h')
    return sum_times

## Analysis

In [None]:
# Summarize the logged execution times for the evaluation_512i outputs.
analyze_folder('/mnt/hdd0/Kaggle/arc25/kaggle_outputs/evaluation_512i')

In [None]:
# Summarize the logged execution times for the evaluation_128i_1x128 outputs.
analyze_folder('/mnt/hdd0/Kaggle/arc25/kaggle_outputs/evaluation_128i_1x128')

I expect learn times to be more or less stable, and search times to increase due to inefficient hardware use.
If that is the case, it would make sense to group tasks when making predictions.

In [None]:
# Summarize the logged execution times for the evaluation_32i_2x32 outputs.
analyze_folder('/mnt/hdd0/Kaggle/arc25/kaggle_outputs/evaluation_32i_2x32')

In [None]:
# Summarize the logged execution times for the evaluation_64i_3x64 outputs.
analyze_folder('/mnt/hdd0/Kaggle/arc25/kaggle_outputs/evaluation_64i_3x64')