# Export computational performance results
- This notebook parses WRF timing logs and exports the computational performance (per-step elapsed time) results for the different model configurations and CPU counts.

In [1]:
import re
import pandas as pd
# Root directory for the timing logs. It is concatenated directly in front of
# the relative log path (no separator is inserted), so keep the trailing slash.
home_path = '/gws/nopw/j04/duicv/yuansun/'

In [3]:
# Model configurations to export; each name selects the `log_<model>` directory
# and is written to the 'model' column of the exported table.
model_list = [
    'wrf',
    'wrf-ctsm',
    'wrf-ctsm_1pft',
    'wrf-ctsm_1pft1urban',
]
# Run-folder suffix (`runs_d04_<folder>`) for each entry of model_list,
# matched by position, so both lists must have the same length.
folder_list = [
    'wrf',
    'wrf-ctsm',
    'wrf-ctsm',
    'wrf-ctsm',
]
# CPU counts to export; each value selects the `cpus_<n>` log directory
# and is written to the 'cpus' column.
cpus_list = [
    1,
    2,
    4,
    6,
]

In [4]:
def parse_wrf_timing_log(log_path):
    """
    Extract the per-step "Timing for main" records from a WRF log file
    (e.g., rsl.out.0000 or rsl.error.0000).

    Only lines of the form
    ``Timing for main: time <YYYY-MM-DD_hh:mm:ss> on domain <n>: <x> elapsed seconds``
    are used; every other line is skipped.

    Parameters:
        log_path (str): Path to the WRF log file.

    Returns:
        pd.DataFrame: One row per matching line, in file order, with columns
                      'timestamp' (datetime), 'domain' (int) and
                      'elapsed_seconds' (float). No matching line yields an
                      empty DataFrame with the same three columns.
    """
    timing_re = re.compile(
        r'Timing for main: time (\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}) on domain\s+(\d+):\s+([\d.]+) elapsed seconds'
    )

    # One (timestamp, domain, elapsed) tuple per matching line.
    with open(log_path, 'r') as log_file:
        rows = [
            (
                pd.to_datetime(hit.group(1), format='%Y-%m-%d_%H:%M:%S'),
                int(hit.group(2)),
                float(hit.group(3)),
            )
            for hit in map(timing_re.search, log_file)
            if hit
        ]

    # Transpose rows into per-column lists; zip(*[]) is empty, so fall back to
    # three empty columns when nothing matched.
    column_names = ('timestamp', 'domain', 'elapsed_seconds')
    columns = [list(values) for values in zip(*rows)] or [[], [], []]

    return pd.DataFrame(dict(zip(column_names, columns)))

In [None]:
# Collect the timing records of every (model, cpus) run into one long table
# and export it for plotting.
timing_frames = []
for model, folder in zip(model_list, folder_list):
    # Directory holding the per-CPU-count logs of this model configuration.
    log_root = f'{home_path}0_WRFvsWRF-CTSM/output_analysis/computation/scale_analysis/runs_d04_{folder}/log_{model}'
    # One frame per CPU count: drop the domain column and tag rows with `cpus`.
    per_cpu_frames = [
        parse_wrf_timing_log(f'{log_root}/cpus_{cpus}/rsl.error.0000')
        .drop(columns=['domain'])
        .assign(cpus=cpus)
        for cpus in cpus_list
    ]
    # Stack the CPU counts of this model and tag the rows with the model name.
    timing_frames.append(
        pd.concat(per_cpu_frames, ignore_index=True).assign(model=model)
    )
df_model_cpus = pd.concat(timing_frames, ignore_index=True)
df_model_cpus.to_csv('./data_for_figure/timing.csv', index=False)
df_model_cpus.head()

Unnamed: 0,timestamp,elapsed_seconds,cpus,model
0,2021-12-25 00:00:06,1.32844,1,wrf
1,2021-12-25 00:00:12,0.06135,1,wrf
2,2021-12-25 00:00:18,0.0604,1,wrf
3,2021-12-25 00:00:24,0.06087,1,wrf
4,2021-12-25 00:00:30,0.06018,1,wrf
