In [1]:
import wandb
import pandas as pd

# Public W&B API client. W&B documents the server override key as `base_url`
# (underscore); a hyphenated `base-url` key is not one of the documented settings.
api = wandb.Api(overrides={'base_url': "https://api.wandb.ai"})
# Collection of runs stored under the `point-process/llmforecast` path.
runs = api.runs("point-process/llmforecast")

In [2]:
# Print the runs collection to confirm the query above resolved to a Runs object.
print(runs)

<Runs point-process/llmforecast>


In [20]:
def create_summary_table(runs, row_name, multi_column_name, column_name, metric):
    """Pivot runs into a table holding the lowest value of ``metric`` per cell.

    Rows are the distinct values of ``run.config[row_name]``. Columns are the
    distinct values of ``run.config[multi_column_name]``; when ``column_name``
    is given, the columns become a two-level ``MultiIndex`` of
    ``(multi_column_name, column_name)``.

    When several runs share the same row/column combination, the smallest
    metric value among them is reported.

    Args:
        runs: Iterable of run objects exposing a ``config`` mapping and a
            ``summaryMetrics`` mapping.
        row_name: Config key whose values label the rows.
        multi_column_name: Config key whose values label the (outer) columns.
        column_name: Config key for the inner column level, or ``None`` (or
            any falsy value) for single-level columns.
        metric: Key in ``summaryMetrics`` to tabulate.

    Returns:
        pandas.DataFrame of float metric values. Rows and columns that
        contain no value at all are dropped.
    """
    has_inner_level = bool(column_name)

    # Lowest metric value seen so far for each (row, outer[, inner]) key.
    lowest_metrics = {}
    for run in runs:
        try:
            row_label = run.config[row_name]
            outer_label = run.config[multi_column_name]
            inner_label = run.config[column_name] if has_inner_level else None
            metric_value = float(run.summaryMetrics[metric])
        except (KeyError, TypeError, ValueError):
            # Skip runs missing a config key or the metric, and runs whose
            # metric cannot be interpreted as a number (e.g. None).
            continue

        if has_inner_level:
            key = (row_label, outer_label, inner_label)
        else:
            key = (row_label, outer_label)

        current = lowest_metrics.get(key)
        # A NaN placeholder is always replaced; NaN candidates never win
        # against a real number because `nan < x` is False.
        if current is None or pd.isna(current) or metric_value < current:
            lowest_metrics[key] = metric_value

    # Axis labels for the result table.
    row_labels = sorted({key[0] for key in lowest_metrics})
    outer_labels = sorted({key[1] for key in lowest_metrics})
    if has_inner_level:
        inner_labels = sorted({key[2] for key in lowest_metrics})
        columns = pd.MultiIndex.from_product(
            [outer_labels, inner_labels],
            names=[multi_column_name, column_name],
        )
    else:
        columns = outer_labels

    # Start from an empty (all-NaN) grid and fill in the cells that have data.
    results_df = pd.DataFrame(index=row_labels, columns=columns)
    for key, metric_value in lowest_metrics.items():
        column_label = (key[1], key[2]) if has_inner_level else key[1]
        results_df.at[key[0], column_label] = metric_value

    # from_product creates every outer/inner combination; discard the ones
    # (and any rows) that never received a value.
    return results_df.dropna(axis=0, how='all').dropna(axis=1, how='all')

In [21]:
# Build the 'rmse' table: one row per config 'name', columns grouped by 'dataset' then 'window_size'.
results_df = create_summary_table(runs, 'name', 'dataset', 'window_size', 'rmse')
results_df

dataset,Climate,Climate,Climate,Climate,Mimic,Mimic,Mimic,Mimic,Yelp,Yelp,Yelp,Yelp
window_size,7,9,14,30,5,7,9,14,4,8,12,24
Llama-2-7b-chat-hf,0.179627,0.151682,,,0.154039,0.186128,0.183625,,0.75105,0.771902,0.888398,
Mistral-7B-Instruct-v0.1,0.183741,0.264709,,,0.15644,0.190862,0.262242,,0.736506,0.777644,0.881798,
arima,0.280252,0.200725,0.138294,0.137508,0.241965,0.274563,0.216994,0.198986,1.191419,1.3808,0.878099,0.800031
avg,0.11711,0.115335,0.118583,0.139972,0.140004,0.147728,0.154714,0.174503,0.704788,0.693359,0.701275,0.710008
ets,0.214807,0.20287,0.168499,0.164285,0.209988,0.207661,0.20871,0.212283,1.367694,1.334058,1.020702,1.160937
gemma-7b-it,0.135962,0.257995,,,0.166181,0.181875,0.269328,,0.877593,1.419001,1.490367,
last,0.1405,0.141434,0.142096,0.153495,0.151323,0.158277,0.164154,0.177461,0.8442,0.866989,0.883912,0.914723
llama,45.96401,40.331071,37.557077,33.482239,2134.282728,1373.864837,868.926823,,,,,
llmtime,0.119366,0.124417,,,0.127848,0.144255,0.151237,,0.58706,0.663245,0.693942,
lltime,0.73183,7.717368,,,0.203834,0.191714,0.205618,,1.439553,1.953058,1.105995,0.985528


In [22]:
def postprocess_df(df, rows_to_remove, columns_to_remove, precision):
    # Define a lambda to round and format the float to the desired precision
    format_float = lambda x: f"{x:.{precision}f}" if isinstance(x, float) else x
    
    # Remove specified rows and columns
    df = df.drop(index=rows_to_remove)
    df = df.drop(columns=columns_to_remove, axis=1)
    
    # Convert all values to the specified precision
    df = df.applymap(lambda x: format_float(round(float(x), precision)) if pd.notnull(x) and isinstance(x, (int, float)) else x)
    
    # Define a function to apply bold styling to the minimum value in each column
    def highlight_min(s):
        s = pd.to_numeric(s, errors='coerce')
        is_min = s == s.min()
        return ['font-weight: bold' if v else '' for v in is_min]
    
    # Apply the styling with the Styler object
    styled_df = df.style.apply(highlight_min, axis=0)
    
    return styled_df, df


# Example usage: trim the summary table, format it to `precision` decimals and display the styled result.
rows_to_remove = ['llama', 'm2zeroshot'] # row (index) labels to leave out of the table
columns_to_remove = [('Yelp', 24), ('Mimic', 14), ('Climate', 30), ('Climate', 14)] # (dataset, window_size) column labels to leave out
precision = 3

# `processed_df` is the styled object displayed below; `df` is the formatted DataFrame itself.
processed_df, df = postprocess_df(results_df, rows_to_remove, columns_to_remove, precision)
processed_df

  df = df.applymap(lambda x: format_float(round(float(x), precision)) if pd.notnull(x) and isinstance(x, (int, float)) else x)


dataset,Climate,Climate,Mimic,Mimic,Mimic,Yelp,Yelp,Yelp
window_size,7,9,5,7,9,4,8,12
Llama-2-7b-chat-hf,0.18,0.152,0.154,0.186,0.184,0.751,0.772,0.888
Mistral-7B-Instruct-v0.1,0.184,0.265,0.156,0.191,0.262,0.737,0.778,0.882
arima,0.28,0.201,0.242,0.275,0.217,1.191,1.381,0.878
avg,0.117,0.115,0.14,0.148,0.155,0.705,0.693,0.701
ets,0.215,0.203,0.21,0.208,0.209,1.368,1.334,1.021
gemma-7b-it,0.136,0.258,0.166,0.182,0.269,0.878,1.419,1.49
last,0.141,0.141,0.151,0.158,0.164,0.844,0.867,0.884
llmtime,0.119,0.124,0.128,0.144,0.151,0.587,0.663,0.694
lltime,0.732,7.717,0.204,0.192,0.206,1.44,1.953,1.106
lltime-mistral7b-v0,0.743,0.339,0.274,0.363,0.333,1.168,1.593,


In [11]:
# Write the formatted table returned by postprocess_df to results.csv (path is relative to the working directory).
df.to_csv('results.csv')