In [1]:
import wandb
import pandas as pd

api = wandb.Api(overrides={'base-url': "https://rosewandb.ucsd.edu"})
runs = api.runs("cht028/Inference")

In [7]:
print(runs)

<Runs cht028/Inference>


In [12]:
def create_summary_table(runs, row_name, multi_column_name, column_name, metric, split):
    # Initialize a dictionary for storing the lowest metric values
    lowest_metrics = {}
    run_mapping = {}

    # Find the run with the lowest metric for each combination
    for run in runs:
        try:
            name = run.config[row_name]
            dataset = run.config[multi_column_name]
            window_size = run.config[column_name] if column_name else None
            metric_value = float(run.summaryMetrics[metric])
            data_split = run.config["split"]
        except KeyError:
            continue

        key = (name, dataset) if column_name is None else (name, dataset, window_size)

        if key not in lowest_metrics and data_split == split:
            lowest_metrics[key] = metric_value
            run_mapping[key] = run

    # Prepare the data structure for DataFrame creation
    index = sorted(set(key[0] for key in run_mapping.keys()))
    if column_name:
        datasets = sorted(set(key[1] for key in run_mapping.keys()))
        window_sizes = sorted(set(key[2] for key in run_mapping.keys()))
        multi_columns = pd.MultiIndex.from_product([datasets, window_sizes], names=[multi_column_name, column_name])
    else:
        multi_columns = sorted(set(key[1] for key in run_mapping.keys()))

    # Create the DataFrame
    results_df = pd.DataFrame(index=index, columns=multi_columns)

    # Populate the DataFrame
    for key, run in run_mapping.items():
        if column_name:
            results_df.at[key[0], (key[1], key[2])] = run.summaryMetrics[metric]
        else:
            results_df.at[key[0], key[1]] = run.summaryMetrics[metric]

    # Drop rows and columns with all NaN values
    results_df.dropna(axis=0, how='all', inplace=True)
    results_df.dropna(axis=1, how='all', inplace=True)

    return results_df

In [13]:
results_df = create_summary_table(runs, 'case', 'dataset', 'window_size', 'Meteor Scores', 'test')
print(results_df)

dataset                     climate                           gas            \
window_size                       1         2         3         1         2   
mixed-mixed-cal/finetune   0.424481  0.415877   0.42636       NaN       NaN   
mixed-mixed-west/finetune       NaN       NaN       NaN  0.513541  0.514554   
mixed-mixed/finetune            NaN       NaN       NaN       NaN       NaN   
text-text-cal/finetune     0.423342  0.426075  0.420445       NaN       NaN   
text-text-west/finetune         NaN       NaN       NaN  0.511938  0.514143   
text-text/finetune              NaN       NaN       NaN       NaN       NaN   

dataset                               medical                      
window_size                       3         1         2         3  
mixed-mixed-cal/finetune        NaN       NaN       NaN       NaN  
mixed-mixed-west/finetune  0.516322       NaN       NaN       NaN  
mixed-mixed/finetune            NaN       NaN  0.435199  0.424547  
text-text-cal/finetune     

In [8]:
def postprocess_df(df, rows_to_remove, columns_to_remove, precision):
    # Define a lambda to round and format the float to the desired precision
    format_float = lambda x: f"{x:.{precision}f}" if isinstance(x, float) else x
    
    # Remove specified rows and columns
    df = df.drop(index=rows_to_remove)
    df = df.drop(columns=columns_to_remove, axis=1)
    
    # Convert all values to the specified precision
    df = df.applymap(lambda x: format_float(round(float(x), precision)) if pd.notnull(x) and isinstance(x, (int, float)) else x)
    
    # Define a function to apply bold styling to the minimum value in each column
    def highlight_min(s):
        s = pd.to_numeric(s, errors='coerce')
        is_min = s == s.min()
        return ['font-weight: bold' if v else '' for v in is_min]
    
    # Apply the styling with the Styler object
    styled_df = df.style.apply(highlight_min, axis=0)
    
    return styled_df, df


# Example usage:
rows_to_remove = ['llama', 'm2zeroshot'] # replace with your actual row values to remove
# columns_to_remove = [('Yelp', 24), ('Mimic', 14), ('Climate', 30), ('Climate', 14)] # replace with your actual column values to remove
precision = 3

processed_df, df = postprocess_df(results_df, rows_to_remove, columns_to_remove, precision)
processed_df

KeyError: "['llama', 'm2zeroshot'] not found in axis"

In [11]:
{"day_3": {"Time": "2023-05-30", "summary": "**Forecast Summary**\n\nA significant weather pattern shift is expected from June 1-5, with multiple hazards affecting various regions.\n\n* **Heat Wave**: Well above normal to near record temperatures will occur from the Upper Midwest to the Northeast on Thursday-Friday, with highs potentially reaching 80s to low 90s.\n* **Heavy Rainfall**: A multi-day thunderstorm and local heavy rain threat continues for the Plains/northern Rockies, with a Slight Risk area over Montana and northern Wyoming.\n* **Flooding**: Flooding is possible across the Central Rockies, Northern Plains, Northern Rockies, and Northern Great Basin, with flooding occurring or imminent in the Northern Rockies and Northern Great Basin.\n* **Embedded Systems**: An upper trough/low may form across the Southeast late week, producing heavy rainfall and uncertainty in details.\n* **Gulf Coast/Florida System**: Heavy rainfall is expected in Florida, with a Marginal Risk area maintained in the Excessive Rainfall Outlook.\n* **Temperature Anomalies**: Near normal temperatures will occur in the Northwest on Thursday, while temperatures 5-15F above normal are expected from Friday onward. The southern tier will see near to moderately below normal temperatures, with the best focus for highs 5-10F below normal shifting across the Four Corners states into the southern High Plains and possibly the southern California coast."}}, "day_4": {"Time": "2023-06-01", "summary": "**Forecast Summary**\n\nA significant weather pattern shift is expected from June 1-5, with multiple hazards affecting various regions.\n\n* **Heat Wave**: Well above normal to near record temperatures will occur from the Upper Midwest to the Northeast on Thursday-Friday, with highs potentially reaching 80s to low 90s.\n* **Heavy Rainfall**: A multi-day thunderstorm and local heavy rain threat continues for the Plains/northern Rockies, with a Slight Risk area over Montana and northern Wyoming.\n* **Flooding**: Flooding is possible across the Central Rockies, Northern Plains, Northern Rockies, and Northern Great Basin, with flooding occurring or imminent in the Northern Rockies and Northern Great Basin.\n* **Embedded Systems**: An upper trough/low may form across the Southeast late week, producing heavy rainfall and uncertainty in details.\n* **Gulf Coast/Florida System**: Heavy rainfall is expected in Florida, with a Marginal Risk area maintained in the Excessive Rainfall Outlook.\n* **Temperature Anomalies**: Near normal temperatures will occur in the Northwest on Thursday, while temperatures 5-15F above normal are expected from Friday onward. The southern tier will see near to moderately below normal temperatures, with the best focus for highs 5-10F below normal shifting across the Four Corners states into the southern High Plains and possibly the southern California coast."}}