In [4]:
import wandb
import pandas as pd
import numpy as np

# Point the public API client at the self-hosted W&B server. The override key
# documented by wandb.Api is `base_url` (underscore); a hyphenated 'base-url'
# key is not a recognised setting, so the custom host would not be applied.
api = wandb.Api(overrides={'base_url': "https://rosewandb.ucsd.edu"})
# Collection of runs for the "cht028/inference-nlinear" path
# (presumably "<entity>/<project>" -- confirm against the W&B workspace).
runs = api.runs("cht028/inference-nlinear")

In [5]:
def create_summary_table(runs, row_name, multi_column_name, column_name, metric):
    """Pivot runs into a table holding the lowest `metric` per configuration.

    Each run contributes to the cell addressed by its config values. When
    several runs share the same configuration, the run whose metric is
    numerically lowest wins (ties keep the run seen first).

    Args:
        runs: Iterable of run objects exposing a ``config`` mapping and a
            ``summaryMetrics`` mapping.
        row_name: Config key whose values become the table's row index.
        multi_column_name: Config key whose values become the (top-level)
            columns.
        column_name: Config key whose values become the second column level.
            If falsy (``None`` or ``""``), the table has a single column level.
        metric: Key in ``run.summaryMetrics`` to tabulate.

    Returns:
        pandas.DataFrame with sorted rows/columns. Cells hold the winning
        run's ``summaryMetrics[metric]`` value as stored on the run; rows and
        columns that are entirely empty are dropped.

    Notes:
        Runs are skipped when a required config/metric key is missing, or when
        the metric cannot be converted to a float or is NaN, so that one
        malformed run cannot break or poison the whole table.
    """
    has_sub_column = bool(column_name)

    # Maps configuration key -> (metric as float, run) for the best run so far.
    best_runs = {}
    for run in runs:
        try:
            row_value = run.config[row_name]
            group_value = run.config[multi_column_name]
            sub_value = run.config[column_name] if has_sub_column else None
            metric_value = float(run.summaryMetrics[metric])
        except (KeyError, TypeError, ValueError):
            # Missing key, or a metric that is None / non-numeric.
            continue

        # NaN never compares as "lower", so a NaN seen first would otherwise
        # block every later (valid) run with the same configuration.
        if np.isnan(metric_value):
            continue

        if has_sub_column:
            key = (row_value, group_value, sub_value)
        else:
            key = (row_value, group_value)

        if key not in best_runs or metric_value < best_runs[key][0]:
            best_runs[key] = (metric_value, run)

    # Build the row index and the (possibly two-level) column index.
    index = sorted({key[0] for key in best_runs})
    group_values = sorted({key[1] for key in best_runs})
    if has_sub_column:
        sub_values = sorted({key[2] for key in best_runs})
        columns = pd.MultiIndex.from_product(
            [group_values, sub_values], names=[multi_column_name, column_name]
        )
    else:
        columns = group_values

    results_df = pd.DataFrame(index=index, columns=columns)

    # Fill each cell with the metric exactly as stored on the winning run.
    for key, (_, run) in best_runs.items():
        column = (key[1], key[2]) if has_sub_column else key[1]
        results_df.at[key[0], column] = run.summaryMetrics[metric]

    # from_product creates every combination; drop the ones no run covered.
    return results_df.dropna(axis=0, how='all').dropna(axis=1, how='all')

In [6]:
# Tabulate 'RMSE Scores' with one row per model and columns grouped by
# dataset, then by window size.
results_df = create_summary_table(
    runs,
    row_name='model',
    multi_column_name='dataset',
    column_name='window_size',
    metric='RMSE Scores',
)
# results_df.to_excel('results.xlsx')
results_df

dataset,climate,climate,climate,climate,climate,climate,climate,medical,medical,medical,medical,medical,medical,medical
window_size,1-1,2-2,3-3,4-4,5-5,6-6,7-7,1-1,2-2,3-3,4-4,5-5,6-6,7-7
nlinear,4.980987,6.12917,6.501329,6.710265,6.833787,6.915503,6.961974,5.194627,5.279133,5.274899,5.40642,5.561543,5.875239,6.598188
nlinear_textEmbedding,4.835434,5.800209,5.950541,5.934357,6.022488,6.023838,6.105879,5.117223,5.142506,5.105691,5.300131,5.492464,5.759158,5.98253


In [25]:
from pydantic import BaseModel, create_model

def create_answer_format(window_size: int, start_day: int = 3):
    """Create a pydantic model class with a date and a forecast field per day.

    For every day number ``d`` in ``range(start_day, start_day + window_size)``
    the model gets two required string fields, in this order:
    ``day_{d}_date`` and ``day_{d}_weather_forecast``.

    Args:
        window_size: Number of consecutive days to generate fields for. A
            value of zero or less yields a model with no fields.
        start_day: Number of the first generated day. Defaults to 3, the
            value that was previously hard-coded.

    Returns:
        The model class named ``'AnswerFormat'`` produced by ``create_model``.
    """
    fields = {}
    for day in range(start_day, start_day + window_size):
        # (type, ...) marks the field as required with no default.
        fields[f'day_{day}_date'] = (str, ...)
        fields[f'day_{day}_weather_forecast'] = (str, ...)

    return create_model('AnswerFormat', **fields)

# Example: build the schema for a three-day window and show its JSON schema.
window_size = 3
AnswerFormat = create_answer_format(window_size)
print(AnswerFormat.model_json_schema())

# Sample payload covering days 3-5, one date and one forecast per day.
data = dict(
    day_3_date='2024-09-01',
    day_3_weather_forecast='Sunny',
    day_4_date='2024-09-02',
    day_4_weather_forecast='Rainy',
    day_5_date='2024-09-03',
    day_5_weather_forecast='Cloudy',
)

# Run the payload through the generated model's validation and show the result.
answer = AnswerFormat.model_validate(data)
print(answer)


{'properties': {'day_3_date': {'title': 'Day 3 Date', 'type': 'string'}, 'day_3_weather_forecast': {'title': 'Day 3 Weather Forecast', 'type': 'string'}, 'day_4_date': {'title': 'Day 4 Date', 'type': 'string'}, 'day_4_weather_forecast': {'title': 'Day 4 Weather Forecast', 'type': 'string'}, 'day_5_date': {'title': 'Day 5 Date', 'type': 'string'}, 'day_5_weather_forecast': {'title': 'Day 5 Weather Forecast', 'type': 'string'}}, 'required': ['day_3_date', 'day_3_weather_forecast', 'day_4_date', 'day_4_weather_forecast', 'day_5_date', 'day_5_weather_forecast'], 'title': 'AnswerFormat', 'type': 'object'}
day_3_date='2024-09-01' day_3_weather_forecast='Sunny' day_4_date='2024-09-02' day_4_weather_forecast='Rainy' day_5_date='2024-09-03' day_5_weather_forecast='Cloudy'


In [2]:
import json

# Input dictionary (example with unsorted keys)
data = {
    "day_4_temp": 25.4,
    "day_3_temp": 22.4,
    "day_2_temp": 30,
    "day_5_temp": 28.1
}


def sort_by_day(day_values):
    """Return a new dict ordered by the day number embedded in each key.

    Keys are expected to look like ``day_<N>_<suffix>``. Ordering uses the
    integer ``N`` rather than plain string comparison, so ``day_10_temp``
    comes after ``day_9_temp`` instead of before ``day_2_temp``. Keys sharing
    a day number are ordered alphabetically.

    Raises:
        IndexError, ValueError: If a key does not contain an integer as its
            second underscore-separated part.
    """
    def day_then_name(item):
        key = item[0]
        return (int(key.split("_")[1]), key)

    return dict(sorted(day_values.items(), key=day_then_name))


# Sort the dictionary by day number
sorted_data = sort_by_day(data)

# Convert the result into a JSON formatted string for better readability
sorted_data_json = json.dumps(sorted_data, indent=4)

# Output the sorted dictionary
print(sorted_data_json)


{
    "day_2_temp": 30,
    "day_3_temp": 22.4,
    "day_4_temp": 25.4,
    "day_5_temp": 28.1
}
