In [2]:
import wandb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [9]:
# --- Where the runs live -------------------------------------------------
entity = "haeri-hsn"  # wandb entity (user or team) that owns the project
project = "stream_learning"  # wandb project name
project_path = f"{entity}/{project}"

# --- Which runs to fetch -------------------------------------------------
# Only finished runs that carry at least one of the tags listed under '$in'.
# Other tags mentioned alongside this query in earlier revisions of the cell:
#   msmsa_horizon_analysis_melbourne_housing
#   msmsa_anchor_analysis_melbourne_housing
filters = dict(
    state='finished',
    tags={'$in': ['anchor_1000_normal']},
)

# --- Query ---------------------------------------------------------------
api = wandb.Api()
runs = api.runs(project_path, filters=filters)

# Report how many runs matched the filters.
run_count = len(runs)
print(f"Number of runs: {run_count}")

Number of runs: 10


In [10]:
# Build one flat record per fetched run and collect the records in a DataFrame.

# Entries copied from `run.config` into a column with the same name.
# Note that the metrics (MAE / RMSE / R2) are read from the run config here,
# not from the run summary.
config_keys = (
    'method',
    'dataset',
    'base_learner',
    'hor_candids',
    'num_anchors',
    'MAE',
    'RMSE',
    'R2',
    'base_learner_params',
)

# Output column -> entry of `run.summary` it is read from.
summary_keys = {
    'runtime': '_runtime',        # runtime in seconds (per the original note)
    'num_timesteps': '_step',     # wandb `_step` entry; presumably the last logged step
    'num_train_samples': 'num_train_samples',
    'run_abs_error': 'run_abs_error',
    'run_y': 'run_y',
    'run_y_pred': 'run_y_pred',
}

run_data = []
for run in runs:
    # Column order: identifiers, config entries, summary entries, tags.
    run_dict = {'id': run.id, 'name': run.name}
    run_dict.update((key, run.config[key]) for key in config_keys)
    run_dict.update(
        (column, run.summary[source]) for column, source in summary_keys.items()
    )
    run_dict['tags'] = run.tags
    run_data.append(run_dict)

# One row per run.
df_normal = pd.DataFrame(run_data)

In [12]:
# Combine the per-distribution frames into a single frame, labelling every row
# with the anchor distribution it came from (column 'type').
#
# NOTE(review): `df_uniform` and `df_exact` are not created anywhere in the
# visible notebook. Presumably they were produced by re-running the query and
# extraction cells with a different tag and a different target name; confirm
# before relying on Restart & Run All.
df_normal['type'] = 'normal'
df_uniform['type'] = 'uniform'
df_exact['type'] = 'exact'

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# the result keeps each source frame's own row labels, so labels repeat across
# the three parts and label-based selection / alignment becomes ambiguous.
df = pd.concat([df_normal, df_uniform, df_exact], ignore_index=True)

# Persist the combined frame so the analysis cells can be re-run without
# querying wandb again.
df.to_pickle('df_anchor_distributions.pkl')


## Horizon Analysis

In [12]:
%matplotlib qt

plt.close('all')
# set theme for seaborn
sns.set_theme(style='whitegrid')


# assuming hor_candids is a list of integers, make another column (num_hor_candids) with the length of the list
df['num_hor_candids'] = df['hor_candids'].apply(len)

# remove rows with 
df = df.sort_values(by='num_hor_candids')
# Now create another column where if num_hor_candids is 7, then the value is 'exponential(^2)', if  37, then 'exponential(^1.15)' and if 991 then 'full'
df['hor_candids_type'] = df['num_hor_candids'].apply(lambda x: 'exponent=2' if x == 7 else 'exponent=1.15' if x == 37 else 'linear (full)')

# Bar plot of one metric column, grouped by horizon-candidate setting.
def plot_metrics(df, metric):
    """Show a bar plot of `metric` for each horizon-candidate setting.

    Parameters
    ----------
    df : DataFrame with a 'hor_candids_type' column and a column named `metric`.
    metric : name of the column plotted on the y axis (e.g. 'MAE', 'runtime').

    Opens a new 4.5 x 4 inch figure and displays it; returns nothing.
    """
    fig, ax = plt.subplots(figsize=(4.5, 4))

    # hue repeats the x variable so each setting gets its own palette colour.
    sns.barplot(
        data=df,
        x='hor_candids_type',
        y=metric,
        hue='hor_candids_type',
        palette='viridis',
        width=0.5,
        ax=ax,
    )

    ax.set_title(f'{metric} for different horizon candidates setting')
    ax.set_xlabel('Type of Setting')

    # Runtime gets a unit on its axis label; other metrics use the column name.
    if metric == 'runtime':
        y_label = 'Runtime [sec]'
    else:
        y_label = metric
    ax.set_ylabel(y_label)

    fig.tight_layout()
    plt.show()


# One figure per metric, in this order.
for metric_name in ('MAE', 'RMSE', 'R2', 'runtime'):
    plot_metrics(df, metric_name)





## Anchor Analysis

In [24]:
%matplotlib qt

plt.close('all')
# set theme for seaborn
sns.set_theme(style='whitegrid')

colors = ['#FF5733', '#33FF57', '#3357FF']

# Bar plot of one metric column, grouped by anchor-point distribution.
# Note: this definition replaces any earlier `plot_metrics` in the kernel.
def plot_metrics(df, metric):
    """Show a bar plot of `metric` for each anchor-point distribution.

    Parameters
    ----------
    df : DataFrame with a 'type' column and a column named `metric`.
    metric : name of the column plotted on the y axis (e.g. 'MAE', 'runtime').

    Bars appear in the fixed order uniform, normal, exact and are coloured
    from the module-level `colors` list. Opens a new 3 x 4 inch figure,
    sets no title, displays the figure and returns nothing.
    """
    fig, ax = plt.subplots(figsize=(3, 4))

    # hue repeats the x variable so each distribution gets its own colour.
    sns.barplot(
        data=df,
        x='type',
        y=metric,
        hue='type',
        palette=colors,
        width=0.5,
        order=['uniform', 'normal', 'exact'],
        ax=ax,
    )

    ax.set_xlabel('Distribution of Anchor Points')

    # Runtime gets a unit on its axis label; other metrics use the column name.
    if metric == 'runtime':
        y_label = 'Runtime [sec]'
    else:
        y_label = metric
    ax.set_ylabel(y_label)

    fig.tight_layout()
    plt.show()


# One figure per metric, in this order.
for metric_name in ('MAE', 'RMSE', 'R2', 'runtime'):
    plot_metrics(df, metric_name)