In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

### Fix format
The default format of the `data_budget_worker` return value is `{'loss': ..., 'info': ...}`. To account for the modification, I re-format the stored results into that shape.

In [2]:

def fix_format(res_object):
    """Re-shape every stored result body into ``{'loss': ..., 'info': {...}}``.

    ``prepare_dataframe`` calls this helper, so it has to be a live definition
    for the notebook to run top to bottom.

    For each entry of ``res_object.data`` the ``results`` mapping is rebuilt:
    every body that is a dict with a ``'loss'`` key keeps that value under
    ``'loss'`` and has all of its remaining keys gathered under one ``'info'``
    dict. Bodies that cannot be re-shaped (for example ``None``) are carried
    over unchanged, and entries without a dict-like ``results`` attribute are
    left alone.

    Parameters
    ----------
    res_object : object exposing a ``data`` mapping of config id -> datum,
        where each datum has a ``results`` mapping of key -> result body.

    Returns
    -------
    The same ``res_object``; its data are modified in place.

    Notes
    -----
    The transformation is not idempotent: applying it twice wraps the
    ``'info'`` payload one level deeper each time.
    """
    for datum in res_object.data.values():
        results = getattr(datum, 'results', None)
        if not isinstance(results, dict):
            # Nothing to re-shape for this entry; leave it exactly as loaded.
            continue

        formatted_results = {}
        for key, body in results.items():
            if isinstance(body, dict) and 'loss' in body:
                info = {k: v for k, v in body.items() if k != 'loss'}
                formatted_results[key] = {'loss': body['loss'], 'info': info}
            else:
                # Keep unusable bodies as they are so one bad entry does not
                # prevent the other results of this datum from being fixed.
                formatted_results[key] = body
        datum.results = formatted_results
    return res_object


### Define utils

In [3]:
# Read the pickle and build the results DataFrame

def prepare_dataframe(pickled_object):
    """Load a pickled results object and flatten its runs into a DataFrame.

    The model label is derived from the file name: the text before the first
    ``'.'``, then truncated at the first ``'JOB1'`` and at the first
    ``'Model'`` (so ``'GraphConvModelJOB1.pickle'`` becomes ``'GraphConv'``).

    Runs whose ``info`` is ``None`` are skipped, because they have none of the
    fields read below and would otherwise raise ``TypeError``.

    Parameters
    ----------
    pickled_object : str or os.PathLike
        Path of the pickle file to read.

    Returns
    -------
    pandas.DataFrame
        One row per kept run with the columns ``config_id``, ``loss``,
        ``budget``, ``total_hours``, ``NEFAUC`` and ``model``.
    """
    import os  # local import so this cell does not depend on another cell's imports

    file_name = os.path.basename(os.fspath(pickled_object))
    model_name = file_name.split('.')[0]
    for marker in ('JOB1', 'Model'):
        model_name = model_name.split(marker)[0]

    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    res_object = fix_format(pd.read_pickle(pickled_object))

    # Keep only runs that actually carry an info payload.
    all_runs = [run for run in res_object.get_all_runs() if run.info is not None]
    details = [run.info['info'] for run in all_runs]

    res_df = pd.DataFrame()
    res_df['config_id'] = [run.config_id for run in all_runs]
    res_df['loss'] = [run.loss for run in all_runs]
    res_df['budget'] = [detail['budget'] for detail in details]
    res_df['total_hours'] = [detail['total_hours'] for detail in details]
    # NOTE(review): the factor 4 is not explained anywhere in view — confirm what it scales to.
    res_df['NEFAUC'] = np.array([detail['metric_info']['val']['NEFAUC'] for detail in details]) * 4
    res_df['model'] = model_name
    return res_df

def plot_results(res_df):
    """Build a figure with one NEFAUC-vs-budget line per ``config_id``.

    Parameters
    ----------
    res_df : pandas.DataFrame
        Needs the columns ``model``, ``config_id``, ``budget`` and ``NEFAUC``.
        The first distinct value of ``model`` is used as the label for the
        legend entries and the title.

    Returns
    -------
    The plotly figure; it is returned rather than shown.
    """
    model_name = res_df['model'].unique()[0]
    figure = go.Figure()

    # Legend entries are numbered by group position, not by the config id itself.
    for group_idx, (_, group_data) in enumerate(res_df.groupby('config_id')):
        trace = go.Scatter(
            # Budgets are handed over as strings.
            x=group_data['budget'].astype(str),
            y=group_data['NEFAUC'],
            mode='lines+markers',
            name=f'{model_name}{group_idx}',
            line={'width': 2},
            marker={'size': 8},
        )
        figure.add_trace(trace)

    # Axis labels, canvas size and a centred title.
    figure.update_layout(
        xaxis_title='Budget',
        yaxis_title='NEFAUC',
        width=800,
        height=650,
        title=f'{model_name} NEFAUC vs Budget',
        title_x=0.5,
    )
    return figure

In [4]:
# Load the Random Forest results pickle directly.
# NOTE(review): `res_object` is not referenced by any later cell visible here —
# presumably kept for ad-hoc inspection; confirm before removing.
res_object = pd.read_pickle('../run_results/RandomForestJOB1.pickle')

## Random Forest

In [5]:
# Flatten the Random Forest runs into a DataFrame and plot NEFAUC against budget.
rf_df = prepare_dataframe('../run_results/RandomForestJOB1.pickle')
rf_fig = plot_results(rf_df)
# Bare last expression so the notebook renders the figure as the cell output.
rf_fig

## ChemCeption

In [6]:
# Flatten the ChemCeption runs into a DataFrame and plot NEFAUC against budget.
chem_ception_df = prepare_dataframe('../run_results/ChemCeptionJOB1.pickle')
chem_ception_fig = plot_results(chem_ception_df)
# Save the figure as a PNG instead of displaying it.
# NOTE(review): static image export presumably needs an export engine installed — confirm.
chem_ception_fig.write_image('../figures/ChemCeption.png', width=800, height=650, scale=2)


## GraphConv

In [7]:
# Flatten the GraphConv runs into a DataFrame and plot NEFAUC against budget.
graph_conv_df = prepare_dataframe('../run_results/GraphConvModelJOB1.pickle')
graph_conv_fig = plot_results(graph_conv_df)
# Save the figure as a PNG instead of displaying it.
# NOTE(review): static image export presumably needs an export engine installed — confirm.
graph_conv_fig.write_image('../figures/GraphConv.png', width=800, height=650, scale=2)

## Combine all and plot them

In [8]:
# Overlay the traces of the three per-model figures in one combined figure.
# `go` is the plotly.graph_objects alias imported in the first cell, so it is
# not imported again here.
fig = go.Figure()
for model_fig in (rf_fig, chem_ception_fig, graph_conv_fig):
    for trace in model_fig.data:
        fig.add_trace(trace)

# Larger canvas than the per-model plots, with the same axis titles.
fig.update_layout(
    width=1000,
    height=1200,
    xaxis_title='Budget',
    yaxis_title='NEFAUC',
)

fig.show()


In [6]:
# Inspect a separately saved results object: report how many runs it holds and
# show the last one. `pd` comes from the import cell at the top of the notebook.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
res = pd.read_pickle('res.pkl')
all_runs = res.get_all_runs()
# The count was previously a bare mid-cell expression, so it was never displayed.
print(f'number of runs: {len(all_runs)}')
all_runs[-1]

config_id: (3, 0, 3)	budget: 90.000000	loss: None
time_stamps: 0.47661900520324707 (submitted), 0.47664308547973633 (started), 0.47948193550109863 (finished)
info: None