# Guidepost Tutorial Notebook

## Provided Datasets
test_data_med.parquet


In [9]:
# Utility Functions
import pandas as pd

def filter_data_by_category(df, column, cats):
    """Return the rows of ``df`` whose ``column`` value is one of ``cats``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    column : label
        Column whose values are tested for membership.
    cats : list-like
        Values to keep; passed straight to ``Series.isin``.

    Returns
    -------
    pandas.DataFrame
        The matching rows, keeping their original index labels and columns.
    """
    keep_mask = df[column].isin(cats)
    return df.loc[keep_mask]
    

In [10]:
from guidepost import Guidepost
    

In [11]:
# Create the Guidepost object; later cells set its vis_data and vis_configs and display it.
gp = Guidepost()

In [12]:
# Load the provided job dataset (path is relative to the notebook's working directory).
results_df_merged = pd.read_parquet("data/test_data_med.parquet")

# Columns handed to the widget. Candidates currently left out:
# 'accounting_qos', 'day_of_week_num', 'hour_of_day', 'state'.
vis_columns = [
    'account',
    'day_of_week',
    'hours_used',
    'cpu_eff',
    'job_array_id',
    'nodes_req',
    'partition',
    'processors_req',
    'processors_used',
    'qcd',
    'qos',
    'queue_avg_mem',
    'queue_depth_log',
    'queue_avg_size',
    'queue_wait',
    'queue_wait_pred',
    'start_time',
    'submit_time',
    'user',
    'wallclock_req',
]

# dropna() runs on the full frame *before* the column selection, so a row with
# a missing value in any column (selected or not) is excluded.
gp.vis_data = results_df_merged.dropna().loc[:, vis_columns].to_dict()

# gp.vis_data = filter_data_by_category(results_df_merged, 'user', ['pwhite','bbenton']).dropna().to_dict()

# Recommended Categories and Mappings

## Y Axis
`queue_wait` - The wait time a job took in the queue (Seconds)

`queue_wait_pred` - The predicted wait time for a job (Seconds)

`wallclock_req` - The amount of time requested by the user at job submission (Seconds)
 
`hours_used` - The amount of time used by a job (Hours)

## Color
`nodes_req` - Number of nodes requested by a job

`processors_req` - Number of processors requested by a job

`processors_used` - Number of processors actually used by a job

`queue_wait` - The wait time a job took in the queue (Seconds)

`queue_wait_pred` - The predicted wait time for a job (Seconds)

`queue_avg_size` - The average size of the queue at submission time 

`hours_used` - The amount of time used by a job (Hours)

## Category
`user` - Users who submitted jobs to the system

`day_of_week` - Day of the week when the job began running

In [13]:
# Map dataset columns onto the view's channels. Every column named here is one
# of the columns placed in gp.vis_data.
# NOTE(review): 'color_agg' presumably selects how the color column is
# aggregated ('avg' = mean) — confirm against the Guidepost documentation.
gp.vis_configs = dict(
    x='queue_wait',
    y='qcd',
    color='nodes_req',
    color_agg='avg',
    categorical='user',
)

In [14]:
# Bare last expression: Jupyter displays gp as this cell's output.
gp

Guidepost(vis_configs={'x': 'queue_wait', 'y': 'qcd', 'color': 'nodes_req', 'color_agg': 'avg', 'categorical':…

In [16]:
# Fetch the current selection from gp; the cell output shows it as a table.
# NOTE(review): presumably this reflects a selection made interactively in the
# displayed view — confirm; if so, the result is not reproducible from code alone.
gp.retrieve_selected_data()

Unnamed: 0,account,day_of_week,hours_used,cpu_eff,job_array_id,nodes_req,partition,processors_req,processors_used,qcd,...,queue_avg_mem,queue_depth_log,queue_avg_size,queue_wait,queue_wait_pred,start_time,submit_time,user,wallclock_req,index
0,decarbsagen,Friday,18.560278,0.074000,12649893_0.0,1,long,1,36,0.731992,...,8.878234e+09,3.468856,97135.488464,78929,4784.609863,2023-06-17T13:45:21-07:00,2023-06-16T15:49:52-07:00,ahamilto,864000,16224
1,silimorphous,Tuesday,2.327222,0.720000,12772620_0.0,1,long,32,36,0.750625,...,1.620159e+09,7.328503,18091.953960,77851,91230.148438,2023-07-04T23:55:47-07:00,2023-07-04T02:18:16-07:00,rohith,180000,18458
2,silimorphous,Tuesday,5.574444,0.729000,12772627_0.0,1,long,32,36,0.827045,...,1.613409e+09,7.333088,18016.949293,78367,91230.148438,2023-07-05T00:05:08-07:00,2023-07-04T02:19:01-07:00,rohith,180000,18465
3,silimorphous,Tuesday,5.520278,0.715000,12772630_0.0,1,long,32,36,0.730878,...,1.610535e+09,7.335047,17985.014121,78350,91230.148438,2023-07-05T00:05:08-07:00,2023-07-04T02:19:18-07:00,rohith,180000,18468
4,silimorphous,Tuesday,0.443889,0.814000,12772646_0.0,1,long,32,36,0.679197,...,1.595395e+09,7.345429,17816.782359,78854,89269.148438,2023-07-05T00:15:13-07:00,2023-07-04T02:20:59-07:00,rohith,180000,18484
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
665,sbwater,Monday,105.135280,0.771245,13141991_0.0,1,long,1,36,0.630574,...,2.451179e+10,6.665811,287535.036032,950111,635.842102,2023-09-08T10:55:00-07:00,2023-08-28T10:59:49-07:00,mzaniolo,432000,56831
666,sbwater,Monday,108.846950,0.748960,13141984_0.0,1,long,1,36,0.728988,...,2.451179e+10,6.665811,287535.036032,948651,635.842102,2023-09-08T10:30:40-07:00,2023-08-28T10:59:49-07:00,mzaniolo,432000,56840
667,sbwater,Monday,95.280830,0.732699,13141968_0.0,1,long,1,36,0.653626,...,2.451179e+10,6.665811,287535.036032,947924,635.842102,2023-09-08T10:18:33-07:00,2023-08-28T10:59:49-07:00,mzaniolo,432000,56842
668,finito,Monday,0.015000,0.012724,13152508_0.0,1,long,1,36,0.503264,...,3.083338e+10,6.461624,353415.840057,923349,23816.697266,2023-09-08T08:52:30-07:00,2023-08-28T16:23:21-07:00,ahamilto,432000,56872
