In [None]:
from google.colab import drive
drive.mount('/content/gdrive/')

IS_STD = True
if IS_STD: IS_STD = 'standardized'
else: IS_STD = 'unstandardized'

## add your path as a variable below aman_path
aman_path = '/content/gdrive/My Drive/School/Undergrad/Fall 2021/CS 490/Group Project/Code'

## cd into your path instead of aman_path. Don't delete, just comment out.
%cd -q $aman_path

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


# Load Benchmark Data

In [None]:
import pandas as pd
import pickle 
import numpy as np
import sys
sys.path.append('benchmarks/')
from process_benchmarks import load_benchmarks, process, normalize_by_group

In [None]:
# Load the raw benchmark results for the variant selected by IS_STD.
# As displayed below, the table has one row per dataset and one column per
# model family; each cell is a dict of metrics, and the 'ann' cells nest one
# such dict per network configuration.
benchmarks = load_benchmarks(f'./benchmarks/{IS_STD}_benchmarks.dat')
benchmarks

Unnamed: 0,bt,avl,rbt,lr,ann
fb,"{'predict_time': [1.961051483999654, 1.8609670...","{'predict_time': [0.35338029400008963, 0.35393...","{'predict_time': [0.3607729120003569, 0.354091...","{'predict_time': [0.0011556780000319122, 0.000...","{'ann_(32, 32)_true_relu': {'predict_time': [3..."
amzn,"{'predict_time': [0.16527157800010173, 0.16540...","{'predict_time': [0.030556787999557855, 0.0290...","{'predict_time': [0.028930536000189022, 0.0283...","{'predict_time': [0.001123631000155001, 0.0004...","{'ann_(32, 32)_true_relu': {'predict_time': [0..."
random,"{'predict_time': [5.088417005999872, 5.1493173...","{'predict_time': [0.9812894010001401, 0.963289...","{'predict_time': [0.9766021870000259, 0.961893...","{'predict_time': [0.00265383500027383, 0.00214...","{'ann_(32, 32)_true_relu': {'predict_time': [1..."
binomial,"{'predict_time': [1.5555267409999942, 1.491719...","{'predict_time': [0.4113487639997402, 0.409936...","{'predict_time': [0.41118549999919196, 0.39072...","{'predict_time': [0.0027075640000475687, 0.002...","{'ann_(32, 32)_true_relu': {'predict_time': [9..."
poisson,"{'predict_time': [1.063798965999922, 1.0313939...","{'predict_time': [0.32333207600004243, 0.33776...","{'predict_time': [0.28864850900026795, 0.28721...","{'predict_time': [0.004380793000564154, 0.0021...","{'ann_(32, 32)_true_relu': {'predict_time': [8..."
exponential,"{'predict_time': [5.093772069000806, 5.0189089...","{'predict_time': [0.9537958790006087, 0.964295...","{'predict_time': [0.9853575019997152, 0.973735...","{'predict_time': [0.002753326999481942, 0.0025...","{'ann_(32, 32)_true_relu': {'predict_time': [1..."
lognormal,"{'predict_time': [5.062787452000521, 5.0679790...","{'predict_time': [0.9611562749996665, 0.950969...","{'predict_time': [0.992081032999522, 0.9912143...","{'predict_time': [0.0023373209996861988, 0.002...","{'ann_(32, 32)_true_relu': {'predict_time': [8..."


In [None]:
def _per_fold_values(value, n_folds):
  """Return `value` as a list with one entry per fold, repeating scalars."""
  if not pd.api.types.is_list_like(value):
    return [value] * n_folds
  value = list(value)
  if len(value) != n_folds:
    raise ValueError(
        f'expected {n_folds} per-fold values, got {len(value)}')
  return value


def process(benchmarks):
  """Flatten the raw benchmark table into one row per (dataset, model, fold).

  Note: this definition shadows the `process` imported from
  `process_benchmarks` earlier in the notebook.

  Args:
    benchmarks: DataFrame indexed by dataset name with one column per model.
      Each cell is a dict of four metrics whose values are taken by position
      as predict time, MSE, MAE and space (presumably the order in which the
      benchmark writer stores them -- verify against the producer). The
      optional 'ann' column holds a dict mapping each network configuration
      name to such a metrics dict; every configuration becomes its own model.

  Returns:
    DataFrame with columns Dataset, Model, Predict Time, MSE, MAE, Space and
    Fold (1-based), ordered model by model and, within a model, dataset by
    dataset. Metrics given as scalars are repeated for every fold. Columns
    get pandas' inferred numeric dtypes rather than `object`.

  Raises:
    ValueError: if a metrics dict does not contain exactly four entries, or a
      list-valued metric has a different length than the predict-time list.
  """
  metric_columns = ['Predict Time', 'MSE', 'MAE', 'Space']

  # model name -> [(dataset, metrics dict or None), ...]; plain model columns
  # come first, ANN configurations are appended after them.
  cells_by_model = {}
  for model in benchmarks.columns:
    if model != 'ann':
      cells_by_model[model] = list(benchmarks[model].items())

  if 'ann' in benchmarks.columns:
    ann_cells = list(benchmarks['ann'].items())
    # Union of configuration names across datasets, in first-seen order.
    ann_names = []
    for _, configs in ann_cells:
      if isinstance(configs, dict):
        ann_names.extend(name for name in configs if name not in ann_names)
    # The nested dicts are read directly; the previous str()/eval() round
    # trip failed on values such as inf/nan and executed arbitrary text.
    for name in ann_names:
      cells_by_model[name] = [
          (dataset, configs.get(name) if isinstance(configs, dict) else None)
          for dataset, configs in ann_cells]

  rows = []
  for model, cells in cells_by_model.items():
    for dataset, metrics in cells:
      if not isinstance(metrics, dict):
        continue  # no results recorded for this dataset/model pair
      values = list(metrics.values())
      if len(values) != len(metric_columns):
        raise ValueError(
            f'{dataset}/{model}: expected {len(metric_columns)} metrics, '
            f'got {len(values)}')
      # The fold count comes from the predict-time list instead of being
      # hard-coded to 5.
      n_folds = len(values[0]) if pd.api.types.is_list_like(values[0]) else 1
      per_fold = [_per_fold_values(value, n_folds) for value in values]
      for fold in range(n_folds):
        row = {'Dataset': dataset, 'Model': model}
        for column, series in zip(metric_columns, per_fold):
          row[column] = series[fold]
        row['Fold'] = fold + 1
        rows.append(row)

  return pd.DataFrame(rows, columns=['Dataset', 'Model', *metric_columns, 'Fold'])

In [None]:
# Flatten the raw table into long form (one row per dataset/model/fold).
# This calls the process() defined in the cell above, which shadows the
# function of the same name imported from process_benchmarks.
benchmarks_processed = process(benchmarks)
benchmarks_processed

Unnamed: 0,Dataset,Model,Predict Time,MSE,MAE,Space,Fold
0,fb,bt,1.96105,0,0,62171120,1
1,fb,bt,1.86097,0,0,62171120,2
2,fb,bt,1.86195,0,0,62171120,3
3,fb,bt,1.87159,0,0,62171120,4
4,fb,bt,1.85289,0,0,62171120,5
...,...,...,...,...,...,...,...
170,lognormal,"ann_(32, 32)_true_relu",8.68328,8.89499e+06,1145.83,2897192,1
171,lognormal,"ann_(32, 32)_true_relu",8.38652,8.89499e+06,1145.83,2897192,2
172,lognormal,"ann_(32, 32)_true_relu",8.30188,8.89499e+06,1145.83,2897192,3
173,lognormal,"ann_(32, 32)_true_relu",10.274,8.89499e+06,1145.83,2897192,4


# Visualize

In [None]:
import plotly.express as px
import plotly.graph_objects as go

In [None]:
def get_prediction_benchmark_chart_for_metric(metric, std=True, data=None):
  """Build a bar chart of one metric, with one trace per dataset.

  Args:
    metric: name of the column to plot on the y axis, e.g. 'Predict Time',
      'MSE', 'MAE' or 'Space'.
    std: when True, each dataset's rows are passed through
      `normalize_by_group(..., 'Model')` before plotting and the title/axis
      labels say "Standardized".
    data: long-form benchmark frame with at least 'Dataset', 'Model' and
      `metric` columns. Defaults to the notebook-level `benchmarks_processed`.

  Returns:
    A plotly `go.Figure` with models on the x axis.
  """
  source = benchmarks_processed if data is None else data

  if metric == 'Predict Time':
    y_lab = 'Standardized Time' if std else 'Time (s)'
  else:
    y_lab = 'Metric'

  if std:
    title = f"Standardized {metric}s Across Different Models and Datasets"
  else:
    title = f"{metric}s Across Different Models and Datasets"

  fig = go.Figure()
  for dataset in source['Dataset'].unique():
    # Work on an explicit copy: handing the filtered slice straight to
    # normalize_by_group is what triggered pandas' SettingWithCopyWarning
    # on every standardized chart.
    subset = source[source['Dataset'] == dataset].copy()
    if std:
      subset = normalize_by_group(subset, 'Model')
    fig.add_trace(go.Bar(x=subset['Model'], y=subset[metric], name=dataset))

  fig.update_layout(
      title=title,
      yaxis_title=y_lab,
      xaxis_title='Model')
  return fig

In [None]:
get_prediction_benchmark_chart_for_metric('Predict Time')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
get_prediction_benchmark_chart_for_metric('Predict Time', std=False)

In [None]:
get_prediction_benchmark_chart_for_metric('MAE')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
get_prediction_benchmark_chart_for_metric('MAE', std=False)

In [None]:
get_prediction_benchmark_chart_for_metric('MSE')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
get_prediction_benchmark_chart_for_metric('MSE', std=False)

In [None]:
get_prediction_benchmark_chart_for_metric('Space')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
get_prediction_benchmark_chart_for_metric('Space', std=False)

### RMI

In [None]:
# NOTE(review): this call and the three that follow in the RMI section use the
# same arguments as the unstandardized calls earlier in the notebook, and
# benchmarks_processed is not reassigned in between, so they plot the same
# data. Presumably RMI benchmark results were meant to be loaded and processed
# first -- confirm.
get_prediction_benchmark_chart_for_metric('Predict Time', std=False)

In [None]:
get_prediction_benchmark_chart_for_metric('MAE', std=False)

In [None]:
get_prediction_benchmark_chart_for_metric('MSE', std=False)

In [None]:
get_prediction_benchmark_chart_for_metric('Space', std=False)