In [None]:
import pandas as pd
import json
import pickle
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import os,sys,inspect, pickle
import numpy as np
import matplotlib.colors as mplcolors
from matplotlib.ticker import MaxNLocator

from utils import *

# Widen pandas' column display so long cell contents are not cut off at the default width.
pd.set_option('display.max_colwidth', 500)

# Default query set; the per-benchmark config cells further down reassign QUERIES.
# all except 2,17,20 because they dominate costs by orders of magnitude
QUERIES = [1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,19,21,22]
# Shared plot styling used by the draw_* functions below.
X_AXIS_TITLE = 'Index Storage Consumption (GB)'
LABEL_FONT_SIZE = 14
TITLE_FONT_SIZE = 15
MARKER_SIZE = 6
LINE_WIDTH = 1
# Figure size in inches, passed to fig.set_size_inches().
GRAPH_SIZE = (5,3.45)

# Tick labels use the same font size as the axis labels.
matplotlib.rcParams['xtick.labelsize'] = LABEL_FONT_SIZE
matplotlib.rcParams['ytick.labelsize'] = LABEL_FONT_SIZE

In [None]:
def get_costs(df):
    """Return the summed query cost of every row in a results frame.

    Every column whose name starts with 'q' is expected to hold a JSON document
    with a 'Cost' entry; those entries are converted to float and added up per
    row. All other columns are ignored.

    Args:
        df: pandas DataFrame with one row per measured configuration.

    Returns:
        A list with one total per row, in row order (0 for a row when the frame
        has no 'q' columns).
    """
    return [
        sum(
            float(json.loads(record[name])['Cost'])
            for name in df.columns
            if name[0] == 'q'
        )
        for _, record in df.iterrows()
    ]

In [None]:
def draw_cost_graph(cophy_costs=None, cophy_memory_consumption=None, legend=True):
    """Plot relative workload cost against index storage consumption per algorithm.

    Costs are shown as a percentage of the no-index baseline, which is the cost
    of the first row of the `results_no_index_*` CSV. The figure is written to
    `{CSV_PATH}/{benchmark}_cost_estimation.pdf`.

    Reads the notebook-level names CSV_PATH, BENCHMARK, QUERIES, XLIM,
    SCALE_FACTOR, ALGORITHMS and styles.

    Args:
        cophy_costs: Optional absolute workload costs for the 'cophy' algorithm.
        cophy_memory_consumption: Optional storage consumptions for 'cophy'
            (converted with `mb_to_gb`), positionally aligned with `cophy_costs`.
            The 'cophy' series is only drawn when both lists are given.
        legend: Whether to add a figure-level legend.
    """
    no_index_df = pd.read_csv(f"{CSV_PATH}/results_no_index_{BENCHMARK}_{len(QUERIES)}_queries.csv", sep=';')
    no_index_cost = get_costs(no_index_df)[0]
    fig, ax = plt.subplots()
    for algorithm in ALGORITHMS:
        if algorithm == 'auto_admin_naive_2':
            continue

        style = styles[algorithm]

        if algorithm == 'cophy':
            # CoPhy results are passed in as lists instead of being read from a CSV.
            if cophy_costs is not None and cophy_memory_consumption is not None:
                # Keep each (storage, cost) pair together while filtering so that a
                # dropped budget can never shift the costs onto the wrong x value.
                cophy_points = [
                    (mb_to_gb(memory_mb), cost / no_index_cost * 100)
                    for memory_mb, cost in zip(cophy_memory_consumption, cophy_costs)
                ]
                cophy_points = [point for point in cophy_points if point[0] < XLIM]
                memory_consumptions_gb = [point[0] for point in cophy_points]
                relative_costs = [point[1] for point in cophy_points]
                ax.step(memory_consumptions_gb, relative_costs, where='post', color=style.color, linewidth=LINE_WIDTH)
                ax.plot(memory_consumptions_gb, relative_costs, f'{style.marker}', color=style.color, label=style.label, markersize=MARKER_SIZE)
            continue

        csv_path = f"{CSV_PATH}/results_{algorithm}_{BENCHMARK}_{len(QUERIES)}_queries.csv"
        try:
            df = pd.read_csv(csv_path, sep=';')
        except FileNotFoundError:
            # Not every result directory contains every algorithm; skip the missing
            # ones, but let genuine read/parse errors surface.
            continue
        if algorithm == 'dexter':
            # Dexter rows are plotted in reverse file order.
            df = df.iloc[::-1]
        df['memory consumption'] = df['memory consumption'].apply(b_to_gb)
        df = df.query(f'`memory consumption` < {XLIM}')
        # Don't draw measurements that did not identify any indexes
        df = df.query('`indexed columns` != "[]"')

        costs = get_costs(df)
        relative_costs = [cost / no_index_cost * 100 for cost in costs]

        ax.step(df['memory consumption'], relative_costs, where='post', color=style.color, linewidth=LINE_WIDTH)
        ax.plot(df['memory consumption'], relative_costs, f'{style.marker}', color=style.color, label=style.label, markersize=MARKER_SIZE)

    if legend:
        fig.legend(fontsize=LABEL_FONT_SIZE - 1)
    plt.xlabel(X_AXIS_TITLE, fontsize=LABEL_FONT_SIZE)
    plt.ylabel('Relative workload cost (%)', fontsize=LABEL_FONT_SIZE)
    # sf_string is only needed by the disabled title below.
    sf_string = f' (SF {SCALE_FACTOR})' if SCALE_FACTOR is not None else ''
#     plt.title(f'{BENCHMARK.upper()}{sf_string}: Performance vs Memory Budget', fontsize=TITLE_FONT_SIZE)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    fig.set_size_inches(GRAPH_SIZE, forward=True)
    fig.tight_layout()
    ax.set_xlim([-0.1, XLIM])
    fig.savefig(f"{CSV_PATH}/{BENCHMARK.lower()}_cost_estimation.pdf", bbox_inches='tight', pad_inches=0)

def draw_runtime_graph(minutes=False, cophy_runtimes=None, cophy_memory_consumption=None, legend=True):
    """Plot algorithm runtime against index storage consumption per algorithm.

    The figure is written to `{CSV_PATH}/{benchmark}_runtime.pdf`. Reads the
    notebook-level names CSV_PATH, BENCHMARK, QUERIES, XLIM, SCALE_FACTOR,
    ALGORITHMS and styles.

    Args:
        minutes: Plot runtimes in minutes instead of seconds.
        cophy_runtimes: Optional runtimes in seconds for the 'cophy' algorithm.
        cophy_memory_consumption: Optional storage consumptions for 'cophy'
            (converted with `mb_to_gb`), positionally aligned with
            `cophy_runtimes`. The 'cophy' series is only drawn when both lists
            are given.
        legend: Whether to add a figure-level legend.
    """
    fig, ax = plt.subplots()
    for algorithm in ALGORITHMS:
        if algorithm == 'auto_admin_naive_2':
            continue
        # CoPhy needs both lists; skipping here avoids a TypeError on a missing budget list.
        if algorithm == 'cophy' and (cophy_runtimes is None or cophy_memory_consumption is None):
            continue
        style = styles[algorithm]

        if algorithm == 'cophy':
            # Keep each (storage, runtime) pair together while filtering so that a
            # dropped budget can never shift runtimes onto the wrong x value.
            cophy_points = [
                (mb_to_gb(memory_mb), runtime / 60 if minutes else runtime)
                for memory_mb, runtime in zip(cophy_memory_consumption, cophy_runtimes)
            ]
            cophy_points = [point for point in cophy_points if point[0] < XLIM]
            memory_consumptions_gb = [point[0] for point in cophy_points]
            plotted_runtimes = [point[1] for point in cophy_points]
            ax.step(memory_consumptions_gb, plotted_runtimes, where='post', color=style.color, linewidth=LINE_WIDTH)
            ax.plot(memory_consumptions_gb, plotted_runtimes, f'{style.marker}', color=style.color, label=style.label, markersize=MARKER_SIZE)
            continue

        csv_path = f"{CSV_PATH}/results_{algorithm}_{BENCHMARK}_{len(QUERIES)}_queries.csv"
        df = pd.read_csv(csv_path, sep=';')
        df['memory consumption'] = df['memory consumption'].apply(b_to_gb)
        df = df.query(f'`memory consumption` < {XLIM}')
        # Don't draw measurements that did not identify any indexes
        df = df.query('`indexed columns` != "[]"')

        runtime = df['algorithm runtime'].apply(s_to_m) if minutes else df['algorithm runtime']
        ax.step(df['memory consumption'], runtime, where='post', color=style.color, linewidth=LINE_WIDTH)
        ax.plot(df['memory consumption'], runtime, f'{style.marker}', color=style.color, label=style.label, markersize=MARKER_SIZE)

    if legend:
        fig.legend(fontsize=LABEL_FONT_SIZE - 1)
    plt.xlabel(X_AXIS_TITLE, fontsize=LABEL_FONT_SIZE)
    ylabel_unit = 'min' if minutes else 'sec'
    plt.ylabel(f'Algorithm runtime ({ylabel_unit})', fontsize=LABEL_FONT_SIZE)
    # sf_string is only needed by the disabled title below.
    sf_string = f' (SF {SCALE_FACTOR})' if SCALE_FACTOR is not None else ''
#     plt.title(f'{BENCHMARK.upper()}{sf_string}: Algorithm Runtime vs Memory Budget', fontsize=TITLE_FONT_SIZE)

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    fig.set_size_inches(GRAPH_SIZE, forward=True)
    fig.tight_layout()
    ax.set_xlim([-0.1, XLIM])
    fig.savefig(f"{CSV_PATH}/{BENCHMARK.lower()}_runtime.pdf", bbox_inches='tight', pad_inches=0)
    
def draw_what_if_graph(million=False):
    """Draw grouped bars of cost requests and cache hits per number of indexes.

    For 'auto_admin', 'drop' and 'auto_admin_naive_2' the first XLIM rows of the
    result CSV are plotted as-is. For 'extend' and 'relaxation' the rows are
    grouped by the number of indexes found in the 'indexed columns' string and
    the first row of each group is used. Each bar shows the cache hits
    (semi-transparent) with the uncached requests stacked on top. The average
    cache rate per algorithm is printed and the figure is saved to
    `{CSV_PATH}/{benchmark}_what_if_graph.pdf`.

    Reads the notebook-level names CSV_PATH, BENCHMARK, QUERIES, XLIM,
    ALGORITHMS and styles.

    Args:
        million: Scale request and hit counts down by 1,000,000.
    """
    def determine_max_created_indexes_in_epic(df_epic):
        # Count the indexes of every row by counting 'I(C' occurrences in the
        # 'indexed columns' string. Uses the frame that was passed in rather
        # than whatever `df` happens to be bound in the enclosing scope.
        return [
            indexed_column_step.count('I(C')
            for indexed_column_step in df_epic['indexed columns'].values
        ]

    print('Average cache rates:')
    width = 0.18
    pos = None
    labels = None
    fig, ax = plt.subplots()
    idx = 0
    for algorithm in ALGORITHMS:
        if algorithm not in ['auto_admin', 'drop', 'auto_admin_naive_2']:
            continue
        style = styles[algorithm]
        csv_path = f"{CSV_PATH}/results_{algorithm}_{BENCHMARK}_{len(QUERIES)}_queries.csv"
        df = pd.read_csv(csv_path, sep=';')
        # XLIM doubles as the maximum number of rows (x positions) shown here.
        df = df.head(XLIM)
        if idx == 0:
            # The first plotted algorithm defines the x positions and tick labels.
            labels = df.index + 1
            pos = np.arange(len(labels))
        requests = df['cost requests'] / 1000000 if million else df['cost requests']
        hits = df['cache hits'] / 1000000 if million else df['cache hits']
        ax.bar([p + width * idx for p in pos], requests - hits, width, bottom=hits, label=style.label, color=style.color, hatch=style.hatch)
        ax.bar([p + width * idx for p in pos], hits, width, color=style.color, alpha=0.5, hatch=style.hatch) # label=f"{style.label} (cached)",

        avg_cache_rate = (hits / requests).mean()
        print(f'  {style.label}: {avg_cache_rate}')

        idx += 1

    for algorithm in ['extend', 'relaxation']:
        style = styles[algorithm]
        csv_path = f"{CSV_PATH}/results_{algorithm}_{BENCHMARK}_{len(QUERIES)}_queries.csv"
        df = pd.read_csv(csv_path, sep=';')
        df['number of indexes'] = determine_max_created_indexes_in_epic(df)
        df = df.sort_values(by=['number of indexes'])
        df = df.groupby('number of indexes').first()
        cache_hits = list(df['cache hits'].values)
        cost_requests = list(df['cost requests'].values)
        # Pad with zeros or cut off so both lists line up with the x positions.
        missing = max(0, len(labels) - len(cache_hits))
        cache_hits.extend([0] * missing)
        cost_requests.extend([0] * missing)
        if len(cache_hits) > len(labels):
            cache_hits = cache_hits[:len(labels)]
            cost_requests = cost_requests[:len(labels)]
        requests = [value / 1000000 for value in cost_requests] if million else cost_requests
        hits = [value / 1000000 for value in cache_hits] if million else cache_hits
        requests_hits_difference = [request - hit for request, hit in zip(requests, hits)]
        ax.bar([p + width * idx for p in pos], requests_hits_difference, width, bottom=hits, label=style.label, color=style.color, hatch=style.hatch)
        ax.bar([p + width * idx for p in pos], hits, width, color=style.color, alpha=0.5, hatch=style.hatch) #label=f"{style.label} (cached)"
        idx += 1

        # Padded entries have zero requests and must not enter the average.
        rates = [hit / request for hit, request in zip(hits, requests) if request > 0]
        avg_cache_rate = np.mean(rates)
        print(f'  {style.label}: {avg_cache_rate}')

    # Centre each tick under its group of bars.
    ax.set_xticks([p + ((idx - 1) / 2) * width for p in pos])
    ax.set_xticklabels(labels)
    ax.legend(fontsize=LABEL_FONT_SIZE - 1)
    ylabel_string = '(millions)' if million else ''
    plt.ylabel(f'Cost requests {ylabel_string}', fontsize=LABEL_FONT_SIZE)
    plt.xlabel('Number of indexes', fontsize=LABEL_FONT_SIZE)
    fig.tight_layout()
    fig.set_size_inches(GRAPH_SIZE)
    ax.set_xlim([-0.2, len(labels) - 0.2])
    plt.show()

    fig.savefig(f"{CSV_PATH}/{BENCHMARK.lower()}_what_if_graph.pdf", bbox_inches='tight')
    

In [None]:
def draw_legend():
    """Render a stand-alone, single-row legend and save it as ../legend.pdf.

    One invisible dummy line is plotted per algorithm (except
    'auto_admin_naive_2') so the legend picks up its label, colour and marker;
    the axes and figure background are hidden so only the legend remains.
    """
    fig, ax = plt.subplots()
    for algorithm in ALGORITHMS:
        if algorithm == 'auto_admin_naive_2':
            continue
        style = styles[algorithm]
        # The 'auto_admin' marker is drawn slightly larger than the others.
        enlargement = 5 if algorithm == 'auto_admin' else 3
        ax.plot(
            [1],
            label=style.label,
            linewidth=0,
            color=style.color,
            marker=style.marker,
            markersize=MARKER_SIZE + enlargement,
        )
    plt.legend(loc="center", ncol=len(ALGORITHMS), fontsize=LABEL_FONT_SIZE, frameon=False)
    # Hide the dummy data; the legend handles stay visible.
    for dummy_line in ax.lines:
        dummy_line.set_visible(False)
    fig.patch.set_visible(False)
    ax.axis('off')
    plt.show()
    fig.set_size_inches((8.5, 0.4))
    fig.tight_layout()
    fig.savefig("../legend.pdf", bbox_inches='tight', pad_inches = 0)
draw_legend()

## TPCH - Cost

In [None]:
# Configuration read by the draw_* functions for the TPC-H plots.
# CSV_PATH is the directory the result CSVs are read from and the PDFs are saved to.
CSV_PATH = '../tpch_wo_2_17_20'
BENCHMARK = 'tpch'
SCALE_FACTOR = 10
# all except 2,17,20 because they dominate costs by orders of magnitude
# Only len(QUERIES) is used by the draw_* functions (it is part of the CSV file names).
QUERIES = [1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,19,21,22]
# QUERIES = range(1, 23)
# Upper x-axis limit in GB; measurements at or above it are not plotted.
XLIM = 10

In [None]:
# old
# Hard-coded CoPhy results: one workload cost per storage value (11 entries each).
cophy_memory_consumptions_mb = [250, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4250, 5000, 6500]
cophy_costs = [34396397.79, 33854446.79, 33133046.48, 32783525.42, 27986156.77, 27258434.02, 26896370.87, 26442926.89, 24993712.59, 24976459.99, 24954672.03]
draw_cost_graph(cophy_costs, cophy_memory_consumptions_mb, legend=True)
# new cophy
# NOTE(review): the disabled lists below do not line up (16 budgets vs. 17 costs) - fix before re-enabling.
# cophy_memory_consumptions_mb = [250,500,1000,1500,2000,2500,3000,3500,4250,5000,5750,6500,8000,10000,12500,15000]
# cophy_costs = [34746687.52, 34380645.08, 33818483.44, 33175526.84, 32898379.09, 28243364.04, 27814746.6, 27314826.62, 26407652.82, 25598119.51, 25039547.51, 24841420.84, 24442037.82, 23998252.03, 23497725.26, 23408087.08, 23332660.05]
# draw_cost_graph(cophy_costs, cophy_memory_consumptions_mb)


## TPCH - Runtime

In [None]:
# Cophy What-If time: 151.91098499298096 - cost_requests: 82676 - cache_hits: 45776 - Gurobi Times:
# Total CoPhy runtime per budget = shared what-if time + the solver time of that budget (seconds).
cophy_what_if_time = 151.91098499298096
# NOTE(review): 16 solver times, but cophy_memory_consumptions_mb from the cost cell above has
# 11 entries; draw_runtime_graph pairs them by position and drops the surplus. Presumably these
# times belong to the 16-budget "new cophy" list - confirm the pairing.
cophy_solver_times = [0.067291, 0.067734, 0.066856, 0.08871, 0.102708, 0.104063, 0.122531, 0.117007, 0.21763, 0.451052, 0.481237, 0.504284, 0.502142, 0.466632, 1.860344, 1.843925]
cophy_times = list(map(lambda x: x + cophy_what_if_time, cophy_solver_times))
draw_runtime_graph(cophy_runtimes=cophy_times, cophy_memory_consumption=cophy_memory_consumptions_mb)

## TPCH - What If Graph

In [None]:
# Uses the TPC-H configuration (CSV_PATH, BENCHMARK, QUERIES, XLIM) set in the cells above.
draw_what_if_graph()


## TPCH - Query Chart

In [None]:
# The query chart reads its results from a separate directory and uses all 22 queries;
# len(QUERIES) is part of the CSV/pickle file names below.
CSV_PATH = '../tpch_wo_2_17_20/all_queries'
QUERIES = range(1, 23)

# Put the directory two levels above this one on sys.path so that the
# `selection` package imported below can be resolved.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
parentdir = os.path.dirname(parentdir)
sys.path.insert(0,parentdir)

from selection.index import Index
from selection.workload import Workload
from selection.cost_evaluation import CostEvaluation
from selection.dbms.postgres_dbms import PostgresDatabaseConnector
from selection.what_if_index_creation import WhatIfIndexCreation

# Shared connection used by exploit_indexes(); it is closed at the end of this cell.
pdc = PostgresDatabaseConnector('indexselection_tpch___10')
what_if = WhatIfIndexCreation(pdc)

# Memoizes what_if_cost() results, keyed by (workload string, frozenset of indexes).
cache = {}

def workload_to_str(workload):
    """Return the string form of the workload's query list (used in cache keys)."""
    queries = workload.queries
    return str(queries)

def what_if_cost(workload, index_combination):
    """Return the cost of `workload` under `index_combination`, memoized in `cache`.

    On a cache miss a dedicated database connection is opened, the cost is
    computed with CostEvaluation.calculate_cost and stored in the module-level
    `cache`; the connection is closed again even if the evaluation fails.

    Args:
        workload: Workload whose cost is evaluated.
        index_combination: Iterable of indexes; its elements must be hashable
            because the cache key contains a frozenset of them.

    Returns:
        Whatever CostEvaluation.calculate_cost returns for the pair.
    """
    cache_key = (workload_to_str(workload), frozenset(index_combination))
    if cache_key in cache:
        return cache[cache_key]

    database_connector = PostgresDatabaseConnector('indexselection_tpch___10')
    try:
        cost_evaluation = CostEvaluation(database_connector)
        cost = cost_evaluation.calculate_cost(workload, index_combination)
        cache[cache_key] = cost
    finally:
        # Never leave the connection open when the evaluation raises.
        database_connector.close()
    return cost

def exploit_indexes(recommended_indexes_per_query, indexes):
    """Simulate `indexes` and record, per query, the plan cost and the indexes used.

    Every index is simulated through the module-level `what_if`, then each query
    of the module-level `workload` is planned on `pdc`. An index counts as used
    by a query when its `hypopg_name` occurs in the string form of the plan.
    The simulated indexes are dropped again afterwards, also when planning fails,
    so that they cannot leak into later plans on the shared connection.

    Args:
        recommended_indexes_per_query: Dict that is filled in place with
            `query.nr -> (plan['Total Cost'], [used indexes])`.
        indexes: Indexes to simulate.

    Returns:
        The sum of `estimated_size` over all simulated indexes.
    """
    total_storage_consumption = 0

    try:
        for index in indexes:
            what_if.simulate_index(index, store_size=True)
            total_storage_consumption += index.estimated_size

        for query in workload.queries:
            plan = pdc.get_plan(query)
            plan_string = str(plan)
            cost = plan['Total Cost']
            used_indexes = [index for index in indexes if index.hypopg_name in plan_string]
            # Absolute cost is stored; a relative variant would divide by no_index_costs[query.nr].
            recommended_indexes_per_query[query.nr] = (cost, used_indexes)
    finally:
        what_if.drop_all_simulated_indexes()

    return total_storage_consumption

def costs_from_dict(d):
    """Return the first element of every value in `d`, ordered by ascending key."""
    return [d[key][0] for key in sorted(d)]

def unpickle_indexes(file):
    """Load every object that was pickled back-to-back into `file`.

    Args:
        file: Path of the pickle file.

    Returns:
        The loaded objects in file order; an empty list for an empty file.
    """
    # NOTE: pickle can execute arbitrary code - only use this on trusted result files.
    loaded = []
    with open(file, 'rb') as stream:
        try:
            # Keep reading until the stream is exhausted.
            while True:
                loaded.append(pickle.load(stream))
        except EOFError:
            pass
    return loaded

def best_indexes_fitting_budget(algorithm):
    """Return the cheapest index configuration of `algorithm` within the storage budget.

    Reads the algorithm's result CSV, keeps the rows whose 'memory consumption'
    is below MEMORY_CONSUMPTION_FILTER_B, picks the row with the lowest total
    cost (the first one on ties) and returns the matching entry of the
    algorithm's pickled index configurations.

    Args:
        algorithm: Algorithm name as used in the result file names.

    Returns:
        The entry of the unpickled configurations at the best row's original
        position, or an empty list when no row fits the budget.
    """
    csv_path = f"{CSV_PATH}/results_{algorithm}_{BENCHMARK}_{len(QUERIES)}_queries.csv"
    df = pd.read_csv(csv_path, sep=';')
    if algorithm == 'dexter':
        df = df.iloc[::-1]
    # idx indicates Pandas indexes while index(es) indicates database index
    df['original idx'] = df.index
    df = df[df['memory consumption'] < MEMORY_CONSUMPTION_FILTER_B]

    # Parse the per-query JSON costs once instead of once per use.
    costs = get_costs(df)
    if not costs:
        return []
    row_with_best_configuration = costs.index(min(costs))
    # Read from the column (not from a row Series) so the value keeps its integer
    # dtype, and cast it so it is always a valid list index.
    best_config_idx = int(df['original idx'].iloc[row_with_best_configuration])

    indexes_file = f"{CSV_PATH}/indexes_{algorithm}_{BENCHMARK}_{len(QUERIES)}_queries.pickle"
    indexes = unpickle_indexes(indexes_file)

    return indexes[best_config_idx]


def draw_tpch_query_graph(yscale='linear', small=False):
    """Draw grouped bars with the per-query plan cost for every algorithm's indexes.

    For each entry of ALGORITHMS plus 'no_index' whose style has an
    `index_history`, the indexes are simulated via exploit_indexes() and the
    resulting per-query costs of the module-level `workload` are plotted. The
    storage consumption of each configuration is printed and the figure is
    saved to `{CSV_PATH}/{benchmark}_query_graph_{small|large}.pdf`.

    Args:
        yscale: Matplotlib y-axis scale, e.g. 'linear' or 'log'.
        small: Draw the compact variant: absolute costs (not in millions), no
            legend and a smaller figure.
    """
    width = 0.12
    pos = None
    labels = None
    fig, ax = plt.subplots()
    idx = 0
    for algorithm in ALGORITHMS + ['no_index']:
        style = styles[algorithm]
        if style.index_history is None:
            continue
        recommended_indexes = {}
        total_storage_consumption = exploit_indexes(recommended_indexes, style.index_history)
        costs = costs_from_dict(recommended_indexes)
        if idx == 0:
            # The first plotted algorithm defines the x positions and tick labels.
            labels = list(recommended_indexes.keys())
            pos = np.arange(len(labels))
        if not small:
            costs = [cost / 1000000 for cost in costs]
        # Calculate color so that the hatches are visible but not pushy
        hatch_color_hsv = mplcolors.rgb_to_hsv(style.color)
        # Lighten dark colours and darken light ones by a fixed amount.
        if hatch_color_hsv[2] < 0.5:
            hatch_color_hsv[2] += 0.2
        else:
            hatch_color_hsv[2] -= 0.2
        edgecolor = mplcolors.hsv_to_rgb(hatch_color_hsv)
        ax.bar([p + width * idx for p in pos], costs, width, label=(f"{style.label} ({len(style.index_history)})"), color=style.color, hatch=style.hatch, edgecolor=edgecolor, linewidth=0)
        idx += 1

        print(f"{algorithm} storage consumption: {b_to_gb(total_storage_consumption)}")

    # Centre each tick under its group of bars.
    ax.set_xticks([p + ((idx - 1) / 2) * width for p in pos])
    ax.set_xticklabels(labels)
    if not small:
        legend = ax.legend(title=r'Algorithm ($|S|$)', fontsize=LABEL_FONT_SIZE - 3, loc='upper left', ncol=2)
        plt.setp(legend.get_title(),fontsize=LABEL_FONT_SIZE - 2)
    # plt.ylabel('Query cost in % of w/o indexes', fontsize=LABEL_FONT_SIZE)
    plt.ylabel(f"Query cost{'' if small else ' (million)'}", fontsize=LABEL_FONT_SIZE)
    plt.xlabel('Query ID', fontsize=LABEL_FONT_SIZE)
    plt.yscale(yscale)
    # plt.title('Performance impact of final index combination per algorithm', fontsize=TITLE_FONT_SIZE)
    fig.tight_layout()
    size = (12, 2.3) if not small else (3.6, 2.23)
    fig.set_size_inches(size)
    plt.show()

    fig.savefig(f"{CSV_PATH}/{BENCHMARK.lower()}_query_graph_{'small' if small else 'large'}.pdf", bbox_inches='tight', pad_inches = 0)

####### CONFIG
    
# Storage budget in bytes; best_indexes_fitting_budget() only considers
# configurations whose memory consumption is below it.
MEMORY_CONSUMPTION_FILTER_B = 5000000000

# Attach the best configuration within the budget to every algorithm's style.
for algorithm in ALGORITHMS:
    if algorithm == 'cophy' or algorithm == 'auto_admin_naive_2':
        continue
    styles[algorithm].index_history = best_indexes_fitting_budget(algorithm)

# Use a context manager so the pickle file handle is closed right after loading.
with open(f'{CSV_PATH}/workload_{BENCHMARK}_{len(QUERIES)}_queries.pickle', 'rb') as workload_file:
    workload = pickle.load(workload_file)
# The baseline is drawn as an "algorithm" without any indexes.
styles['no_index'].index_history = []
# Filter uninteresting ones
workload.queries = [query for query in workload.queries if query.nr not in [1, 3, 6, 7, 10, 13, 14, 15, 16]]
# Log Scale queries
workload.queries = [query for query in workload.queries if query.nr not in [2, 17, 20]]

# Per-query baseline costs; currently only referenced by the disabled
# relative-cost line in exploit_indexes().
no_index_costs = {}
for query in workload.queries:
    no_index_costs[query.nr] = what_if_cost(Workload([query]), [])

draw_tpch_query_graph()

# Second chart: only queries 2, 17 and 20, drawn on a log scale.
with open(f'{CSV_PATH}/workload_{BENCHMARK}_{len(QUERIES)}_queries.pickle', 'rb') as workload_file:
    workload = pickle.load(workload_file)
workload.queries = [query for query in workload.queries if query.nr in [2, 17, 20]]

draw_tpch_query_graph(yscale='log', small=True)

pdc.close()

## TPCDS - Cost

In [None]:
# Configuration read by the draw_* functions for the TPC-DS plots.
# CSV_PATH is the directory the result CSVs are read from and the PDFs are saved to.
CSV_PATH = '../tpcds_wo_4_6_9_10_11_32_35_41_95'
BENCHMARK = 'tpcds'
SCALE_FACTOR = 10
# Query numbers 1-99 without 4, 6, 9, 10, 11, 32, 35, 41 and 95 (matching the directory name);
# only len(QUERIES) is used by the draw_* functions (it is part of the CSV file names).
QUERIES = [1, 2, 3, 5, 7, 8, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 96, 97, 98, 99]
# Upper x-axis limit in GB; measurements at or above it are not plotted.
XLIM = 12

In [None]:
# Hard-coded CoPhy results: one workload cost per storage value (16 entries each).
cophy_memory_consumptions_mb = [250,500,1000,1500,2000,2500,3000,3500,4250,5000,5750,6500,8000,10000,12500,15000]
cophy_costs = [93552156.13, 88577114.46, 84585359.84, 80591549.13, 79399996.85, 78585000.31, 77435060.04, 76792132.12, 75967528.65, 75365979.03, 74943369.35, 74457654.57, 73613863.17, 72363984.63, 71776031.09, 71534655.96]
draw_cost_graph(cophy_costs, cophy_memory_consumptions_mb)
# new cophy
# cophy_costs = [94009579.24, 89908958.77, 86949597.35, 83122923.62, 82171561.17, 81545983.11, 80889568.68, 80249071.74, 79435135.96, 78835484.06, 78171122.91, 77577292.75, 76784020.72, 76237071.04, 75169511.67, 74351448.47]
# draw_cost_graph(cophy_costs, cophy_memory_consumptions_mb)

## TPCDS - Runtime

In [None]:
# Cophy What-If time: 579.6870040893555 - cost_requests: 394317 - cache_hits: 342140 - Gurobi Times:
# Total CoPhy runtime per budget = shared what-if time (rounded here) + the solver time of that
# budget, in seconds; the 16 solver times pair up with cophy_memory_consumptions_mb from the cost cell.
cophy_what_if_time = 579.69
cophy_solver_times = [0.715391, 2.118141, 3.091555, 3.272472, 2.53551, 27.41455, 24.389079, 2.613326, 25.794448, 25.912374, 27.815068, 26.617466, 25.222031, 25.490362, 24.821388, 25.060508]
cophy_times = list(map(lambda x: x + cophy_what_if_time, cophy_solver_times))
draw_runtime_graph(minutes=True, cophy_runtimes=cophy_times, cophy_memory_consumption=cophy_memory_consumptions_mb)

## TPCDS - What If Graph

In [None]:
# draw_what_if_graph(million=True)

## JOB - Cost

In [None]:
# Configuration read by the draw_* functions for the JOB plots.
# CSV_PATH is the directory the result CSVs are read from and the PDFs are saved to.
CSV_PATH = '../job'
BENCHMARK = 'job'
# None means that no scale factor applies (sf_string stays empty in the draw functions).
SCALE_FACTOR = None
# 113 queries; only len(QUERIES) is used by the draw_* functions (it is part of the CSV file names).
QUERIES = range(0, 113)
# Upper x-axis limit in GB; measurements at or above it are not plotted.
XLIM = 12

In [None]:
# draw_cost_graph()
# new cophy
# Hard-coded CoPhy results: one workload cost per storage value (12 entries each).
cophy_memory_consumptions_mb = [250,500,1000,1500,2000,2500,3000,3500,5000,5750,6500,8000]
cophy_costs = [75079495.33, 69086234.95, 54977085.42, 50687539.75, 46159506.78, 37484609.32, 28011167.16, 27618166.14, 25963809.01, 25943071.1, 24859616.99, 24763974.91]
draw_cost_graph(cophy_costs, cophy_memory_consumptions_mb)

## JOB - Runtime

In [None]:
# Cophy What-If time: 822.8340845108032 - cost_requests: 305326 - cache_hits: 267996 - Gurobi Times:
# Total CoPhy runtime per budget = shared what-if time (rounded here) + the solver time of that
# budget, in seconds; the 12 solver times pair up with cophy_memory_consumptions_mb from the cost cell.
cophy_what_if_time = 822.83
cophy_solver_times = [34.712032, 8.646469, 40.455446, 8.850123, 48.282356, 68.443695, 29.195716, 40.542401, 19.277244, 18.437171, 7.715893, 5.603071]
cophy_times = list(map(lambda x: x + cophy_what_if_time, cophy_solver_times))
draw_runtime_graph(minutes=True, cophy_runtimes=cophy_times, cophy_memory_consumption=cophy_memory_consumptions_mb)

## JOB - What If Graph

In [None]:
# GRAPH_SIZE = (5,2.2)
# draw_what_if_graph(million=True)

In [None]:
# Configuration for the TPC-H results stored under ../tpch_mssql.
# Note that BENCHMARK is 'tpch' again, so the saved PDF is named tpch_cost_estimation.pdf
# inside this CSV_PATH directory.
CSV_PATH = '../tpch_mssql'
BENCHMARK = 'tpch'
SCALE_FACTOR = 10
# all except 2,17,20 because they dominate costs by orders of magnitude
QUERIES = [1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,19,21,22]
# Upper x-axis limit in GB; measurements at or above it are not plotted.
XLIM = 10

In [None]:
# No CoPhy data is passed, so the 'cophy' series is left out of this plot.
draw_cost_graph()