# Predicting DWPC query runtime ahead of time

In [None]:
import json

import matplotlib.pyplot
import pandas
import numpy
import seaborn
import mpld3

%matplotlib inline

In [None]:
# Load the metapath records (a JSON document) that the complexity table is built from.
# NOTE(review): this path starts with 'all-features/data/' while the AUROC table
# is read from 'data/all-features/' -- confirm that both locations are intended.
path = 'all-features/data/metapaths.json'
# JSON text is UTF-8 by specification; name the encoding explicitly instead of
# relying on the platform default.
with open(path, encoding='utf-8') as fp:
    metapaths = json.load(fp)

In [None]:
# Read the tab-separated file into auroc_df and preview its first two rows.
auroc_df = pandas.read_csv('data/all-features/auroc.tsv', sep='\t')
auroc_df.head(2)

In [None]:
# Kept from the original cell; nothing else in this cell reads `cols`.
cols = ['sequential_complexity', 'optimal_join_complexity', 'midpoint_join_complexity']

# One output column per value collected for each metapath below.
column_names = [
    'metapath',
    'midpoint_complexity',
    'optimal_complexity',
    'forward_complexity',
    'backward_complexity',
]

rows = []
for item in metapaths:
    joins = item['join_complexities']
    rows.append([
        item['abbreviation'],
        joins[item['midpoint_index']],
        joins[item['optimal_join_index']],
        joins[-1],  # last entry is stored as forward_complexity
        joins[0],   # first entry is stored as backward_complexity
    ])

# Join the per-metapath complexities onto auroc_df (with no `on` given, pandas
# merges on the columns the two frames share), then add log10 of the query time.
complexity_df = auroc_df.merge(pandas.DataFrame(rows, columns=column_names))
complexity_df['log10_seconds_per_query'] = numpy.log10(complexity_df['seconds_per_query'])

In [None]:
# Preview the first two rows of complexity_df.
complexity_df.head(2)

In [None]:
# Display the full complexity_df table.
complexity_df

In [None]:
# Scatter of backward vs. forward complexity with a LOWESS trend line and
# hover tooltips that name each metapath.
x_col, y_col = 'forward_complexity', 'backward_complexity'
# regplot drops rows with a missing x or y by default (dropna=True); drop them
# up front so the tooltip labels stay aligned with the plotted points.
plot_df = complexity_df.dropna(subset=[x_col, y_col])
# x/y are passed by keyword because recent seaborn releases no longer accept
# them positionally. ci=None is the documented way to disable the confidence band.
ax = seaborn.regplot(x=x_col, y=y_col, data=plot_df,
    lowess=True, scatter_kws={'alpha': 0.5}, line_kws={'color': 'black'}, ci=None)
# First collection on the axes -- the scatter points when the axes start empty.
points = ax.collections[0]
labels = plot_df.metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(ax.figure, tooltip)
mpld3.display()

## sequential_complexity

In [None]:
# Query time (log10 seconds) against forward complexity, with a LOWESS trend
# line and hover tooltips that name each metapath.
matplotlib.pyplot.figure(figsize=(10, 7))
x_col, y_col = 'forward_complexity', 'log10_seconds_per_query'
# regplot drops rows with a missing x or y by default (dropna=True); drop them
# up front so the tooltip labels stay aligned with the plotted points.
plot_df = complexity_df.dropna(subset=[x_col, y_col])
# x/y are passed by keyword because recent seaborn releases no longer accept
# them positionally. ci=None is the documented way to disable the confidence band.
ax = seaborn.regplot(x=x_col, y=y_col, data=plot_df,
    lowess=True, scatter_kws={'alpha': 0.5}, line_kws={'color': 'black'}, ci=None)
# First collection on the axes -- the scatter points when the axes start empty.
points = ax.collections[0]
labels = plot_df.metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(ax.figure, tooltip)
mpld3.display()

In [None]:
# Query time (log10 seconds) against backward complexity, with a LOWESS trend
# line and hover tooltips that name each metapath.
matplotlib.pyplot.figure(figsize=(10, 7))
x_col, y_col = 'backward_complexity', 'log10_seconds_per_query'
# regplot drops rows with a missing x or y by default (dropna=True); drop them
# up front so the tooltip labels stay aligned with the plotted points.
plot_df = complexity_df.dropna(subset=[x_col, y_col])
# x/y are passed by keyword because recent seaborn releases no longer accept
# them positionally. ci=None is the documented way to disable the confidence band.
ax = seaborn.regplot(x=x_col, y=y_col, data=plot_df,
    lowess=True, scatter_kws={'alpha': 0.5}, line_kws={'color': 'black'}, ci=None)
# First collection on the axes -- the scatter points when the axes start empty.
points = ax.collections[0]
labels = plot_df.metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(ax.figure, tooltip)
mpld3.display()

## optimal_join_complexity

In [None]:
# Query time (log10 seconds) against optimal-join complexity, with a LOWESS
# trend line and hover tooltips that name each metapath.
matplotlib.pyplot.figure(figsize=(10, 7))
x_col, y_col = 'optimal_complexity', 'log10_seconds_per_query'
# regplot drops rows with a missing x or y by default (dropna=True); drop them
# up front so the tooltip labels stay aligned with the plotted points.
plot_df = complexity_df.dropna(subset=[x_col, y_col])
# x/y are passed by keyword because recent seaborn releases no longer accept
# them positionally. ci=None is the documented way to disable the confidence band.
ax = seaborn.regplot(x=x_col, y=y_col, data=plot_df,
    lowess=True, scatter_kws={'alpha': 0.5}, line_kws={'color': 'black'}, ci=None)
# First collection on the axes -- the scatter points when the axes start empty.
points = ax.collections[0]
labels = plot_df.metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(ax.figure, tooltip)
mpld3.display()

## midpoint_join_complexity

In [None]:
# Query time (log10 seconds) against midpoint-join complexity, with a LOWESS
# trend line and hover tooltips that name each metapath.
matplotlib.pyplot.figure(figsize=(10, 7))
x_col, y_col = 'midpoint_complexity', 'log10_seconds_per_query'
# regplot drops rows with a missing x or y by default (dropna=True); drop them
# up front so the tooltip labels stay aligned with the plotted points.
plot_df = complexity_df.dropna(subset=[x_col, y_col])
# x/y are passed by keyword because recent seaborn releases no longer accept
# them positionally. ci=None is the documented way to disable the confidence band.
ax = seaborn.regplot(x=x_col, y=y_col, data=plot_df,
    lowess=True, scatter_kws={'alpha': 0.5}, line_kws={'color': 'black'}, ci=None)
# First collection on the axes -- the scatter points when the axes start empty.
points = ax.collections[0]
labels = plot_df.metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(ax.figure, tooltip)
mpld3.display()