In [1]:
import pandas as pd
from plotly.offline import init_notebook_mode, iplot, plot
from sqlalchemy import create_engine, text

from query_flow.parsers.postgres_parser import PostgresParser
from query_flow.vizualizers.query_vizualizer import QueryVizualizer


In [2]:
# Pick up edits to imported modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

# Initialise plotly's offline notebook mode so figures can render inside the notebook.
init_notebook_mode(connected=True)

In [3]:
# SQLAlchemy URL of the PostgreSQL database used throughout this notebook
# (the URL carries no host, user or password).
con_str = "postgresql:///etrabelsi_thesis"

## Identifying performance bottlenecks across multiple queries

In [8]:
# Renderer shared by every cell below; it is handed a Postgres parser
# constructed with is_compact=True.
query_renderer = QueryVizualizer(
    parser=PostgresParser(is_compact=True),
)

In [9]:
# Start from a known state: remove `titles_index` if it exists, so the first
# measurements are taken without it.
# engine.begin() opens a transaction that is committed when the block exits,
# and text() wraps the raw SQL; together they keep this DDL working on
# SQLAlchemy 1.4/2.0, where plain strings are no longer executable and
# connections do not autocommit.
with create_engine(con_str).begin() as con:
    con.execute(text("DROP INDEX if exists titles_index"))

In [10]:
# Query 1: ids of titles whose `genres` is 'Comedy', joined through `crew` to
# `people` and restricted to three named people.
query1 = """
SELECT titles.title_id
FROM titles
INNER JOIN crew ON crew.title_id = titles.title_id
INNER JOIN people ON people.person_id = crew.person_id
WHERE genres = 'Comedy' 
  AND name in ('Owen Wilson', 'Adam Sandler', 'Jason Segel')
"""


# Query 2: ids of 'Comedy' titles UNION ids of 'Action' titles. Its first
# branch scans `titles` with the same `genres = 'Comedy'` filter as query 1.
query2 = """
SELECT titles.title_id
FROM titles
WHERE genres = 'Comedy' 
UNION 
SELECT titles.title_id
FROM titles
WHERE genres = 'Action' 
"""

# Both queries are passed together to the renderer in the cells below.
queries = [query1, query2]


In [11]:
# Build the flow dataframe for both queries and chart it by the
# "actual_duration" metric.
# NOTE(review): "crap" is a placeholder figure title — consider a descriptive one.
flow_df = query_renderer.get_flow_df(
    queries,
    con_str=con_str,
)
query_renderer.vizualize(
    flow_df,
    metrics=["actual_duration"],
    title="crap",
    open_=False,
)



In [12]:
# Rebuild the flow dataframe for the same queries and chart it by the
# "actual_rows" metric instead of duration.
# NOTE(review): "crap" is a placeholder figure title — consider a descriptive one.
flow_df = query_renderer.get_flow_df(
    queries,
    con_str=con_str,
)
query_renderer.vizualize(
    flow_df,
    metrics=["actual_rows"],
    title="crap",
    open_=False,
)



In [18]:
# Create a partial index on titles(title_id, genres) that only covers rows
# with genres = 'Comedy' — the filter both demo queries use.
# engine.begin() opens a transaction that is committed when the block exits,
# and text() wraps the raw SQL; together they keep this DDL working on
# SQLAlchemy 1.4/2.0, where plain strings are no longer executable and
# connections do not autocommit.
with create_engine(con_str).begin() as con:
    con.execute(
        text("CREATE INDEX if not exists titles_index ON titles(title_id,genres) WHERE genres ='Comedy'")
    )

In [43]:
# Rebuild the flow dataframe after the cell above created `titles_index`.
# Nothing is charted here; the next cell builds its own dataframe and renders it.
flow_df = query_renderer.get_flow_df(
    queries,
    con_str=con_str,
)



In [51]:
# Build a fresh flow dataframe, then hand-edit it before charting.
# NOTE(review): the row positions (3, 6) and node ids (2, 3, 11) below are
# hard-coded for the plan these two queries currently produce — re-check them
# whenever the queries, schema or indexes change.
flow_df = query_renderer.get_flow_df(queries, con_str=con_str)

# Relabel row 3 and re-point row 6 so that it runs from node 2 to node 3.
flow_df.loc[3, "label"] = 'Titles*'
flow_df.loc[6, "source"] = 2
flow_df.loc[6, "target"] = 3

# Add one extra edge (node 3 -> node 11) with a fixed duration of 10.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and also works on older pandas versions.
extra_edge = pd.DataFrame(
    [{"actual_duration": 10, "source": 3, "target": 11, "label": "", "redundent_operation": False}]
)
flow_df = pd.concat([flow_df, extra_edge], ignore_index=True)

# NOTE(review): "crap" is a placeholder figure title — consider a descriptive one.
query_renderer.vizualize(flow_df, metrics=["actual_duration"], title="crap", open_=False)



In [15]:
# Clean up: remove `titles_index` again so the database is left without it.
# engine.begin() opens a transaction that is committed when the block exits,
# and text() wraps the raw SQL; together they keep this DDL working on
# SQLAlchemy 1.4/2.0, where plain strings are no longer executable and
# connections do not autocommit.
with create_engine(con_str).begin() as con:
    con.execute(text("DROP INDEX if exists titles_index"))