In [1]:
import os

import pandas as pd
from plotly.offline import init_notebook_mode
from plotly.offline import plot, iplot
from sqlalchemy import create_engine

from query_flow.parsers.postgres_parser import PostgresParser
from query_flow.vizualizers.query_vizualizer import QueryVizualizer


In [2]:
# Load the autoreload extension and enable mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Initialise Plotly's offline notebook mode.
# NOTE(review): presumably required for the iplot() figures below to render inline — confirm.
init_notebook_mode(connected=True)

In [3]:
# Database URL handed to create_engine() and query_renderer.get_flow_df() below.
# It can be overridden through the QUERY_FLOW_CON_STR environment variable so the
# notebook is not tied to one machine; the default is the original local database.
con_str = os.environ.get("QUERY_FLOW_CON_STR", "postgresql:///etrabelsi_thesis")

## Identifying performance bottlenecks across multiple queries

In [4]:
# Visualizer backed by a PostgresParser instance; later cells call its
# get_flow_df() and vizualize() methods.
plan_parser = PostgresParser()
query_renderer = QueryVizualizer(parser=plan_parser)

In [5]:
# Scratch arithmetic: this same difference is used as the value of the
# node 6 -> node 7 link in the hand-built Sankey cells below.
36451.822 - 27376.277

9075.545000000002

In [6]:
# Hand-built Sankey diagram. A node id is the position of its name in `label`:
#   0 Title     1 Title*     2 Crew     3 People     4 People*
#   5 Title* U Title*     6 Crew ⋈ People*     7 Crew ⋈ People* ⋈ Titles*     8 Finish
label = [
    "Title",
    "Title*",
    "Crew",
    "People",
    "People*",
    "Title* U Title*",
    "Crew ⋈ People*",
    "Crew ⋈ People* ⋈ Titles*",
    "Finish",
]

# Link colours.
# NOTE(review): blue appears to mark the UNION query's path and rose the join
# query's path — confirm.
LINK_BLUE = "#B0C4DE"
LINK_ROSE = "#FFE4E1"

# One row per link: (source node id, target node id, value, link colour).
# Keeping each link on a single row avoids the four parallel lists drifting apart.
# NOTE(review): the values look hand-copied from query measurements — confirm units.
links = [
    (0, 1, 975.287, LINK_BLUE),
    (0, 1, 677.225, LINK_BLUE),
    (0, 1, 8071.176, LINK_ROSE),
    (1, 5, 975.287, LINK_BLUE),
    (1, 5, 677.225, LINK_BLUE),
    (2, 6, 6108.962, LINK_ROSE),
    (3, 4, 6108, LINK_ROSE),
    (1, 7, 8071.176, LINK_ROSE),
    (6, 7, 36451.822 - 27376.277, LINK_ROSE),
    (4, 6, 6108, LINK_ROSE),
    (7, 8, 44683 - 36451, LINK_ROSE),
    (5, 8, 1885 - 975.287, LINK_BLUE),
]
source, target, value, color = (list(column) for column in zip(*links))

# One colour per node: size the list by the number of nodes (`label`), not by the
# number of links (`color`), which is what the previous version did.
color_node = ["black"] * len(label)
layout = dict(font=dict(size=10), height=750)
data_trace = dict(
    type="sankey",
    orientation="h",
    valueformat=",",
    node=dict(pad=200, label=label, color=color_node),
    link=dict(source=source, target=target, value=value, color=color),
)

iplot(dict(data=[data_trace], layout=layout), validate=False)

In [7]:

# Disabled call, kept for reference only. It reads a CSV from an absolute,
# user-specific path, so it cannot run on another machine as written.
# query_renderer.vizualize(
#         pd.read_csv("/Users/etrabelsi/IdeaProjects/thesis/crap.csv"), title='Missing Records in Where Clause', metrics=['actual_rows'],
#         open_=False,
#     )

In [8]:
# Create (if missing) a partial index on titles(title_id, genres), restricted to
# rows whose genres match '%Comedy%' — the same predicate query1 uses further down.
# The index is dropped again by the DROP INDEX cell later in the notebook.
# NOTE(review): '%%' is presumably an escaped literal '%' for the DB driver — confirm.
# NOTE(review): `execution_plan` receives the result of this DDL statement, not a
# query plan, and is not read by any cell shown here.
with create_engine(con_str).connect() as con:
    execution_plan = con.execute("CREATE INDEX if not exists titles_index ON titles(title_id,genres) WHERE genres like '%%Comedy%%'")

In [17]:
# query1: title ids of titles whose genres contain 'Comedy', joined through crew
# to people and restricted to three named people.
query1 = """
SELECT titles.title_id
FROM titles
INNER JOIN crew ON crew.title_id = titles.title_id
INNER JOIN people ON people.person_id = crew.person_id
WHERE genres like '%Comedy%' 
  AND name in ('Owen Wilson', 'Adam Sandler', 'Jason Segel')
"""


# query2: UNION of the title ids whose genres equal 'Comedy' and those whose
# genres equal 'Action'.
query2 = """
SELECT titles.title_id
FROM titles
WHERE genres = 'Comedy' 
UNION 
SELECT titles.title_id
FROM titles
WHERE genres = 'Action' 
"""

# Pass both queries to the visualizer in a single call and display the resulting
# flow DataFrame as the cell output.
queries = [query1, query2]
flow_df = query_renderer.get_flow_df(queries, con_str=con_str)
flow_df

Unnamed: 0,source,target,operation_type,temp_written_blocks,actual_loops,shared_read_blocks,total_cost,shared_written_blocks,local_dirtied_blocks,temp_read_blocks,...,label,label_metadata,query_hash,actual_duration,estimated_cost,actual_startup_duration,redundent_operation,actual_duration_pct,estimated_cost_pct,actual_plan_rows_ratio
0,0,1,Seq Scan,0,2,113393,160668.53,0,0,0,...,Titles,,949dfc92908213dc8cded2b0927d9bb61d3457e3b32f87...,780.331,160668.53,4.476,False,100.0,100.0,True
1,1,4,Where,0,2,113393,160668.53,0,0,0,...,Titles*,Filter condition: (titles_1.genres = 'Action':...,949dfc92908213dc8cded2b0927d9bb61d3457e3b32f87...,0.0,0.0,-160664.054,False,0.0,0.0,False
2,2,3,Seq Scan,0,3,113270,160668.53,0,0,0,...,Titles,,949dfc92908213dc8cded2b0927d9bb61d3457e3b32f87...,552.83,160668.53,0.053,False,100.0,100.0,True
3,3,4,Where,0,3,113270,160668.53,0,0,0,...,Titles*,Filter condition: (titles.genres = 'Comedy'::t...,949dfc92908213dc8cded2b0927d9bb61d3457e3b32f87...,0.0,0.0,-160668.477,False,0.0,0.0,False
4,4,5,Append,0,3,226663,329370.54,0,0,0,...,Titles* U Titles*,,949dfc92908213dc8cded2b0927d9bb61d3457e3b32f87...,320.451,168702.01,-160665.504,False,29.111214,51.21952,False
5,5,6,Unique,1356,1,226663,450382.96,0,0,1347,...,Unique,,949dfc92908213dc8cded2b0927d9bb61d3457e3b32f87...,565.251,121012.42,-327936.512,False,33.927959,26.868783,False
6,7,8,Seq Scan,0,3,79922,141258.61,0,0,0,...,People,,c2b4a52e29196408d05505b4757755f47e79b17445f0e0...,724.767,141258.61,346.431,False,100.0,100.0,True
7,8,10,Where,0,3,79922,141258.61,0,0,0,...,People*,"Filter condition: (people.name = ANY ('{""Owen ...",c2b4a52e29196408d05505b4757755f47e79b17445f0e0...,0.0,0.0,-140912.179,False,0.0,0.0,False
8,9,10,Seq Scan,0,3,392164,566508.77,0,0,0,...,Crew,,c2b4a52e29196408d05505b4757755f47e79b17445f0e0...,2040.827,566508.77,0.635,False,100.0,100.0,False
9,10,13,Hash Join,0,3,472086,771760.8,0,0,0,...,People* ⋈ Crew,"Hash Cond ('Inner', '(crew.person_id = people....",c2b4a52e29196408d05505b4757755f47e79b17445f0e0...,2903.178,205252.03,-565687.101,False,58.721178,26.595291,True


In [18]:

# Repeat of the get_flow_df() call from the previous cell, on the same
# `query_renderer` and `queries`; it overwrites `flow_df`.
# NOTE(review): in the saved outputs this second run numbers the People/Crew nodes
# differently (People is 6 -> 7 instead of 7 -> 8) and reports a negative
# actual_duration and estimated_cost for the People scan. This looks like state
# carried over inside query_renderer between calls — confirm.
flow_df = query_renderer.get_flow_df(queries, con_str=con_str)
flow_df

Unnamed: 0,source,target,operation_type,temp_written_blocks,actual_loops,shared_read_blocks,total_cost,shared_written_blocks,local_dirtied_blocks,temp_read_blocks,...,label,label_metadata,query_hash,actual_duration,estimated_cost,actual_startup_duration,redundent_operation,actual_duration_pct,estimated_cost_pct,actual_plan_rows_ratio
0,0,1,Seq Scan,0,2,113229,160668.53,0,0,0,...,Titles,,cedfb6ef9ed8549bfd30cb66f735b10cfd4e6706eac17c...,680.191,160668.53,1.566,False,100.0,100.0,True
1,1,4,Where,0,2,113229,160668.53,0,0,0,...,Titles*,Filter condition: (titles_1.genres = 'Action':...,cedfb6ef9ed8549bfd30cb66f735b10cfd4e6706eac17c...,0.0,0.0,-160666.964,False,0.0,0.0,False
2,2,3,Seq Scan,0,3,113105,160668.53,0,0,0,...,Titles,,cedfb6ef9ed8549bfd30cb66f735b10cfd4e6706eac17c...,462.835,160668.53,0.059,False,100.0,100.0,True
3,3,4,Where,0,3,113105,160668.53,0,0,0,...,Titles*,Filter condition: (titles.genres = 'Comedy'::t...,cedfb6ef9ed8549bfd30cb66f735b10cfd4e6706eac17c...,0.0,0.0,-160668.471,False,0.0,0.0,False
4,4,5,Append,0,3,226334,329370.54,0,0,0,...,Titles* U Titles*,,cedfb6ef9ed8549bfd30cb66f735b10cfd4e6706eac17c...,259.089,168702.01,-160667.44,False,27.583788,51.21952,False
5,5,6,Unique,1356,1,226334,450382.96,0,0,1347,...,Unique,,cedfb6ef9ed8549bfd30cb66f735b10cfd4e6706eac17c...,463.047,121012.42,-328151.41,False,33.019902,26.868783,False
6,6,7,Seq Scan,0,3,80124,141258.61,0,0,0,...,People,,94858c1dc2d6491143b8c34238d21ff7d5e191224c5a4c...,-793.005,-309124.35,-450255.092,False,-130.145473,-218.835758,True
7,7,9,Where,0,3,80124,141258.61,0,0,0,...,People*,"Filter condition: (people.name = ANY ('{""Owen ...",94858c1dc2d6491143b8c34238d21ff7d5e191224c5a4c...,0.0,0.0,-141130.742,False,0.0,0.0,False
8,8,9,Seq Scan,0,3,392160,566508.77,0,0,0,...,Crew,,94858c1dc2d6491143b8c34238d21ff7d5e191224c5a4c...,1787.319,566508.77,0.504,False,100.0,100.0,False
9,9,12,Hash Join,0,3,472284,771760.8,0,0,0,...,People* ⋈ Crew,"Hash Cond ('Inner', '(crew.person_id = people....",94858c1dc2d6491143b8c34238d21ff7d5e191224c5a4c...,2481.218,205252.03,-565819.333,False,58.128066,26.595291,True


In [23]:
# Visualize the flow DataFrame using the actual_rows metric.
# TODO(review): "crap" looks like a placeholder title — pick a descriptive one.
query_renderer.vizualize(
    flow_df,
    metrics=["actual_rows"],
    title="crap",
    open_=False,
)

In [16]:
# Show only the columns needed to read the flow: node label, edge endpoints,
# timing, filter/join metadata and the hash identifying the originating query.
summary_columns = [
    "label",
    "source",
    "target",
    "actual_total_time",
    "label_metadata",
    "query_hash",
]
flow_df[summary_columns]

Unnamed: 0,label,source,target,actual_total_time,label_metadata,query_hash
0,Titles,0,1,590.997,,b02c98736abde9d4aea852005f97b56c2302a0156dce6d...
1,Titles*,1,4,590.997,Filter condition: (titles_1.genres = 'Action':...,b02c98736abde9d4aea852005f97b56c2302a0156dce6d...
2,Titles,2,3,408.55,,b02c98736abde9d4aea852005f97b56c2302a0156dce6d...
3,Titles*,3,4,408.55,Filter condition: (titles.genres = 'Comedy'::t...,b02c98736abde9d4aea852005f97b56c2302a0156dce6d...
4,Titles* U Titles*,4,5,822.875,,b02c98736abde9d4aea852005f97b56c2302a0156dce6d...
5,Unique,5,6,1249.485,,b02c98736abde9d4aea852005f97b56c2302a0156dce6d...
6,People,6,7,553.37,,625fcb20137e4aba6e5ba4d2cb64df1040b98b7867438e...
7,People*,7,9,553.37,"Filter condition: (people.name = ANY ('{""Owen ...",625fcb20137e4aba6e5ba4d2cb64df1040b98b7867438e...
8,Crew,8,9,1513.296,,625fcb20137e4aba6e5ba4d2cb64df1040b98b7867438e...
9,People* ⋈ Crew,9,12,3682.235,"Hash Cond ('Inner', '(crew.person_id = people....",625fcb20137e4aba6e5ba4d2cb64df1040b98b7867438e...


In [11]:
# Second hand-built Sankey diagram: same nodes and links as the earlier Sankey
# cell, with different values on some links. A node id is the position of its
# name in `label`:
#   0 Title     1 Title*     2 Crew     3 People     4 People*
#   5 Title* U Title*     6 Crew ⋈ People*     7 Crew ⋈ People* ⋈ Titles*     8 Finish
label = [
    "Title",
    "Title*",
    "Crew",
    "People",
    "People*",
    "Title* U Title*",
    "Crew ⋈ People*",
    "Crew ⋈ People* ⋈ Titles*",
    "Finish",
]

# Link colours.
# NOTE(review): blue appears to mark the UNION query's path and rose the join
# query's path — confirm.
LINK_BLUE = "#B0C4DE"
LINK_ROSE = "#FFE4E1"

# One row per link: (source node id, target node id, value, link colour).
# Keeping each link on a single row avoids the four parallel lists drifting apart.
# NOTE(review): the values look hand-edited (several are 1 here) — confirm the
# scenario this diagram is meant to illustrate.
links = [
    (0, 1, 675.287, LINK_BLUE),
    (0, 1, 10.225, LINK_BLUE),
    (0, 1, 1, LINK_ROSE),
    (1, 5, 975.287, LINK_BLUE),
    (1, 5, 677.225, LINK_BLUE),
    (2, 6, 6108.962, LINK_ROSE),
    (3, 4, 6108, LINK_ROSE),
    (1, 7, 1, LINK_ROSE),
    (6, 7, 36451.822 - 27376.277, LINK_ROSE),
    (4, 6, 6108, LINK_ROSE),
    (7, 8, 44683 - 36451, LINK_ROSE),
    (5, 8, 1885 - 975.287, LINK_BLUE),
]
source, target, value, color = (list(column) for column in zip(*links))

# One colour per node: size the list by the number of nodes (`label`), not by the
# number of links (`color`), which is what the previous version did.
color_node = ["black"] * len(label)
layout = dict(font=dict(size=10), height=750)
data_trace = dict(
    type="sankey",
    orientation="h",
    valueformat=",",
    node=dict(pad=200, label=label, color=color_node),
    link=dict(source=source, target=target, value=value, color=color),
)

iplot(dict(data=[data_trace], layout=layout), validate=False)

In [12]:
# Clean-up: drop the titles_index index created earlier in the notebook
# ("if exists" makes this a no-op when the index is absent).
# NOTE(review): `execution_plan` receives the result of this DDL statement, not a
# query plan, and is not read by any cell shown here.
with create_engine(con_str).connect() as con:
    execution_plan = con.execute("DROP INDEX if exists titles_index")