In [5]:
from plotly.offline import init_notebook_mode
from sqlalchemy import create_engine

from query_flow.parsers.postgres_parser import PostgresParser
from query_flow.vizualizers.query_vizualizer import QueryVizualizer

In [6]:
%load_ext autoreload
%autoreload 2

init_notebook_mode(connected=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Connection URL handed to the query_flow calls below. It names only the
# database (`etrabelsi_thesis`); no host, user, or password is embedded.
# NOTE(review): presumably resolved against a local PostgreSQL instance using
# the driver's defaults — confirm before running on another machine.
con_str = "postgresql:///etrabelsi_thesis"

## Query

In [8]:
# SQL under inspection: title ids of titles whose `genres` matches
# '%Comedy%' (case-insensitive) and that have a crew entry for one of the
# three listed people. Joins titles -> crew -> people.
query = """
SELECT titles.title_id
FROM titles
INNER JOIN crew ON crew.title_id = titles.title_id
INNER JOIN people ON people.person_id = crew.person_id
WHERE genres ilike '%Comedy%' 
  AND name in ('Owen Wilson', 'Adam Sandler', 'Jason Segel')
"""

In [9]:
# Visualizer used by the remaining cells, configured with the
# PostgreSQL-specific parser.
query_renderer = QueryVizualizer(
    parser=PostgresParser(),
)

In [14]:
# Build the flow table for `query` against the database named by `con_str`.
# NOTE(review): the `actual_*` columns suggest the query is really executed
# (not just planned) — confirm, since the scans above cover millions of rows.
flow_df = query_renderer.get_flow_df(query, con_str)

# Show only the columns relevant to the walkthrough (last expression is
# rendered as the cell output).
flow_df[
    [
        "source",
        "target",
        "operation_type",
        "actual_rows",
        "actual_duration",
        "label",
        "label_metadata",
    ]
]



Unnamed: 0,source,target,operation_type,actual_rows,actual_duration,label,label_metadata
0,0,1,Seq Scan,3446261,1110.74,People,
1,1,3,Where,3,0.0,People*,"Filter condition: (people.name = ANY ('{""Owen ..."
2,2,3,Seq Scan,13651901,2809.074,Crew,
3,3,6,Hash Join,565,4437.907,People* ⋈ Crew,Inner join with (crew.person_id = people.perso...
4,4,5,Seq Scan,2379234,1798.331,Titles,
5,5,6,Where,489076,0.0,Titles*,Filter condition: (titles.genres ~~* '%Comedy%...
6,6,7,Hash Join,186,1963.614,People* ⋈ Crew ⋈ Titles*,Inner join with (titles.title_id = crew.title_id)


In [13]:
# Draw the flow diagram for the table built above, using the row counts
# (`actual_rows`) as the displayed metric.
# NOTE(review): `open_=False` presumably prevents opening the figure outside
# the notebook — confirm against QueryVizualizer.vizualize.
query_renderer.vizualize(
    flow_df,
    title="Basic Flow",
    metrics=["actual_rows"],
    open_=False,
)


In [14]:
# Same flow table restricted to the row-count view (no `actual_duration`).
flow_df[
    [
        "source",
        "target",
        "operation_type",
        "actual_rows",
        "label",
        "label_metadata",
    ]
]

Unnamed: 0,source,target,operation_type,actual_rows,label,label_metadata
0,0,1,Seq Scan,3446261,People,
1,1,3,Where,3,People*,"Filter condition: (people.name = ANY ('{""Owen ..."
2,2,3,Seq Scan,13651901,Crew,
3,3,6,Hash Join,565,People* ⋈ Crew,"Hash Cond ('Inner', '(crew.person_id = people...."
4,4,5,Seq Scan,2379234,Titles,
5,5,6,Where,489076,Titles*,Filter condition: (titles.genres ~~* '%Comedy%...
6,6,7,Hash Join,186,People* ⋈ Crew ⋈ Titles*,"Hash Cond ('Inner', '(titles.title_id = crew.t..."


In [15]:
# Inspect what the parser returns for the same query and connection; the
# rendered output is a nested dict describing the execution plan.
query_renderer.parser.from_query(
    query,
    con_str,
    False,  # NOTE(review): meaning of this positional flag is not visible here — confirm and prefer a keyword argument
)

{'Node Type': 'Gather',
 'Parallel Aware': False,
 'Startup Cost': 772761.07,
 'Total Cost': 935729.46,
 'Plan Rows': 11,
 'Plan Width': 10,
 'Actual Startup Time': 7022.913,
 'Actual Total Time': 8846.602,
 'Actual Rows': 558,
 'Actual Loops': 1,
 'Output': ['titles.title_id'],
 'Workers Planned': 2,
 'Workers Launched': 2,
 'Single Copy': False,
 'Shared Hit Blocks': 6706,
 'Shared Read Blocks': 594844,
 'Shared Dirtied Blocks': 0,
 'Shared Written Blocks': 0,
 'Local Hit Blocks': 0,
 'Local Read Blocks': 0,
 'Local Dirtied Blocks': 0,
 'Local Written Blocks': 0,
 'Temp Read Blocks': 0,
 'Temp Written Blocks': 0,
 'Plans': [{'Node Type': 'Hash Join',
   'Parent Relationship': 'Outer',
   'Parallel Aware': True,
   'Join Type': 'Inner',
   'Startup Cost': 771761.07,
   'Total Cost': 934728.36,
   'Plan Rows': 5,
   'Plan Width': 10,
   'Actual Startup Time': 7010.392,
   'Actual Total Time': 8823.346,
   'Actual Rows': 186,
   'Actual Loops': 3,
   'Output': ['titles.title_id'],
   'I