# pg_server_pg_stat_statement
Shows queries from pg_stat_statements
Cells: 
- Configure: Connect to a specific DB
- Current size of each table: in KB, dead rows, bloat, data size, all indexes size,  

In [15]:
import sqlalchemy
import pandas as pd
import configparser
import matplotlib.pyplot as plt 

# Read the connection string from the notebook config file.
# The value stored under [con_str] / PG_AIRBASES is passed as-is to
# sqlalchemy.create_engine, so it is expected to be a SQLAlchemy URL.
config = configparser.ConfigParser()
# Use a context manager so the config file handle is closed right after parsing.
with open(r'../ipynb.cfg') as config_file:
    config.read_file(config_file)

con_str = config.get('con_str', 'PG_AIRBASES')
engine = sqlalchemy.create_engine(con_str)

# Open the connection that the query cells in this notebook reuse.
# A failure is reported but deliberately not re-raised; in that case
# `connection` stays undefined and the query cells will fail.
try:
    connection = engine.connect()
    print("Opened Connection")
except Exception as error:  # SQLAlchemyError is already a subclass of Exception
    print("Error while connecting to PostgreSQL database:", error)


Opened Connection


## pg_stat_statements configuration
Note: this notebook requires the `pg_stat_statements` extension.
The cell below shows the current configuration of the extension.


In [16]:
from sqlalchemy import create_engine
from sqlalchemy.exc import SQLAlchemyError
# List every pg_stat_statements.* setting from pg_settings.
# The literal % of the LIKE pattern is doubled (%%) because the statement is
# sent through a DB driver that uses % for parameter placeholders
# (presumably psycopg2 -- confirm against the connection string).
sql_command = """
select *
from pg_settings
where name like 'pg_stat_statements.%%'
"""

# pg_settings has many columns: show all of them on a single line per row
# instead of letting pandas truncate or wrap the frame.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)

# Errors (e.g. SQLAlchemyError, ValueError) propagate unchanged to the
# notebook; a catch-and-reraise wrapper would add nothing.
df = pd.read_sql_query(sql_command, connection)
print(df)

                                name setting  unit            category                                         short_desc extra_desc        context  vartype              source min_val     max_val          enumvals boot_val reset_val                         sourcefile  sourceline  pending_restart
0             pg_stat_statements.max   10000  None  Customized Options  Sets the maximum number of statements tracked ...       None     postmaster  integer  configuration file     100  1073741823              None     5000     10000  /rdsdbdata/config/postgresql.conf        76.0            False
1            pg_stat_statements.save      on  None  Customized Options  Save pg_stat_statements statistics across serv...       None         sighup     bool             default    None        None              None       on        on                               None         NaN            False
2           pg_stat_statements.track     all  None  Customized Options  Selects which statements are track

## Top 50 Queries
Shows the top 50 queries by total time (execution time + planning time).
Note: the measures are cumulative since the last reset of the statistics. Use Metis to get the hourly diff.

In [18]:
import plotly.express as px

# Top 50 statements by cumulative time (execution + planning), in milliseconds.
# The cumulative totals are cast to bigint rather than int: they grow until
# the statistics are reset, and a value above 2,147,483,647 ms (~24.8 days of
# accumulated time) would make an int cast fail with "integer out of range".
sql_command = """
select
	(total_exec_time + total_plan_time)::bigint as total_time,
	total_exec_time::bigint,
	total_plan_time::bigint,
	mean_exec_time::int,
	calls,
	query
from
	pg_stat_statements
order by
	total_time desc
limit 50;
"""

# NOTE(review): the variable name mentions "tables by size" but it holds the
# top queries; the name is kept because other cells may reference it.
# Errors (e.g. SQLAlchemyError, ValueError) propagate unchanged to the
# notebook; a catch-and-reraise wrapper would add nothing.
df_top_tables_by_size = pd.read_sql_query(sql_command, connection)
print(df_top_tables_by_size)

    total_time  total_exec_time  total_plan_time  mean_exec_time    calls                                              query
0     27574159         27574159                0           19337     1426  SELECT * FROM postgres_air.boarding_pass WHERE...
1     18120968         18120968                0              70   259208  SELECT calls, datname, local_blks_dirtied, loc...
2     16285833         16285833                0            2599     6266  EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, TIM...
3      6801359          6801359                0            3479     1955  SELECT \n        count(distinct ss.id) AS serv...
4      5551708          5551708                0             147    37851            SELECT public.load_postgres_log_files()
5      5048302          5048302                0             115    43987                       MOVE ALL IN "query-cursor_1"
6      5000742          5000742                0           96168       52  with relevant_ids as (\nselect\ndistinct query...
