# pg_db_tables
Shows data about the tables
Cells: 
- Configure: Connect to a specfic DB
- Current size of each table: in KB, dead rows, bloat, data size, all indexes size,  

In [16]:
import sqlalchemy
import pandas as pd
import configparser
import matplotlib.pyplot as plt 

# Read from the Config file
config = configparser.ConfigParser() 
config.read_file(open(r'../ipynb.cfg'))

con_str = config.get('con_str', 'PG_AIRBASES') 
engine = sqlalchemy.create_engine(con_str)

try:
    connection = engine.connect()
    print ("Opened Connection")
except (Exception, sqlalchemy.exc.SQLAlchemyError) as error:
    print("Error while connecting to PostgreSQL database:", error)


Exception during reset or similar
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 739, in _finalize_fairy
    fairy._reset(pool)
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 988, in _reset
    pool._dialect.do_rollback(self)
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 682, in do_rollback
    dbapi_connection.rollback()
psycopg2.OperationalError: could not receive data from server: Operation timed out
SSL SYSCALL error: Operation timed out



Opened Connection


## Tables Size
Show the tables and their size in KB and the number of rows. 
Notice! the notebook requires the extension pg_stat_statement 

TODO: show Bloat too

In [17]:
from sqlalchemy import create_engine
from sqlalchemy.exc import SQLAlchemyError

sql_command = """
SELECT relid,
		schemaname, 
    relname as table_name, 
    (schemaname || '.' || relname) as full_table_name,
    n_live_tup as rows,
    n_dead_tup as dead_rows,
    n_mod_since_analyze,
    case 
    	when n_live_tup = 0 THEN 0
      else  n_mod_since_analyze / n_live_tup 
      end as pct_mod_since_analyze,
    last_analyze as last_analyze_date,
    last_autoanalyze as last_autoanalyze_date,
    pg_total_relation_size(relid) / 1024 as total_table_size_kb, 
    pg_table_size(relid) / 1024 as table_size_kb,
    pg_indexes_size(relid) / 1024 as indexes_size_kb,
    pg_size_pretty(pg_total_relation_size(relid)) as total_table_size_pretty, 
    pg_size_pretty(pg_table_size(relid)) as table_size_pretty,
    pg_size_pretty(pg_indexes_size(relid)) as index_size_pretty
    
FROM pg_stat_user_tables
ORDER BY schemaname, 
    relname
"""

try:
    # Execute the SQL command
    df = pd.read_sql_query(sql_command, connection)
    # Set the display options to show all columns without truncation
    pd.set_option('display.max_columns', None)
    pd.set_option('display.expand_frame_repr', False)   
    print(df)
except (SQLAlchemyError, ValueError) as e:
    # Handle any errors or raised exceptions
    raise e

      relid    schemaname                         table_name                          full_table_name      rows  dead_rows  n_mod_since_analyze  pct_mod_since_analyze                last_analyze_date            last_autoanalyze_date  total_table_size_kb  table_size_kb  indexes_size_kb total_table_size_pretty table_size_pretty index_size_pretty
0   1057718          cron                                job                                 cron.job         5          1                    7                      1                              NaT                              NaT                   48             16               32                   48 kB             16 kB             32 kB
1   1057740          cron                    job_run_details                     cron.job_run_details         0          0                    0                      0                              NaT                              NaT               124240         123104             1136                  121 M

## Top 25 tables by size
Show the top 25 table by their size. The dataset also contains the 26th "table" which is the total of all other tables in the DB. The total size of all other tables might be large.
The UI is a treemap. 

TODO: show also as a table. Use pretty size. 

In [18]:
import plotly.express as px

sql_command = """
WITH top_tables AS (
    SELECT 
        c.oid, 
        relname, 
        n.nspname, 
        pg_total_relation_size(c.oid) AS total_size, 
  			pg_size_pretty(pg_total_relation_size(c.oid)) AS total_size_pretty
    FROM pg_catalog.pg_class AS c
    JOIN pg_namespace n 
        ON c.relnamespace = n.oid
    WHERE relkind = 'r'
        AND n.nspname NOT IN ('pg_toast', 'pg_catalog', 'information_schema')
    ORDER BY pg_total_relation_size(c.oid) DESC
    LIMIT 25
)

SELECT 
    oid, 
    relname, 
    nspname, 
    total_size, 
    total_size_pretty
FROM top_tables
UNION ALL
SELECT 
    null, 
    'ALL_OTHER_TABLES', 
    null, 
    sum(total_size), 
    pg_size_pretty(sum(total_size))
    
FROM (
    SELECT pg_total_relation_size(c.oid) AS total_size
    FROM pg_catalog.pg_class AS c
    JOIN pg_namespace n 
        ON c.relnamespace = n.oid
    WHERE relkind = 'r'
        AND n.nspname NOT IN ('pg_toast', 'pg_catalog', 'information_schema')
    ORDER BY pg_total_relation_size(c.oid) DESC
    OFFSET 25
) AS remaining_tables;


"""

try:
    # Execute the SQL command
    df_top_tables_by_size = pd.read_sql_query(sql_command, connection)
    print(df_top_tables_by_size)
    # Create a treemap using Plotly
    fig = px.treemap(df_top_tables_by_size, path=['relname'], values='total_size')

    # Show the treemap
    fig.show()

except (SQLAlchemyError, ValueError) as e:
    # Handle any errors or raised exceptions
    raise e

          oid                            relname       nspname    total_size total_size_pretty
0     71482.0                      boarding_pass  postgres_air  3.613925e+09           3447 MB
1    669643.0           boarding_pass_no_indexes  postgres_air  2.136752e+09           2038 MB
2     71518.0                          passenger  postgres_air  1.891779e+09           1804 MB
3     71495.0                        booking_leg  postgres_air  1.480098e+09           1412 MB
4     71489.0                            booking  postgres_air  1.169392e+09           1115 MB
5    671989.0                        passenger_2  postgres_air  8.486011e+08            809 MB
6    671977.0                    booking_flights        public  7.102054e+08            677 MB
7   3852615.0  pg_stat_tables_activity_snapshots         metis  1.570324e+08            150 MB
8   1057740.0                    job_run_details          cron  1.272218e+08            121 MB
9     71502.0                             flight  