In [1]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Connection config
load_dotenv()  # load the variables defined in the .env file


True

In [2]:
# Database settings are read from the process environment.
DB_USER = os.getenv("POSTGRES_USER")
DB_PASSWORD = os.getenv("POSTGRES_PASSWORD")
DB_PORT = os.getenv("POSTGRES_PORT")
DB_NAME = os.getenv("POSTGRES_DB")
# NOTE(review): the host is hard-coded rather than read from the environment;
# presumably intentional for a locally reachable database -- confirm.
DB_HOST = "localhost"

# Fail here with a readable message instead of building a URL that contains the
# literal text "None" and only breaks later with a confusing error.
missing_settings = [
    env_name
    for env_name, value in (
        ("POSTGRES_USER", DB_USER),
        ("POSTGRES_PASSWORD", DB_PASSWORD),
        ("POSTGRES_PORT", DB_PORT),
        ("POSTGRES_DB", DB_NAME),
    )
    if value is None
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(missing_settings)
    )

# Percent-encode the credentials so characters such as "@", ":" or "/" in the
# user name or password cannot be mistaken for URL delimiters.
conn_string = (
    f"postgresql+psycopg2://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)
engine = create_engine(conn_string)

# Tables loaded in Postgres that will be profiled
tables_to_analize = [
    "orders",
    "customers",
    "order_items",
    "products",
    "reviews",
    "category_translation",
]

In [3]:
summary = []

print("Generating a table with specifications of each table...")

# Column layout of the report. Passing it to pd.DataFrame keeps the schema
# stable even when no table could be loaded and `summary` stays empty.
report_columns = [
    "dataset",
    "n_rows",
    "n_cols",
    "null_amount",
    "qty_null_columns",
    "null_columns",
]

# Each table is read from SQL in full and the statistics are computed by pandas
for table in tables_to_analize:
    try:
        # Read the complete table
        query = f"SELECT * FROM {table}"
        df = pd.read_sql(query, engine)

        # Rows | Columns
        total_rows, total_cols = df.shape

        # Build the null mask once and reuse it for both statistics below
        null_mask = df.isnull()

        # Total number of empty cells in the entire table
        total_nulls = null_mask.sum().sum()

        # Columns that have at least one null
        cols_with_nulls = df.columns[null_mask.any()].tolist()
        qty_null_cols = len(cols_with_nulls)

        # Add the info to the list
        summary.append(
            {
                "dataset": table,
                "n_rows": total_rows,
                "n_cols": total_cols,
                "null_amount": total_nulls,
                "qty_null_columns": qty_null_cols,
                "null_columns": ", ".join(cols_with_nulls),
            }
        )
        print(f"Analized: {table}")

    except Exception as e:
        # Best effort: report the failure and continue with the remaining tables
        print(f"Error in {table}: {e}")


df_report = pd.DataFrame(summary, columns=report_columns)

# Stylizing the report
estilo_tabla = df_report.style
if not df_report.empty:
    # Skip the colour gradient when there are no rows to colour
    estilo_tabla = estilo_tabla.background_gradient(
        cmap="Blues", subset=["n_rows", "n_cols", "null_amount", "qty_null_columns"]
    )
estilo_tabla = (
    estilo_tabla.format({"n_rows": "{:,}", "null_amount": "{:,}"})
    .hide(axis="index")
    .set_properties(**{"text-align": "left"})
)

# Show the table
estilo_tabla

Generating a table with specifications of each table...
Analized: orders
Analized: customers
Analized: order_items
Analized: products
Analized: reviews
Analized: category_translation


dataset,n_rows,n_cols,null_amount,qty_null_columns,null_columns
orders,99441,8,4908,3,"order_approved_at, order_delivered_carrier_date, order_delivered_customer_date"
customers,99441,5,0,0,
order_items,112650,7,0,0,
products,32951,9,2448,8,"product_category_name, product_name_lenght, product_description_lenght, product_photos_qty, product_weight_g, product_length_cm, product_height_cm, product_width_cm"
reviews,99224,7,145903,2,"review_comment_title, review_comment_message"
category_translation,71,2,0,0,
