In [13]:
import duckdb
import pyarrow as pa
import pyarrow.parquet as pq
from pathlib import Path

# Parquet locations used by this cell, all relative to the notebook's working
# directory. The two bronze files are read by create_sales_silver_parquet();
# the silver path is passed to it as the output target.
BRONZE_DETALLISTAS_PATH, BRONZE_VENTAS_PATH, SILVER_VENTAS_PATH = (
    f'../data/{stem}.parquet'
    for stem in (
        'bronze_detallistas',
        'bronze_ventas',
        'silver_ventas_establecimiento',
    )
)

def _sql_string_literal(value: str) -> str:
    """Return ``value`` as a single-quoted SQL string literal.

    Embedded single quotes are doubled so a path containing ``'`` cannot
    terminate the literal early and change the statement.
    """
    return "'" + str(value).replace("'", "''") + "'"


def create_sales_silver_parquet(file_name: str) -> None:
    """Aggregate bronze sales per establishment and write them as parquet.

    Loads the parquet files at ``BRONZE_DETALLISTAS_PATH`` and
    ``BRONZE_VENTAS_PATH`` into an in-memory DuckDB database, inner-joins
    ``ventas`` to ``detallistas`` on the detallista id (the ventas side is
    cast to VARCHAR before comparing), sums ``volume_ap`` and
    ``cantidad_umb`` per (establecimiento, material, calday, promo_id, type),
    and copies the result to ``file_name`` in parquet format. The ``type``
    column is exported under the name ``tipo``.

    Args:
        file_name: Destination path of the parquet file to write.

    Raises:
        Any error raised by DuckDB while loading, aggregating or writing
        propagates to the caller; the connection is closed first.
    """
    detallistas_src = _sql_string_literal(BRONZE_DETALLISTAS_PATH)
    ventas_src = _sql_string_literal(BRONZE_VENTAS_PATH)
    target = _sql_string_literal(file_name)

    # NOTE(review): this is an inner join, so ventas rows whose detallista has
    # no match in detallistas are left out of the output - confirm intended.
    copy_sql = f"""
    COPY (
        SELECT
            d.establecimiento,
            v.material,
            v.calday,
            v.promo_id,
            SUM(v.volume_ap) AS volume_ap,
            SUM(v.cantidad_umb) AS cantidad_umb,
            d.type AS tipo
        FROM ventas v
        JOIN detallistas d
        ON CAST(v.detallista AS VARCHAR) = d.detallista
        GROUP BY d.establecimiento, v.material, v.calday, v.promo_id, d.type
    ) TO {target} (FORMAT parquet)
    """

    con = duckdb.connect()
    try:
        # Materialise both bronze inputs as tables in the in-memory database.
        con.execute(
            f"CREATE OR REPLACE TABLE detallistas AS SELECT * FROM read_parquet({detallistas_src})"
        )
        con.execute(
            f"CREATE OR REPLACE TABLE ventas AS SELECT * FROM read_parquet({ventas_src})"
        )
        con.execute(copy_sql)
    finally:
        # Always release the connection, including when a statement fails.
        con.close()

# Build the silver parquet file at the configured location.
create_sales_silver_parquet(str(SILVER_VENTAS_PATH))

# Preview the file that was just written using a fresh in-memory connection.
con = duckdb.connect()
try:
    # The header is derived from the path so it always names the file actually read.
    # LIMIT without ORDER BY returns an arbitrary 5 rows, not a stable "first 5".
    print(f"\nFirst 5 rows of {Path(SILVER_VENTAS_PATH).name}:")
    con.sql(f"SELECT * FROM read_parquet('{SILVER_VENTAS_PATH}') LIMIT 5").show()
finally:
    # Release the connection even if the preview query fails.
    con.close()




First 5 rows of silver_ventas.parquet:
┌─────────────────┬──────────┬─────────────────────┬──────────────────────┬───────────┬──────────────┬─────────────────┐
│ establecimiento │ material │       calday        │       promo_id       │ volume_ap │ cantidad_umb │      tipo       │
│     varchar     │ varchar  │      timestamp      │       varchar        │  double   │    double    │     varchar     │
├─────────────────┼──────────┼─────────────────────┼──────────────────────┼───────────┼──────────────┼─────────────────┤
│ 8100240876      │ TB8      │ 2024-11-26 00:00:00 │ NULL                 │       8.0 │          1.0 │ Bar Cervecería  │
│ 8100032055      │ PI13     │ 2024-11-26 00:00:00 │ NULL                 │      7.92 │          1.0 │ Restaurante     │
│ 8100258434      │ FL13SPN  │ 2024-11-26 00:00:00 │ NULL                 │      23.1 │          2.0 │ Bar Cervecería  │
│ 8100036860      │ VO13     │ 2024-11-26 00:00:00 │ 00000000000080619348 │       0.0 │          0.0 │ Bar Cervec