# Creación de diferentes esquemas reducidos de la BBDD

Mediante el presente Notebook se busca confeccionar diferentes esquemas reducidos para facilitar el uso de las tablas que nos sean de interés para nuestro sistema agéntico de NL2SQL.

## Librerías



In [1]:
import os
import sys
from pathlib import Path
import pandas as pd

# Make the parent of the current working directory importable so that the
# `src` package imported below can be resolved.
# NOTE(review): this assumes the kernel's working directory is the folder that
# contains this notebook, one level below the project root -- confirm.
notebook_dir = os.getcwd() 
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))
sys.path.append(project_root)

# Project helpers; this import must come after the sys.path tweak above.
from src.pg_sql import get_create_table_as, execute_commands, execute_query

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

## Creación de esquemas

### Esquema de Tabla Plana (Flat Table)

Para crear este esquema, utilizaremos el script almacenado en el fichero `/data/database/postgres/reduced_schemas/flat_table/flat_table.sql`. Adicionalmente, debemos ejecutar algunos comandos previos para asegurarnos de la existencia de un nuevo esquema en blanco.

In [2]:
# Rebuild the flat-table schema from scratch and preview the resulting table.
FLAT_TABLE_SCHEMA = 'flat_table'
FLAT_TABLE_NAME = 'full_sales_data'
FLAT_TABLE_QUERY_PATH = '../data/database/postgres/reduced_schemas/flat_table/flat_table.sql'

# Read the query with an explicit encoding so that decoding does not depend
# on the platform's default locale.
flat_table_query = Path(FLAT_TABLE_QUERY_PATH).read_text(encoding='utf-8')

# Order matters: drop any previous schema, recreate it empty, and only then
# add the command that `get_create_table_as` builds from the query.
commands_flat_table = [
    f'DROP SCHEMA IF EXISTS {FLAT_TABLE_SCHEMA} CASCADE;',
    f'CREATE SCHEMA {FLAT_TABLE_SCHEMA};',
    get_create_table_as(flat_table_query, FLAT_TABLE_SCHEMA, FLAT_TABLE_NAME),
]

execute_commands(commands_flat_table, port=5432)

# Preview the first rows of the new table.
# NOTE: the misspelt name `query_resulst` is kept on purpose; it is
# notebook-level state that other cells may read.
query_resulst = execute_query(
    f'SELECT * FROM {FLAT_TABLE_SCHEMA}.{FLAT_TABLE_NAME} LIMIT 10',
    port=5432,
)

display(pd.DataFrame(query_resulst))


The following command was executed sucessfully:
DROP SCHEMA IF EXISTS flat_table CASCADE;

The following command was executed sucessfully:
CREATE SCHEMA flat_table;

The following command was executed sucessfully:
CREATE TABLE flat_table.full_sales_data AS (
WITH
    all_sales AS (
        (
            SELECT
                product_key,
                -- order_date_key,
                -- due_date_key,
                -- ship_date_key,
                NULL AS reseller_key,
                NULL AS employee_key,
                customer_key,
                promotion_key,
                -- currency_key,
                sales_territory_key,
                sales_order_number,
                sales_order_line_number,
                -- revision_number,
                order_quantity,
                unit_price,
                extended_amount,
                unit_price_discount_pct,
                discount_amount,
                product_standard_cost,
                total_product_

Unnamed: 0,sale_source,sales_order_number,sales_order_line_number,order_quantity,unit_price,extended_amount,unit_price_discount_pct,discount_amount,product_standard_cost,total_product_cost,sales_amount,tax_amt,freight,order_date,due_date,ship_date,spanish_product_name,english_product_name,spanish_product_category_name,english_product_category_name,spanish_product_subcategory_name,english_product_subcategory_name,customer_key,customer_full_name,customer_marital_status,customer_gender,customer_total_children,customer_number_children_at_home,reseller_key,reseller_business_type,reseller_name,reseller_product_line,employee_key,employee_full_name,sales_territory_city,sales_territory_state_province,sales_territory_region,sales_territory_country,sales_territory_group
0,reseller_sales,SO44124,14,1,20.1865,20.1865,0.0,0.0,12.0278,12.0278,20.1865,1.6149,0.5047,2023-03-01,2023-03-13,2023-03-08,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,3,Warehouse,Advanced Bike Components,Road,283,Jillian Carson,Irving,Texas,United States - Southwest,Estados Unidos,North America
1,reseller_sales,SO45568,11,3,20.1865,60.5595,0.0,0.0,12.0278,36.0834,60.5595,4.8448,1.514,2023-08-29,2023-09-10,2023-09-05,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,3,Warehouse,Advanced Bike Components,Road,283,Jillian Carson,Irving,Texas,United States - Southwest,Estados Unidos,North America
2,reseller_sales,SO46377,2,1,20.1865,20.1865,0.0,0.0,12.0278,12.0278,20.1865,1.6149,0.5047,2023-11-29,2023-12-11,2023-12-06,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,3,Warehouse,Advanced Bike Components,Road,283,Jillian Carson,Irving,Texas,United States - Southwest,Estados Unidos,North America
3,reseller_sales,SO43913,10,5,20.1865,100.9325,0.0,0.0,12.0278,60.139,100.9325,8.0746,2.5233,2023-01-29,2023-02-10,2023-02-05,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,40,Value Added Reseller,Journey Sporting Goods,Mountain,283,Jillian Carson,Laredo,Texas,United States - Southwest,Estados Unidos,North America
4,reseller_sales,SO44566,13,2,20.1865,40.373,0.0,0.0,12.0278,24.0556,40.373,3.2298,1.0093,2023-05-01,2023-05-13,2023-05-08,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,40,Value Added Reseller,Journey Sporting Goods,Mountain,283,Jillian Carson,Laredo,Texas,United States - Southwest,Estados Unidos,North America
5,reseller_sales,SO46103,1,2,20.1865,40.373,0.0,0.0,12.0278,24.0556,40.373,3.2298,1.0093,2023-10-29,2023-11-10,2023-11-05,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,40,Value Added Reseller,Journey Sporting Goods,Mountain,283,Jillian Carson,Laredo,Texas,United States - Southwest,Estados Unidos,North America
6,reseller_sales,SO46099,13,2,20.1865,40.373,0.0,0.0,12.0278,24.0556,40.373,3.2298,1.0093,2023-10-29,2023-11-10,2023-11-05,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,45,Warehouse,Every Bike Shop,Road,285,Tsvi Reiter,La Vergne,Tennessee,United States - Southeast,Estados Unidos,North America
7,reseller_sales,SO44129,16,2,20.1865,40.373,0.0,0.0,12.0278,24.0556,40.373,3.2298,1.0093,2023-03-01,2023-03-13,2023-03-08,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,54,Warehouse,Larger Cycle Shop,Road,281,Michael Blythe,Melville,New York,United States - Northeast,Estados Unidos,North America
8,reseller_sales,SO44797,32,3,20.1865,60.5595,0.0,0.0,12.0278,36.0834,60.5595,4.8448,1.514,2023-05-31,2023-06-12,2023-06-07,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,54,Warehouse,Larger Cycle Shop,Road,281,Michael Blythe,Melville,New York,United States - Northeast,Estados Unidos,North America
9,reseller_sales,SO45575,17,2,20.1865,40.373,0.0,0.0,12.0278,24.0556,40.373,3.2298,1.0093,2023-08-29,2023-09-10,2023-09-05,"Casco deportivo: 100, rojo","Sport-100 Helmet, Red",Accesorio,Accessories,Casco,Helmets,,,,,,,54,Warehouse,Larger Cycle Shop,Road,281,Michael Blythe,Melville,New York,United States - Northeast,Estados Unidos,North America


### Esquema de Ventas Reducido (Sales)

Para este esquema, utilizaremos los scripts almacenados en el directorio `/data/database/postgres/reduced_schemas/sales`, que nos permitirán tanto crear las tablas que componen el esquema como configurar las `PRIMARY KEY` y `FOREIGN KEY` de cada tabla. Adicionalmente, debemos ejecutar algunos comandos previos para asegurarnos de la existencia de un nuevo esquema en blanco.

In [4]:
# Rebuild the reduced sales schema from the .sql scripts in
# SALES_SCHEMA_TABLES_PATH and preview one of its tables.
SALES_SCHEMA = 'sales'
SALES_SCHEMA_TABLES_PATH = '../data/database/postgres/reduced_schemas/sales/'
SALES_SCHEMA_TABLES_PREFIXES = ['fact_', 'dim_']

# `str.startswith` accepts a tuple, so one call checks every prefix.
table_prefixes = tuple(SALES_SCHEMA_TABLES_PREFIXES)

# Scripts are split in two groups so that every table-creation command is
# issued before any command coming from the remaining scripts.
create_table_commands = []
extra_commands = []

# NOTE(review): `iterdir()` yields entries in arbitrary order. If the
# non-table scripts depend on each other (e.g. primary keys before foreign
# keys), an explicit ordering may be required -- confirm against the files.
for file in Path(SALES_SCHEMA_TABLES_PATH).iterdir():
    if not file.is_file() or file.suffix != '.sql':
        continue

    if file.name.startswith(table_prefixes):
        # Table script: the file name without its extension is the table name.
        table_query = file.read_text(encoding='utf-8')
        table_name = file.stem
        create_table_commands.append(
            get_create_table_as(table_query, SALES_SCHEMA, table_name)
        )
        continue

    # Any other script: every non-blank line is treated as one standalone
    # statement, so statements spanning several lines are not supported here.
    with file.open('r', encoding='utf-8') as sql_file:
        for line in sql_file:
            line = line.strip()

            if not line:
                continue

            # Fill in the schema placeholder and make sure the statement
            # is terminated.
            command = line.replace('[SCHEMA]', SALES_SCHEMA)
            if not command.endswith(';'):
                command += ';'

            extra_commands.append(command)

# Start from a clean schema, then create the tables, then apply the rest.
commands_sales_schema = [
    f'DROP SCHEMA IF EXISTS {SALES_SCHEMA} CASCADE;',
    f'CREATE SCHEMA {SALES_SCHEMA};',
    *create_table_commands,
    *extra_commands,
]

execute_commands(commands_sales_schema, port=5432)

# Preview one of the dimension tables.
# NOTE: the misspelt name `query_resulst` is kept on purpose; it is
# notebook-level state that other cells may read.
query_resulst = execute_query(
    f'SELECT * FROM {SALES_SCHEMA}.dim_sales_territory',
    port=5432,
)

display(pd.DataFrame(query_resulst))


The following command was executed sucessfully:
DROP SCHEMA IF EXISTS sales CASCADE;

The following command was executed sucessfully:
CREATE SCHEMA sales;

The following command was executed sucessfully:
CREATE TABLE sales.dim_customer AS (
WITH
    years_dif AS (
        SELECT
            (EXTRACT(YEAR FROM CURRENT_DATE) - EXTRACT(YEAR FROM MAX(order_date))) + 1 AS years_difference

        FROM (
            SELECT order_date FROM adventure_works.fact_internet_sales
            UNION
            SELECT order_date FROM adventure_works.fact_reseller_sales
        )
    )


SELECT
    customer_key,
    geography_key,
    -- customer_alternate_key,
    -- title,
    -- first_name,
    -- middle_name,
    -- last_name,
    -- name_style,
    CONCAT(first_name, ' ', last_name) AS customer_full_name,
    (birth_date + ((SELECT years_difference FROM years_dif)::TEXT || ' years')::INTERVAL)::DATE AS birth_date,
    marital_status,
    -- suffix,
    gender,
    -- email_address,
    yearly_

Unnamed: 0,sales_territory_key,sales_territory_region,sales_territory_country,sales_territory_group
0,1,Northwest,United States,North America
1,2,Northeast,United States,North America
2,3,Central,United States,North America
3,4,Southwest,United States,North America
4,5,Southeast,United States,North America
5,6,Canada,Canada,North America
6,7,France,France,Europe
7,8,Germany,Germany,Europe
8,9,Australia,Australia,Pacific
9,10,United Kingdom,United Kingdom,Europe
