## ESTUDIO DE CÓMO PRETENDE LLEGAR LA DATA

---
### DEPENDENCIAS
---

In [None]:
import sys
import subprocess
import os

def instalar_si_no(package, pip_name=None):
    """Ensure ``package`` is importable, installing it with pip when it is not.

    Args:
        package: Importable module name (e.g. ``"dotenv"``).
        pip_name: Distribution name passed to ``pip install`` when it differs
            from the module name (e.g. ``"python-dotenv"``). Falls back to
            ``package`` when omitted or empty.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with a
            non-zero status.
    """
    import importlib

    try:
        __import__(package)
    except ImportError:
        # Run pip through the current interpreter so the package is installed
        # into the same environment this process is running in.
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name or package])
        # The import system caches directory listings; refresh them so the
        # freshly installed package can be imported without a restart.
        importlib.invalidate_caches()

# Third-party dependencies of this notebook, as
# {importable module name: pip distribution name}. The two differ only for
# python-dotenv, which is imported as ``dotenv``.
paquetes = {
    "pandas": "pandas",
    "pyodbc": "pyodbc",
    "pymssql": "pymssql",
    "sqlalchemy": "sqlalchemy",
    "dotenv": "python-dotenv"
}

# Install whichever packages are missing before the imports below run.
for modulo, pip_name in paquetes.items():
    instalar_si_no(modulo, pip_name)
    
import pandas as pd
import pyodbc
from dotenv import load_dotenv
from sqlalchemy import create_engine
import urllib

---
### Cargue de variables de entorno
---

In [56]:
# Load the .env file, letting its values override variables already set in
# the process environment.
load_dotenv(override=True)

# Connection settings used by the helper functions below; each one is None
# when the corresponding variable is not defined.
server, database, username, password = (
    os.getenv("SPACEPARTS_SERVER"),
    os.getenv("SPACEPARTS_DATABASE"),
    os.getenv("SPACEPARTS_USERNAME"),
    os.getenv("SPACEPARTS_PASSWORD"),
)

---
### Definición de funciones
---

In [None]:
def get_connection(database="master"):
    """Open a pyodbc connection to ``database`` on the configured server.

    The server, user and password come from the module-level ``server``,
    ``username`` and ``password`` variables. The connection uses ODBC Driver 18
    with encryption enabled, server-certificate validation, and a 30-second
    connection timeout.

    Args:
        database: Name of the database to connect to. Defaults to ``"master"``.

    Returns:
        The connection object returned by ``pyodbc.connect``.
    """
    driver_part = "DRIVER={ODBC Driver 18 for SQL Server};"
    target_part = f"SERVER={server};DATABASE={database};"
    credentials_part = f"UID={username};PWD={password};"
    options_part = "Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;"
    return pyodbc.connect(driver_part + target_part + credentials_part + options_part)

def list_databases():
    """Return the names of the databases visible to the configured login.

    Connects to ``master`` and reads ``sys.databases``.

    Returns:
        A list with one database name (str) per row returned by the server.
    """
    from contextlib import closing

    # pyodbc's own ``with connection:`` only commits or rolls back on exit and
    # leaves the connection open; ``closing`` makes sure it is released.
    with closing(get_connection("master")) as conn:
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sys.databases;")
        return [row[0] for row in cursor.fetchall()]

def list_tables(database):
    """List every base table in ``database``.

    Args:
        database: Name of the database to inspect.

    Returns:
        A list of ``"schema.table"`` strings, ordered by schema and then by
        table name. Views are excluded (only ``BASE TABLE`` rows are read).
    """
    from contextlib import closing

    # pyodbc's own ``with connection:`` only commits or rolls back on exit and
    # leaves the connection open; ``closing`` makes sure it is released.
    with closing(get_connection(database)) as conn:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT TABLE_SCHEMA, TABLE_NAME
            FROM INFORMATION_SCHEMA.TABLES
            WHERE TABLE_TYPE='BASE TABLE'
            ORDER BY TABLE_SCHEMA, TABLE_NAME;
        """)
        return [f"{row[0]}.{row[1]}" for row in cursor.fetchall()]

def sample_table(database, table_name, n=5):
    """Return the first ``n`` rows of a table as a DataFrame.

    Args:
        database: Name of the database that holds the table.
        table_name: Either ``"schema.table"`` or a bare table name; a bare
            name is looked up in the ``dbo`` schema.
        n: Number of rows to fetch. It is converted with ``int()`` before being
            placed in the query.

    Returns:
        A ``pandas.DataFrame`` with the sampled rows, or ``None`` when the
        query fails (the error is printed).

    Raises:
        ValueError, TypeError: If ``n`` cannot be converted to an integer.
    """
    # ``import urllib`` alone does not guarantee that the ``urllib.parse``
    # submodule is loaded, so import the function explicitly.
    from urllib.parse import quote_plus

    # Split only on the first dot so table names containing dots stay intact.
    if "." in table_name:
        schema, table = table_name.split(".", 1)
    else:
        schema, table = "dbo", table_name

    # Inside a bracket-quoted identifier a literal "]" must be doubled.
    quoted_schema = schema.replace("]", "]]")
    quoted_table = table.replace("]", "]]")
    full_name = f"[{quoted_schema}].[{quoted_table}]"

    # Force the row count to an integer so nothing but a number reaches the SQL.
    query = f"SELECT TOP {int(n)} * FROM {full_name};"
    odbc_str = (
        f"DRIVER={{ODBC Driver 18 for SQL Server}};"
        f"SERVER={server};DATABASE={database};"
        f"UID={username};PWD={password};"
        "Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;"
    )
    conn_str = f"mssql+pyodbc:///?odbc_connect={quote_plus(odbc_str)}"
    engine = create_engine(conn_str)

    try:
        return pd.read_sql(query, engine)
    except Exception as e:
        # Best-effort exploration helper: report the failure and keep going.
        print(f"No se pudo leer {full_name}: {e}")
        return None
    finally:
        # A new engine is created on every call; dispose of it so its
        # connection pool does not stay open after the sample is read.
        engine.dispose()

---
### Resultados del estudio del origen de la data
---

In [72]:
# Enumerate the databases returned by the server for the configured login.
dbs = list_databases()
print("Database disponibles:", dbs)

Database disponibles: ['master', 'SpacePartsCoDW']


In [74]:
# List the base tables of the "master" database; the bare name on the last
# line makes the notebook display the result.
tables = list_tables("master")
tables

['dbo.sysdac_history_internal', 'dbo.sysdac_instances_internal']

In [75]:
# List the base tables of the "SpacePartsCoDW" database (this overwrites the
# previous value of ``tables``).
tables = list_tables("SpacePartsCoDW")
tables

['dim.Brands',
 'dim.Budget-Rate',
 'dim.Customers',
 'dim.Employees',
 'dim.Exchange-Rate',
 'dim.Invoice-DocType',
 'dim.Order-DocType',
 'dim.Order-Status',
 'dim.Products',
 'dim.Regions',
 'fact.Budget',
 'fact.Forecast',
 'fact.Invoices',
 'fact.Orders']

---
### Según la página que documenta la información:
---

- Invoices: facturación + penalizaciones + devoluciones.
- Orders: pedidos de clientes + fechas de entrega.
- Budget: metas anuales por cliente/producto/mes.
- Forecast: revisiones mensuales del budget.

In [76]:
# Preview the first 5 rows of the dim.Invoice-DocType table.
df_sample = sample_table("SpacePartsCoDW", "dim.Invoice-DocType", n=5)
df_sample

Unnamed: 0,Billing Document Type Code,Text,Doc. Type Ordinal,Group,Group Ordinal,DWCreatedDate
0,F2,Normal Invoice,1,Invoice,1,2023-02-10 14:52:08.297
1,L2,Express Order,2,Invoice,1,2023-02-10 14:52:08.297
2,YW,Warranty,3,Adjustment,2,2023-02-10 14:52:08.297
3,YR,Return,4,Adjustment,2,2023-02-10 14:52:08.297
4,Z2,Other,5,Other,3,2023-02-10 14:52:08.297


---
### Exploración de la data
---