In [226]:
import boto3
import pandas as pd
import numpy as np
import configparser
import psycopg2
import mysql.connector
from datetime import datetime, timedelta

### Leemos archivo de configuración y credenciales de base de datos MySQL

In [227]:
config = configparser.ConfigParser()
config.read('config.cfg')

['config.cfg']

In [228]:
aws_rds_conn = boto3.client('rds', aws_access_key_id=config.get('IAM', 'ACCESS_KEY'),
                            aws_secret_access_key=config.get('IAM', 'SECRET_KEY'),
                            region_name='us-east-1')

### Verificamos instancias disponibles para el usuario

In [229]:
rds_instances_ids = []

aws_response = aws_rds_conn.describe_db_instances()

for response in aws_response['DBInstances']:
    rds_instances_ids.append(response['DBInstanceIdentifier'])
    
print(f"Instances disponibles: {rds_instances_ids}")

Instances disponibles: ['dw-galileo-final', 'transacc-galileo-final']


### Creamos instancia de base de datos en AWS - MySQL

In [230]:
try:
    response = aws_rds_conn.create_db_instance(
        DBInstanceIdentifier=config.get('DATAWAREHOUSE', 'DB_INSTANCE_ID'),
        DBName=config.get('DATAWAREHOUSE', 'DB_NAME'),
        MasterUsername=config.get('DATAWAREHOUSE', 'DB_USERNAME'),
        MasterUserPassword=config.get('DATAWAREHOUSE', 'DB_PASSWORD'),
        Port=int(config.get('DATAWAREHOUSE', 'DB_PORT')),
        DBInstanceClass='db.t3.micro',
        Engine='mysql',
        PubliclyAccessible=True,
        AllocatedStorage=10,
        VpcSecurityGroupIds=[config.get('VPC', 'SECURITY_GROUP')]
    )
    
    print(response)
except aws_rds_conn.exceptions.DBInstanceAlreadyExistsFault:
    print("La instancia ya existe")
except Exception as ex:
    print("Error!!!", ex)

La instancia ya existe


In [231]:
try:
    instance = aws_rds_conn.describe_db_instances(DBInstanceIdentifier=config.get('DATAWAREHOUSE', 'DB_INSTANCE_ID'))
    RDS_HOSTNAME = instance.get('DBInstances')[0].get('Endpoint').get('Address')
    print(RDS_HOSTNAME)
except Exception as ex:
    print("Error!!!", ex)

dw-galileo-final.czy2eeg8ibjx.us-east-1.rds.amazonaws.com


### Nos conectamos a la base de datos y creamos las tablas

In [232]:
import sql_datawarehouse

In [233]:
try:
    db_mysql_conn = mysql.connector.connect(
        database=config.get('DATAWAREHOUSE', 'DB_NAME'),
        user=config.get('DATAWAREHOUSE', 'DB_USERNAME'),
        password=config.get('DATAWAREHOUSE', 'DB_PASSWORD'),
        port=config.get('DATAWAREHOUSE', 'DB_PORT'),
        host=RDS_HOSTNAME
    )

    cursor = db_mysql_conn.cursor()
    cursor.execute(sql_datawarehouse.DDL_QUERY, multi=True)
    #db_mysql_conn.commit()
    print("Data Warehouse Creado Exitosamente")
except Exception as ex:
    print("Error!!!", ex)

Data Warehouse Creado Exitosamente


### Nos conectamos a la base de datos de Transaccional de Postgres

In [234]:
try:
    instance = aws_rds_conn.describe_db_instances(DBInstanceIdentifier=config.get('TRANSACCIONAL', 'DB_INSTANCE_ID'))
    RDS_HOSTNAME_TRANSAC = instance.get('DBInstances')[0].get('Endpoint').get('Address')
    print(RDS_HOSTNAME_TRANSAC)
except Exception as ex:
    print("Error!!!", ex)

transacc-galileo-final.czy2eeg8ibjx.us-east-1.rds.amazonaws.com


### Establecemos los drivers de MySQL y Postgres

In [235]:
mysql_driver = f"""mysql+pymysql://{config.get('DATAWAREHOUSE', 'DB_USERNAME')}:{config.get('DATAWAREHOUSE', 'DB_PASSWORD')}@{RDS_HOSTNAME}:{config.get('DATAWAREHOUSE', 'DB_PORT')}/{config.get('DATAWAREHOUSE', 'DB_NAME')}"""  
mysql_driver

'mysql+pymysql://admin_mysql:yopL5uxeqAd8rino4am3sAV1y@dw-galileo-final.czy2eeg8ibjx.us-east-1.rds.amazonaws.com:3306/dw_galileo'

In [236]:
postgres_driver = f"""postgresql://{config.get('TRANSACCIONAL', 'DB_USERNAME')}:{config.get('TRANSACCIONAL', 'DB_PASSWORD')}@{RDS_HOSTNAME_TRANSAC}:{config.get('TRANSACCIONAL', 'DB_PORT')}/{config.get('TRANSACCIONAL', 'DB_NAME')}"""
postgres_driver

'postgresql://admin_postgres:faQAdrENuBrIbredr8VUtRUcA@transacc-galileo-final.czy2eeg8ibjx.us-east-1.rds.amazonaws.com:5432/transaccional_galileo'

### Dimensión Articulo

In [237]:
sql_query = 'SELECT * FROM articulo;'
df_articulos = pd.read_sql(sql_query, postgres_driver)
df_articulos.head()

Unnamed: 0,id_articulo,id_categoria,codigo,nombre,precio_venta,stock,descripcion,imagen,estado
0,1,2,8937159222354,toward,2162.75,10,Coach suffer.,analysis.png,0
1,2,4,574505660958,agent,9563.25,17,Position.,bring.png,1
2,3,4,4351938887352,kitchen,8648.05,13,Two test employee.,production.png,1
3,4,2,6938514661773,number,9228.22,20,Meeting line the.,hope.png,1
4,5,4,2401212744627,line,9343.93,13,Lead that four land.,field.png,1


In [238]:
sql_query = 'SELECT * FROM categoria;'
df_categorias = pd.read_sql(sql_query, postgres_driver)
df_categorias.head()

Unnamed: 0,id_categoria,nombre,descripcion,estado
0,1,Ropa,Ropa,0
1,2,Zapatos,Zapatos,1
2,3,Celulares,Celulares,0
3,4,Hogar,Hogar,1
4,5,Ferreteria,Ferreteria,0


In [239]:
dimArticulos = df_articulos.merge(df_categorias, on='id_categoria', how='inner', suffixes=('', '_categoria'))
dimArticulos.head()

Unnamed: 0,id_articulo,id_categoria,codigo,nombre,precio_venta,stock,descripcion,imagen,estado,nombre_categoria,descripcion_categoria,estado_categoria
0,1,2,8937159222354,toward,2162.75,10,Coach suffer.,analysis.png,0,Zapatos,Zapatos,1
1,4,2,6938514661773,number,9228.22,20,Meeting line the.,hope.png,1,Zapatos,Zapatos,1
2,8,2,6911973856256,school,951.7,19,Deal page medical.,glass.png,1,Zapatos,Zapatos,1
3,2,4,574505660958,agent,9563.25,17,Position.,bring.png,1,Hogar,Hogar,1
4,3,4,4351938887352,kitchen,8648.05,13,Two test employee.,production.png,1,Hogar,Hogar,1


In [240]:
dimArticulos.drop(['id_categoria', 'precio_venta', 'stock', 'descripcion', 'imagen', 'estado', 'descripcion_categoria', 'estado_categoria'], axis=1,  inplace=True)
dimArticulos.head()

Unnamed: 0,id_articulo,codigo,nombre,nombre_categoria
0,1,8937159222354,toward,Zapatos
1,4,6938514661773,number,Zapatos
2,8,6911973856256,school,Zapatos
3,2,574505660958,agent,Hogar
4,3,4351938887352,kitchen,Hogar


In [241]:
dimArticulos.rename(columns={'nombre_categoria': 'categoria'}, inplace=True)
dimArticulos.head()

Unnamed: 0,id_articulo,codigo,nombre,categoria
0,1,8937159222354,toward,Zapatos
1,4,6938514661773,number,Zapatos
2,8,6911973856256,school,Zapatos
3,2,574505660958,agent,Hogar
4,3,4351938887352,kitchen,Hogar


### Dimensión Cliente

In [242]:
sql_query = 'SELECT * FROM persona;'
df_clientes = pd.read_sql(sql_query, postgres_driver)
df_clientes.head()

Unnamed: 0,id_persona,tipo_persona,nombre,tipo_documento,num_documento,direccion,telefono,email
0,1,Juridica,Erin Collins,DPI,8728803,86057 Brock Forge,51427273,teresamiller@hotmail.com
1,2,Individual,Sarah Pineda,Pasaporte,6304635,"PSC 0794, Box 9476",12299199,jason54@yahoo.com
2,3,Individual,Andrew Patterson,DPI,369531,179 Michelle Canyon,80005035,anthony56@hotmail.com
3,4,Juridica,Carlos Baker,Pasaporte,5869907,8035 Lin Well Suite 375,42046389,vstokes@hotmail.com
4,5,Juridica,Angela Terry,DPI,5725840,03058 Alvarez Roads,71142687,jennygreen@hotmail.com


In [243]:
dimClientes = df_clientes
dimClientes.drop(['tipo_documento', 'num_documento', 'direccion', 'telefono', 'email'], axis=1, inplace=True)
dimClientes.head()

Unnamed: 0,id_persona,tipo_persona,nombre
0,1,Juridica,Erin Collins
1,2,Individual,Sarah Pineda
2,3,Individual,Andrew Patterson
3,4,Juridica,Carlos Baker
4,5,Juridica,Angela Terry


In [244]:
dimClientes.rename(columns={'id_persona': 'id_cliente'}, inplace=True)
dimClientes.head()

Unnamed: 0,id_cliente,tipo_persona,nombre
0,1,Juridica,Erin Collins
1,2,Individual,Sarah Pineda
2,3,Individual,Andrew Patterson
3,4,Juridica,Carlos Baker
4,5,Juridica,Angela Terry


### Dimensión Usuario

In [245]:
sql_query = 'SELECT * FROM usuario;'
df_usuarios = pd.read_sql(sql_query, postgres_driver)
df_usuarios.head()

Unnamed: 0,id_usuario,id_rol,nombre,tipo_documento,num_documento,direccion,telefono,email,clave,estado
0,1,1,Kathy Pratt,DPI,2666204,689 Jones Crossing Suite 479,23592929,dawnrich@hotmail.com,KKsjLCBWmDyGEcfIayLR,0
1,2,1,Jennifer Williams,Pasaporte,8380166,8406 Lawrence Ranch,96179608,reyescarla@yahoo.com,UVGoQxRSVpBVZBlNDXSh,1
2,3,1,Melissa Cox,DPI,9058972,643 Becker Prairie,28566093,fkidd@hotmail.com,HoWbvIoWyDvElMkEHlwH,1
3,4,1,Tara Ramos,Pasaporte,3751727,598 Gray Crescent,85544694,jwells@yahoo.com,jUmrkSlDhMZxzTXWZawq,1
4,5,3,Charles Rogers,DPI,4345986,75543 Hendrix Fort Apt. 373,66111788,holmesanthony@yahoo.com,TunuyqrENPrmqfBqXecN,1


In [246]:
sql_query = 'SELECT * FROM rol;'
df_roles = pd.read_sql(sql_query, postgres_driver)
df_roles.head()

Unnamed: 0,id_rol,nombre,descripcion,estado
0,1,Cliente,Cliente Normal,1
1,2,Gerente,Administrador,1
2,3,Supervisor,Supervisor,1


In [247]:
dimUsuarios = df_usuarios.merge(df_roles, on='id_rol', how='inner', suffixes=('', '_rol'))
dimUsuarios.head()

Unnamed: 0,id_usuario,id_rol,nombre,tipo_documento,num_documento,direccion,telefono,email,clave,estado,nombre_rol,descripcion,estado_rol
0,1,1,Kathy Pratt,DPI,2666204,689 Jones Crossing Suite 479,23592929,dawnrich@hotmail.com,KKsjLCBWmDyGEcfIayLR,0,Cliente,Cliente Normal,1
1,2,1,Jennifer Williams,Pasaporte,8380166,8406 Lawrence Ranch,96179608,reyescarla@yahoo.com,UVGoQxRSVpBVZBlNDXSh,1,Cliente,Cliente Normal,1
2,3,1,Melissa Cox,DPI,9058972,643 Becker Prairie,28566093,fkidd@hotmail.com,HoWbvIoWyDvElMkEHlwH,1,Cliente,Cliente Normal,1
3,4,1,Tara Ramos,Pasaporte,3751727,598 Gray Crescent,85544694,jwells@yahoo.com,jUmrkSlDhMZxzTXWZawq,1,Cliente,Cliente Normal,1
4,6,1,Erin Thompson,Pasaporte,1697027,112 Amanda Spring,88580940,robertsjonathan@gmail.com,pThOeexyaNTddAPjUYNM,1,Cliente,Cliente Normal,1


In [248]:
dimUsuarios.drop(['id_rol', 'tipo_documento', 'num_documento', 'direccion', 'telefono', 'email', 'clave', 'descripcion', 'estado_rol'], axis=1, inplace=True)
dimUsuarios.head()

Unnamed: 0,id_usuario,nombre,estado,nombre_rol
0,1,Kathy Pratt,0,Cliente
1,2,Jennifer Williams,1,Cliente
2,3,Melissa Cox,1,Cliente
3,4,Tara Ramos,1,Cliente
4,6,Erin Thompson,1,Cliente


In [249]:
dimUsuarios.rename(columns={'nombre_rol': 'rol'}, inplace=True)
dimUsuarios.head()

Unnamed: 0,id_usuario,nombre,estado,rol
0,1,Kathy Pratt,0,Cliente
1,2,Jennifer Williams,1,Cliente
2,3,Melissa Cox,1,Cliente
3,4,Tara Ramos,1,Cliente
4,6,Erin Thompson,1,Cliente


### Dimensión de tiempo

In [250]:
# Crear una lista de fechas desde '2020-01-01' hasta la fecha actual
fecha_inicio = datetime(2020, 1, 1)
fecha_actual = datetime.now()
lista_fechas = pd.date_range(start=fecha_inicio, end=fecha_actual, freq='D')
    
dimFechas = pd.DataFrame({'full_date': lista_fechas})
    
dimFechas.head()

Unnamed: 0,full_date
0,2020-01-01
1,2020-01-02
2,2020-01-03
3,2020-01-04
4,2020-01-05


In [251]:
dimFechas['id_date'] = pd.DatetimeIndex(dimFechas['full_date']).strftime('%Y%m%d')
dimFechas['year'] = pd.DatetimeIndex(dimFechas['full_date']).isocalendar().year.tolist()
dimFechas['month'] = pd.DatetimeIndex(dimFechas['full_date']).month
dimFechas['quarter'] = pd.DatetimeIndex(dimFechas['full_date']).quarter
dimFechas['day'] = pd.DatetimeIndex(dimFechas['full_date']).isocalendar().day.tolist()
dimFechas['week'] = pd.DatetimeIndex(dimFechas['full_date']).isocalendar().week.tolist()
dimFechas['day_name'] = dimFechas['full_date'].dt.day_name().tolist()
dimFechas['day_of_week'] = pd.DatetimeIndex(dimFechas['full_date']).dayofweek
dimFechas['weekday_flag'] = dimFechas['day_of_week'].apply(lambda x: 'Weekend' if x > 5 else 'Weekday')
dimFechas['month_name'] = dimFechas['full_date'].dt.month_name().tolist()
dimFechas.head()

Unnamed: 0,full_date,id_date,year,month,quarter,day,week,day_name,day_of_week,weekday_flag,month_name
0,2020-01-01,20200101,2020,1,1,3,1,Wednesday,2,Weekday,January
1,2020-01-02,20200102,2020,1,1,4,1,Thursday,3,Weekday,January
2,2020-01-03,20200103,2020,1,1,5,1,Friday,4,Weekday,January
3,2020-01-04,20200104,2020,1,1,6,1,Saturday,5,Weekday,January
4,2020-01-05,20200105,2020,1,1,7,1,Sunday,6,Weekend,January


### Creamos la tabla de hechos

In [252]:
sql_query = 'SELECT * FROM venta;'
df_ventas = pd.read_sql(sql_query, postgres_driver)
df_ventas.head()

Unnamed: 0,id_venta,id_cliente,id_usuario,tipo_comprobante,serie_comprobante,num_comprobante,fecha,impuesto,total,estado
0,1,55,64,Recibo,p7-8029,9656,2021-01-01,14.5,1450.0,0
1,2,50,32,Factura,m1-7676,64223,2023-10-13,9.82,982.0,1
2,3,10,35,Factura,t9-5484,75899,2022-12-24,81.0,8100.0,1
3,4,6,65,Recibo,w6-6659,22202,2022-01-16,89.8,8980.0,1
4,5,48,38,Factura,b1-6479,59344,2022-06-25,5.43,543.0,1


In [253]:
sql_query = 'SELECT * FROM detalle_venta;'
df_detalle_ventas = pd.read_sql(sql_query, postgres_driver)
df_detalle_ventas.head()

Unnamed: 0,id_detalle_venta,id_venta,id_articulo,cantidad,precio,descuento
0,1,1,17,3,3193.16,31.93
1,2,2,17,1,6355.5,0.0
2,3,3,13,3,9366.12,0.0
3,4,4,7,2,8799.85,88.0
4,5,5,15,1,951.7,0.0


In [254]:
ventas_fact = df_ventas.merge(df_detalle_ventas, on='id_venta', how='inner', suffixes=('', '_det'))
ventas_fact.head()

Unnamed: 0,id_venta,id_cliente,id_usuario,tipo_comprobante,serie_comprobante,num_comprobante,fecha,impuesto,total,estado,id_detalle_venta,id_articulo,cantidad,precio,descuento
0,1,55,64,Recibo,p7-8029,9656,2021-01-01,14.5,1450.0,0,1,17,3,3193.16,31.93
1,2,50,32,Factura,m1-7676,64223,2023-10-13,9.82,982.0,1,2,17,1,6355.5,0.0
2,3,10,35,Factura,t9-5484,75899,2022-12-24,81.0,8100.0,1,3,13,3,9366.12,0.0
3,4,6,65,Recibo,w6-6659,22202,2022-01-16,89.8,8980.0,1,4,7,2,8799.85,88.0
4,5,48,38,Factura,b1-6479,59344,2022-06-25,5.43,543.0,1,5,15,1,951.7,0.0


In [255]:
ventas_fact['total'] = ventas_fact['cantidad'] * ventas_fact['precio']
ventas_fact.drop(['tipo_comprobante', 'serie_comprobante', 'num_comprobante', 'impuesto', 'id_detalle_venta', 'cantidad', 'precio'], axis=1, inplace=True)
ventas_fact.head()

Unnamed: 0,id_venta,id_cliente,id_usuario,fecha,total,estado,id_articulo,descuento
0,1,55,64,2021-01-01,9579.48,0,17,31.93
1,2,50,32,2023-10-13,6355.5,1,17,0.0
2,3,10,35,2022-12-24,28098.36,1,13,0.0
3,4,6,65,2022-01-16,17599.7,1,7,88.0
4,5,48,38,2022-06-25,951.7,1,15,0.0


### Insertamos la data de cada Dimensión en el Data Warehouse

#### Dimensión de Articulos

In [256]:
dimArticulos.to_sql('dim_articulo', mysql_driver, index=False, if_exists='append')

30

### Dimensión de Clientes

In [257]:
dimClientes.to_sql('dim_cliente', mysql_driver, index=False, if_exists='append')

55

### Dimensión de Fechas

In [258]:
dimFechas.to_sql('dim_fecha', mysql_driver, index=False, if_exists='append')

1562

### Dimensión de Usuarios

In [259]:
dimUsuarios.to_sql('dim_usuario', mysql_driver, index=False, if_exists='append')

68

### Construimos la tabla de hechos

In [260]:
sql_query = 'SELECT * FROM dim_articulo;'
df_dim_articulos = pd.read_sql(sql_query, mysql_driver)
df_dim_articulos.head()

Unnamed: 0,sk_articulo,id_articulo,codigo,nombre,categoria
0,1,1,8937159222354,toward,Zapatos
1,2,4,6938514661773,number,Zapatos
2,3,8,6911973856256,school,Zapatos
3,4,2,574505660958,agent,Hogar
4,5,3,4351938887352,kitchen,Hogar


In [261]:
df_venta_fact_dim_articulos = ventas_fact.merge(df_dim_articulos, on='id_articulo', how='inner', suffixes=('', '_articulo'))
df_venta_fact_dim_articulos.head()

Unnamed: 0,id_venta,id_cliente,id_usuario,fecha,total,estado,id_articulo,descuento,sk_articulo,codigo,nombre,categoria
0,1,55,64,2021-01-01,9579.48,0,17,31.93,21,2154239197357,wonder,Ropa
1,2,50,32,2023-10-13,6355.5,1,17,0.0,21,2154239197357,wonder,Ropa
2,29,37,41,2023-02-22,2455.29,1,17,0.0,21,2154239197357,wonder,Ropa
3,48,38,63,2021-01-10,9168.32,1,17,0.0,21,2154239197357,wonder,Ropa
4,57,24,47,2021-05-27,2455.29,1,17,0.0,21,2154239197357,wonder,Ropa


In [262]:
df_venta_fact_dim_articulos.drop(columns=['id_articulo', 'codigo', 'nombre', 'categoria'], axis=1, inplace=True)
df_venta_fact_dim_articulos.head()

Unnamed: 0,id_venta,id_cliente,id_usuario,fecha,total,estado,descuento,sk_articulo
0,1,55,64,2021-01-01,9579.48,0,31.93,21
1,2,50,32,2023-10-13,6355.5,1,0.0,21
2,29,37,41,2023-02-22,2455.29,1,0.0,21
3,48,38,63,2021-01-10,9168.32,1,0.0,21
4,57,24,47,2021-05-27,2455.29,1,0.0,21


In [263]:
sql_query = 'SELECT * FROM dim_cliente;'
df_dim_clientes = pd.read_sql(sql_query, mysql_driver)
df_dim_clientes.head()

Unnamed: 0,sk_cliente,id_cliente,nombre,tipo_persona
0,1,1,Erin Collins,Juridica
1,2,2,Sarah Pineda,Individual
2,3,3,Andrew Patterson,Individual
3,4,4,Carlos Baker,Juridica
4,5,5,Angela Terry,Juridica


In [264]:
df_venta_fact_dim_clientes = df_venta_fact_dim_articulos.merge(df_dim_clientes, on='id_cliente', how='inner', suffixes=('', '_cliente'))
df_venta_fact_dim_clientes.head()

Unnamed: 0,id_venta,id_cliente,id_usuario,fecha,total,estado,descuento,sk_articulo,sk_cliente,nombre,tipo_persona
0,1,55,64,2021-01-01,9579.48,0,31.93,21,55,Matthew Santiago MD,Individual
1,134,55,55,2023-08-31,951.7,1,0.0,21,55,Matthew Santiago MD,Individual
2,98,55,3,2022-12-08,13716.94,1,0.0,27,55,Matthew Santiago MD,Individual
3,73,55,33,2022-09-09,27504.96,1,91.68,29,55,Matthew Santiago MD,Individual
4,117,55,61,2023-09-19,13716.94,1,0.0,29,55,Matthew Santiago MD,Individual


In [265]:
df_venta_fact_dim_clientes.drop(columns=['id_cliente', 'nombre', 'tipo_persona'], axis=1, inplace=True)
df_venta_fact_dim_clientes.head()

Unnamed: 0,id_venta,id_usuario,fecha,total,estado,descuento,sk_articulo,sk_cliente
0,1,64,2021-01-01,9579.48,0,31.93,21,55
1,134,55,2023-08-31,951.7,1,0.0,21,55
2,98,3,2022-12-08,13716.94,1,0.0,27,55
3,73,33,2022-09-09,27504.96,1,91.68,29,55
4,117,61,2023-09-19,13716.94,1,0.0,29,55


In [266]:
sql_query = 'SELECT * FROM dim_usuario;'
df_dim_usuarios = pd.read_sql(sql_query, mysql_driver)
df_dim_usuarios.head()

Unnamed: 0,sk_usuario,id_usuario,nombre,estado,rol
0,1,1,Kathy Pratt,0,Cliente
1,2,2,Jennifer Williams,1,Cliente
2,3,3,Melissa Cox,1,Cliente
3,4,4,Tara Ramos,1,Cliente
4,5,6,Erin Thompson,1,Cliente


In [267]:
df_venta_fact_dim_usuarios = df_venta_fact_dim_clientes.merge(df_dim_usuarios, on='id_usuario', how='inner', suffixes=('', '_usuario'))
df_venta_fact_dim_usuarios.head()

Unnamed: 0,id_venta,id_usuario,fecha,total,estado,descuento,sk_articulo,sk_cliente,sk_usuario,nombre,estado_usuario,rol
0,1,64,2021-01-01,9579.48,0,31.93,21,55,38,Michael Price,1,Cliente
1,258,64,2022-03-27,18732.24,1,0.0,1,20,38,Michael Price,1,Cliente
2,156,64,2021-03-11,19066.5,1,0.0,19,19,38,Michael Price,1,Cliente
3,295,64,2022-06-27,8725.19,1,87.25,15,39,38,Michael Price,1,Cliente
4,157,64,2023-03-09,19126.5,1,95.63,22,39,38,Michael Price,1,Cliente


In [268]:
df_venta_fact_dim_usuarios.drop(columns=['id_usuario', 'nombre', 'estado_usuario', 'rol'], axis=1, inplace=True)
df_venta_fact_dim_usuarios.head()

Unnamed: 0,id_venta,fecha,total,estado,descuento,sk_articulo,sk_cliente,sk_usuario
0,1,2021-01-01,9579.48,0,31.93,21,55,38
1,258,2022-03-27,18732.24,1,0.0,1,20,38
2,156,2021-03-11,19066.5,1,0.0,19,19,38
3,295,2022-06-27,8725.19,1,87.25,15,39,38
4,157,2023-03-09,19126.5,1,95.63,22,39,38


In [269]:
sql_query = 'SELECT * FROM dim_fecha;'
df_dim_fechas = pd.read_sql(sql_query, mysql_driver)
df_dim_fechas.head()

Unnamed: 0,id_date,full_date,year,month,quarter,day,week,day_of_week,day_name,weekday_flag,month_name
0,20200101,2020-01-01,2020,1,1,3,1,2,Wednesday,Weekday,January
1,20200102,2020-01-02,2020,1,1,4,1,3,Thursday,Weekday,January
2,20200103,2020-01-03,2020,1,1,5,1,4,Friday,Weekday,January
3,20200104,2020-01-04,2020,1,1,6,1,5,Saturday,Weekday,January
4,20200105,2020-01-05,2020,1,1,7,1,6,Sunday,Weekend,January


In [270]:
#len(df_venta_fact_dim_usuarios)
df_venta_fact_dim_fechas = pd.merge(df_venta_fact_dim_usuarios, df_dim_fechas, left_on='fecha', right_on='full_date', how='inner')
df_venta_fact_dim_fechas.head()

Unnamed: 0,id_venta,fecha,total,estado,descuento,sk_articulo,sk_cliente,sk_usuario,id_date,full_date,year,month,quarter,day,week,day_of_week,day_name,weekday_flag,month_name
0,1,2021-01-01,9579.48,0,31.93,21,55,38,20210101,2021-01-01,2020,1,1,5,53,4,Friday,Weekday,January
1,258,2022-03-27,18732.24,1,0.0,1,20,38,20220327,2022-03-27,2022,3,1,7,12,6,Sunday,Weekend,March
2,156,2021-03-11,19066.5,1,0.0,19,19,38,20210311,2021-03-11,2021,3,1,4,10,3,Thursday,Weekday,March
3,158,2021-03-11,19126.5,1,0.0,29,35,68,20210311,2021-03-11,2021,3,1,4,10,3,Thursday,Weekday,March
4,295,2022-06-27,8725.19,1,87.25,15,39,38,20220627,2022-06-27,2022,6,2,1,26,0,Monday,Weekday,June


In [271]:
df_venta_fact_dim_fechas.drop(columns=['fecha', 'full_date', 'year', 'month', 'quarter', 'day', 'week', 'day_of_week', 'day_name', 'weekday_flag', 'month_name'], axis=1, inplace=True)
df_venta_fact_dim_fechas.head()

Unnamed: 0,id_venta,total,estado,descuento,sk_articulo,sk_cliente,sk_usuario,id_date
0,1,9579.48,0,31.93,21,55,38,20210101
1,258,18732.24,1,0.0,1,20,38,20220327
2,156,19066.5,1,0.0,19,19,38,20210311
3,158,19126.5,1,0.0,29,35,68,20210311
4,295,8725.19,1,87.25,15,39,38,20220627


In [272]:
df_venta_fact_dim_fechas.rename(columns={'id_date': 'id_fecha_venta'}, inplace=True)
ventaFact = df_venta_fact_dim_fechas
ventaFact.head()

Unnamed: 0,id_venta,total,estado,descuento,sk_articulo,sk_cliente,sk_usuario,id_fecha_venta
0,1,9579.48,0,31.93,21,55,38,20210101
1,258,18732.24,1,0.0,1,20,38,20220327
2,156,19066.5,1,0.0,19,19,38,20210311
3,158,19126.5,1,0.0,29,35,68,20210311
4,295,8725.19,1,87.25,15,39,38,20220627


### Insertamos los datos en la tabla de hechos

In [273]:
ventaFact.to_sql('venta_fact', mysql_driver, index=False, if_exists='append')

382