In [78]:
import pandas as pd
import numpy as np
import psycopg2
import configparser

In [79]:
config = configparser.ConfigParser()
config.read('config.cfg')

['config.cfg']

### Access point dimensional

In [80]:
RDS_HOST_DBDIM = 'dbdim.cp6geq8ycm59.us-east-2.rds.amazonaws.com'

### Access point transaccional

In [81]:
RDS_HOST_DBTIENDA = 'dbtienda.cp6geq8ycm59.us-east-2.rds.amazonaws.com'

### Drivers a utilizar para conectar a las bases de datos

In [82]:
mysql_driver = f"""mysql+mysqlconnector://{config.get('DBTIENDA','DB_USERNAME')}:{config.get('DBTIENDA','DB_PASSWORD')}@{RDS_HOST_DBTIENDA}:{config.get('DBTIENDA','DB_PORT')}/{config.get('DBTIENDA','DB_NAME')}"""
mysql_driver

'mysql+mysqlconnector://admin_tienda:9HMt06J6jBeX@dbtienda.cp6geq8ycm59.us-east-2.rds.amazonaws.com:3306/tienda'

In [83]:
postgres_driver = f"""postgresql://{config.get('DIM','DB_USERNAME')}:{config.get('DIM','DB_PASSWORD')}@{RDS_HOST_DBDIM}:{config.get('DIM','DB_PORT')}/{config.get('DIM','DB_NAME')}"""
postgres_driver

'postgresql://admin_tiendadim:KNn9yX1ZeF16@dbdim.cp6geq8ycm59.us-east-2.rds.amazonaws.com:5432/tiendadim'

In [84]:
#!pip install mysql-connector-python #Para instalar la conexion con la base de datos
#import mysql.connector

### Leer tablas

In [85]:
sql_query_persona = 'SELECT * FROM persona;'
df_persona = pd.read_sql(sql_query_persona, mysql_driver)

sql_query_usuario = 'SELECT * FROM usuario;'
df_usuario = pd.read_sql(sql_query_usuario, mysql_driver)

sql_query_rol = 'SELECT * FROM rol;'
df_rol = pd.read_sql(sql_query_rol, mysql_driver)

sql_query_articulo = 'SELECT * FROM articulo;'
df_articulo = pd.read_sql(sql_query_articulo, mysql_driver)

sql_query_categoria = 'SELECT * FROM categoria;'
df_categoria = pd.read_sql(sql_query_categoria, mysql_driver)

In [86]:
df_categoria

Unnamed: 0,idcategoria,nombre,descripcion,estado
0,1,ALIMENTOS,Toward why instead.,1
1,2,JUGUETES,Large hair late must organization ready great.,1
2,3,HOGAR,Piece enter various mind dog because.,1
3,4,CALZADO,Senior check receive budget take group.,1
4,5,ROPA,Every there skin ago reach.,1
5,6,DEPORTES,Stand billion prevent his citizen activity.,1


# Transformar tablas dimensionales e insersion de datos

### Dimension Persona

In [87]:
#Seleccionamos los campos que necesitamos para la dimension
dim_persona = df_persona.loc[:, ["idpersona","tipo_persona","nombre","direccion","telefono","email"]]
#Le cambiamos los datos como aparecen en la tabla dimensional
dim_persona = dim_persona.rename(columns={'nombre': 'nombre_p', 'direccion': 'direccion_p', 'telefono': 'telefono_p', 'email': 'email_p'})
#insertamos datos a dim_persona.
dim_persona
#dim_persona = dim_persona.to_sql('dim_persona', postgres_driver, index=False, if_exists='append')

Unnamed: 0,idpersona,tipo_persona,nombre_p,direccion_p,telefono_p,email_p
0,1,PERSONA,Ashley Long,"731 Hall Rest Apt. 345\nNew Stephanie, TX 45230",+1-612-213-0569x885,heatherbass@example.org
1,2,PERSONA,Joseph Floyd,"827 Harris Squares\nSouth Michael, OH 15445",711.625.8479,ydavis@example.net
2,3,EMPRESA,Matthew Brown,Unit 5986 Box 8060\nDPO AE 31584,293.723.3014,taraholmes@example.org
3,4,EMPRESA,Scott Mayer,"824 Medina Avenue Suite 336\nPort Terri, NH 27174",001-763-708-7537,jimenezalicia@example.net
4,5,EMPRESA,Cassandra Torres,32549 Mendoza Extension Apt. 991\nEast Timothy...,5579496223,melissapeters@example.com
5,6,EMPRESA,Michael Cox,"01919 Richard Common Suite 092\nSouth Anna, OR...",412.396.3173x3157,donnakelley@example.org
6,7,EMPRESA,Kathy Perez,"598 Michael Forges Apt. 030\nWest Christine, N...",712.735.8251,wbrown@example.org
7,8,PERSONA,Margaret Goodwin,"PSC 8756, Box 2910\nAPO AE 34076",001-856-756-5240x009,wilsonjesse@example.net
8,9,EMPRESA,Jennifer Daugherty,"9702 Ingram Curve\nLake Jeffery, CT 95522",(647)397-5018x3727,johnbrown@example.net
9,10,PERSONA,Samantha Ramirez,"823 Brown Fords\nCollinsberg, MH 62101",+1-363-212-6561x8889,mark54@example.com


### Dimension Usuario

In [105]:
dim_usuario['estado_u'].dtypes

dtype('int64')

In [106]:
#Seleccionamos los campso que necesitamos para la dimension
dim_usuario = df_usuario.loc[:, ["idusuario","nombre","direccion","telefono","email","idrol","estado"]]

#Le cambiamos el nombre a los campos que tienen el nombre diferente
dim_usuario = dim_usuario.rename(columns={'nombre': 'nombre_u', 'direccion' : 'direccion_u','telefono':'telefono_u','email':'email_u','estado':'estado_u'})

#Seleccionamos los campso que necesitamos para la dimension
dim_rol = df_rol.loc[:, ["idrol","nombre","descripcion"]]
#Le cambiamos el nombre a los campos que tienen el nombre diferente
dim_rol = dim_rol.rename(columns={'nombre': 'rol_nombre', 'descripcion' : 'rol_descrip'})

join_usuario_rol = pd.merge(dim_usuario, dim_rol, left_on='idrol', right_on='idrol', how='inner').drop_duplicates() # Eliminamos Duplicados
#Ordenamos los campos
dim_usuario = join_usuario_rol.loc[:, ["idusuario","nombre_u","direccion_u","telefono_u","email_u","rol_nombre","rol_descrip"]]
#dim_usuario['estado_u'] = dim_usuario['estado_u'].astype('bool')
dim_usuario

Unnamed: 0,idusuario,nombre_u,direccion_u,telefono_u,email_u,rol_nombre,rol_descrip
0,1,Timothy Molina,"PSC 9959, Box 7333\nAPO AE 52999",+1-604-638-6487x176,ysalazar@example.net,Cajero,Table far ability specific.
1,9,Benjamin Griffin,"599 Robertson Motorway\nMacdonaldland, IN 91565",975.892.0474x92332,burnsfrederick@example.com,Cajero,Table far ability specific.
2,2,Anthony Atkinson,"PSC 5819, Box 2538\nAPO AP 80981",704.789.2953x04008,laguilar@example.com,Jefe Tienda,Rule blue development image.
3,3,Audrey Cortez,"117 Lisa Pass\nEast Crystalview, SC 87080",(586)900-4611,angela08@example.net,Jefe Tienda,Rule blue development image.
4,4,Ann Harvey,"241 Smith Viaduct Apt. 499\nSouth Nicole, PW 5...",(670)535-2059,michaelbennett@example.org,Jefe Tienda,Rule blue development image.
5,7,Charles Fisher,Unit 3320 Box 0342\nDPO AE 15839,334-527-5600x2111,qvasquez@example.org,Jefe Tienda,Rule blue development image.
6,8,Whitney Koch,Unit 4499 Box 7473\nDPO AE 79014,829.644.0708x47831,schroederjeffrey@example.com,Jefe Tienda,Rule blue development image.
7,10,Victoria Olson,"60816 Beard Station\nWest Walterstad, NH 89264",487-690-8268x542,nnelson@example.net,Jefe Tienda,Rule blue development image.
8,5,Amber Hughes,"0197 Kim Pass Suite 327\nOwenland, AR 28920",+1-305-888-0471x0208,hunterkevin@example.org,Gerente,Court reach adult western Democrat throw.
9,6,Nicole Morales,"3351 Cox Run Apt. 182\nMeghanchester, CO 41156",975.687.7608x0703,hcortez@example.net,Gerente,Court reach adult western Democrat throw.


### Dimension Articulo

In [107]:
#Seleccionamos los campso que necesitamos para la dimension
dim_articulo = df_articulo.loc[:, ["idarticulo","idcategoria","codigo","nombre","precio_venta","stock","descripcion","estado"]]
#Le cambiamos el nombre a los campos que tienen el nombre diferente
dim_articulo = dim_articulo.rename(columns={'nombre': 'articulo_nombre', 'estado' : 'articulo_estado','descripcion':'articulo_descrip'})

#Seleccionamos los campso que necesitamos para la dimension
dim_categoria = df_categoria.loc[:, ["idcategoria","nombre","descripcion"]]
#Le cambiamos el nombre a los campos que tienen el nombre diferente
dim_categoria = dim_categoria.rename(columns={'nombre': 'categoria_nombre', 'descripcion' : 'categoria_descrip'})

join_articulo_categoria = pd.merge(dim_articulo, dim_categoria, left_on='idcategoria', right_on='idcategoria', how='inner').drop_duplicates() # Eliminamos Duplicados

#Ordenamos los campos
dim_articulo = join_articulo_categoria.loc[:, ["idarticulo","codigo","articulo_nombre","precio_venta","stock","articulo_descrip","categoria_nombre","categoria_descrip"]]
dim_articulo
#dim_articulo['articulo_estado'] = dim_articulo['articulo_estado'].astype('int')

Unnamed: 0,idarticulo,codigo,articulo_nombre,precio_venta,stock,articulo_descrip,categoria_nombre,categoria_descrip
0,1,27082f,reason,836.00,16,Any news no source care just point.\nBook blue...,JUGUETES,Large hair late must organization ready great.
1,2,d654c3,evidence,886.83,8,Money stay miss gas. Person style onto.\nNeed ...,JUGUETES,Large hair late must organization ready great.
2,3,687595,federal,979.15,84,Even me he PM. Lay ten right.\nEnergy environm...,JUGUETES,Large hair late must organization ready great.
3,6,df4742,close,106.05,89,Reality structure himself name guess resource ...,JUGUETES,Large hair late must organization ready great.
4,11,7e072a,instead,826.38,82,Kid I large trade back. Perhaps college case m...,JUGUETES,Large hair late must organization ready great.
...,...,...,...,...,...,...,...,...
95,43,bafbd2,service,148.99,85,Can life yourself leader start child see. Kitc...,CALZADO,Senior check receive budget take group.
96,44,99a1ea,condition,449.53,96,Federal soon offer true allow cut president. C...,CALZADO,Senior check receive budget take group.
97,73,a81dc5,check,984.58,20,Cell citizen whether shake law similar. Though...,CALZADO,Senior check receive budget take group.
98,75,88a594,little,768.43,13,Remember any begin nice serious top realize. B...,CALZADO,Senior check receive budget take group.


### FACT_ingreso

In [90]:
sql_query_1 = 'SELECT * FROM ingreso;'
df_ingreso_2 = pd.read_sql(sql_query_1, mysql_driver)
sql_query_2 = 'SELECT * FROM detalle_ingreso;'
df_detale_ingreso_2 = pd.read_sql(sql_query_2, mysql_driver)

In [91]:
#Seleccionamos los campso que necesitamos para la dimension
dim_ingreso = df_ingreso_2.loc[:, ["idingreso","idproveedor","idusuario","fecha","total","impuesto"]]
df_detale_ingreso=  df_detale_ingreso_2.loc[:, ["idingreso","idarticulo","precio","cantidad"]]
#Calculamos los campos compra_d e impuesto_i
df_detale_ingreso ['compra_d'] = df_detale_ingreso['precio'] * df_detale_ingreso['cantidad']
df_detale_ingreso['impuesto_i']= df_detale_ingreso['compra_d']* 0.12
df_detale_ingreso = df_detale_ingreso.loc[:, ["idingreso","idarticulo","compra_d","impuesto_i","cantidad"]]

In [92]:
# Unimos las tablas que requerimos para la dimension
resultado_join = pd.merge(dim_ingreso, df_detale_ingreso, left_on='idingreso', right_on='idingreso', how='inner')
#Le cambiamos el nombre a los campos que tienen el nombre diferente
FACT_ingreso = resultado_join.rename(columns={'cantidad': 'compra_u'})
FACT_ingreso = FACT_ingreso.loc[:, ["idingreso","idproveedor","idusuario","fecha","idarticulo","compra_u","compra_d","impuesto_i"]]
FACT_ingreso['fecha'] = pd.to_datetime(FACT_ingreso['fecha'])
FACT_ingreso

Unnamed: 0,idingreso,idproveedor,idusuario,fecha,idarticulo,compra_u,compra_d,impuesto_i
0,1,6,5,2022-06-29 23:05:01,37,1,652.86,78.3432
1,2,3,2,2023-11-14 19:33:20,76,18,15755.94,1890.7128
2,2,3,2,2023-11-14 19:33:20,44,5,3780.90,453.7080
3,2,3,2,2023-11-14 19:33:20,41,14,2289.14,274.6968
4,3,5,1,2024-02-23 03:22:44,100,14,1504.72,180.5664
...,...,...,...,...,...,...,...,...
261,99,4,7,2023-03-10 23:00:04,55,19,2666.46,319.9752
262,100,6,5,2023-11-29 12:46:04,15,16,2442.56,293.1072
263,100,6,5,2023-11-29 12:46:04,69,6,5172.60,620.7120
264,100,6,5,2023-11-29 12:46:04,57,19,2178.35,261.4020


### FACT_venta

In [93]:
sql_query_venta = 'SELECT * FROM venta;'
df_venta = pd.read_sql(sql_query_venta, mysql_driver)
sql_query_detalle_venta = 'SELECT * FROM detalle_venta;'
df_detalle_venta = pd.read_sql(sql_query_detalle_venta, mysql_driver)

In [94]:
# Ajustes en df_venta y df_detalle_venta
df_venta = df_venta[['idventa','idcliente','idusuario','fecha']]
df_detalle_venta['venta_d'] = df_detalle_venta['cantidad']*(df_detalle_venta['precio']-df_detalle_venta['descuento'])
df_detalle_venta['descuento_d'] = df_detalle_venta['cantidad']*df_detalle_venta['descuento']
df_detalle_venta['impuesto_v'] = df_detalle_venta['venta_d']*0.12
df_detalle_venta = df_detalle_venta[['idventa','idarticulo','cantidad','venta_d','descuento_d','impuesto_v']]
FACT_venta = df_venta.merge(df_detalle_venta, on='idventa', how='inner').rename({'cantidad':'venta_u'}, axis=1)
FACT_venta

Unnamed: 0,idventa,idcliente,idusuario,fecha,idarticulo,venta_u,venta_d,descuento_d,impuesto_v
0,1,5,7,2022-07-06 04:22:56,9,11,1951.62,176.11,234.1944
1,2,7,1,2023-11-17 16:57:01,3,3,2547.15,13.71,305.6580
2,2,7,1,2023-11-17 16:57:01,84,9,3026.70,31.32,363.2040
3,2,7,1,2023-11-17 16:57:01,82,2,800.78,26.72,96.0936
4,2,7,1,2023-11-17 16:57:01,16,20,4341.80,82.60,521.0160
...,...,...,...,...,...,...,...,...,...
243,98,6,9,2023-01-13 11:08:53,43,7,6795.53,35.28,815.4636
244,98,6,9,2023-01-13 11:08:53,45,16,4982.08,13.28,597.8496
245,99,9,7,2023-08-19 05:30:48,85,5,607.60,59.80,72.9120
246,99,9,7,2023-08-19 05:30:48,65,6,5427.72,83.34,651.3264


### dim_calendar

In [95]:
import datetime as dt
#Calcular fecha minima y crear un dataframe con una columna calendario
fecha_minima = min(FACT_ingreso['fecha'].min(),FACT_venta['fecha'].min())
fecha_maxima = max(FACT_ingreso['fecha'].min(),FACT_venta['fecha'].min(),dt.datetime(2026,12,31))
dim_calendar = pd.DataFrame({'fecha':pd.date_range(start=fecha_minima, end='2026-12-31')})
#Sacamos los campos de la fecha que necesitamos
dim_calendar['mes'] = pd.to_datetime(dim_calendar['fecha']).dt.month
dim_calendar['año'] = pd.to_datetime(dim_calendar['fecha']).dt.year
dim_calendar['dia_mes'] = pd.to_datetime(dim_calendar['fecha']).dt.day
dim_calendar['dia_semana'] = pd.to_datetime(dim_calendar['fecha']).dt.dayofweek
dim_calendar['semana_año'] = pd.to_datetime(dim_calendar['fecha']).dt.isocalendar().week
dim_calendar

Unnamed: 0,fecha,mes,año,dia_mes,dia_semana,semana_año
0,2022-04-13 11:27:15,4,2022,13,2,15
1,2022-04-14 11:27:15,4,2022,14,3,15
2,2022-04-15 11:27:15,4,2022,15,4,15
3,2022-04-16 11:27:15,4,2022,16,5,15
4,2022-04-17 11:27:15,4,2022,17,6,15
...,...,...,...,...,...,...
1718,2026-12-26 11:27:15,12,2026,26,5,52
1719,2026-12-27 11:27:15,12,2026,27,6,52
1720,2026-12-28 11:27:15,12,2026,28,0,53
1721,2026-12-29 11:27:15,12,2026,29,1,53


## Insercion de datos en base de datos

In [96]:
# insertamos datos a dim_persona
dim_persona.to_sql('dim_persona', postgres_driver, index=False, if_exists='append')

10

In [108]:
#insertamos datos a dim_usuario.
dim_usuario.to_sql('dim_usuario', postgres_driver, index=False, if_exists='append')

10

In [109]:
#insertamos datos a dim_persona.
dim_articulo.to_sql('dim_articulo', postgres_driver, index=False, if_exists='append')

100

In [110]:
#insertamos datos a dim_calendar.
dim_calendar.to_sql('dim_calendar', postgres_driver, index=False, if_exists='append')

723

In [111]:
#insertamos datos a fact_ingreso.
FACT_ingreso.to_sql('fact_ingreso', postgres_driver, index=False, if_exists='append')

266

In [112]:
#insertamos datos a fact_venta.
FACT_venta.to_sql('fact_venta', postgres_driver, index=False, if_exists='append')

248