# Mover información aumentada a agregada

## Importamos librerías

In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *
import uuid

## Creamos variables claves del proyecto

In [0]:
# File-location variables: dotted "catalog.schema." prefixes. Each one ends in a
# dot, presumably so a table name can be appended directly -- TODO confirm.
# The spelling 'aumented' matches the schema name queried by the SQL cells in
# this notebook, so it must not be "corrected" here on its own.
# NOTE(review): neither prefix is referenced by the cells visible in this
# notebook; the SQL cells hard-code the fully qualified table names.
uc_location_aumented = 'medallion_architecture.aumented.'
uc_location_agregated = 'medallion_architecture.aggregated.'

# Other variables
# NOTE(review): looks like a day/month/year date pattern for parsing or
# formatting dates; it is not used by the cells visible here -- confirm usage.
date_format = 'dd/MM/yyyy'

## Creamos schemas para las diferentes tablas a leer

### Schema para las tablas transaccionales

In [0]:
# Schema for the transactional tables. Every column is nullable; QUANTITY and
# SPEND are doubles and all remaining columns are strings. The column order
# below is the order of the fields in the resulting StructType.
# NOTE(review): this schema is not referenced by the cells visible in this
# notebook.
df_transaction_schema = StructType(
    fields=[
        StructField(
            field_name,
            DoubleType() if field_name in ('QUANTITY', 'SPEND') else StringType(),
            True,
        )
        for field_name in (
            'SHOP_WEEK',
            'SHOP_DATE',
            'SHOP_WEEKDAY',
            'SHOP_HOUR',
            'QUANTITY',
            'SPEND',
            'PROD_CODE',
            'PROD_CODE_10',
            'PROD_CODE_20',
            'PROD_CODE_30',
            'PROD_CODE_40',
            'CUST_CODE',
            'seg_1',
            'seg_2',
            'BASKET_ID',
            'BASKET_SIZE',
            'BASKET_PRICE_SENSITIVITY',
            'BASKET_TYPE',
            'BASKET_DOMINANT_MISSION',
            'STORE_CODE',
            'STORE_FORMAT',
            'STORE_REGION',
        )
    ]
)

### Schema para las tablas de fecha

In [0]:
# Schema for the date tables: three nullable string columns, in this order.
# NOTE(review): this schema is not referenced by the cells visible in this
# notebook.
df_time_schema = StructType(
    fields=[
        StructField(field_name, StringType(), True)
        for field_name in ('shop_week', 'date_from', 'date_to')
    ]
)

## Unity Catalog

In [0]:
%sql
-- Create the target schema for the aggregated tables built by the cells below.
-- IF NOT EXISTS makes this a no-op when the schema is already present.
CREATE SCHEMA IF NOT EXISTS medallion_architecture.aggregated
COMMENT 'En este schema va a ir guardada la informacion agregada'
;

## Ejecución del código

In [0]:
%sql
-- Customers table: one row per distinct (CUST_CODE, seg_1, seg_2) combination
-- found in the augmented transactions table.
-- IF NOT EXISTS means an already existing table is left untouched on re-runs.
CREATE TABLE IF NOT EXISTS medallion_architecture.aggregated.customers AS
SELECT DISTINCT
  CUST_CODE,
  seg_1,
  seg_2
FROM medallion_architecture.aumented.transactions
;

In [0]:
%sql
-- Products table: one row per distinct combination of PROD_CODE and its
-- PROD_CODE_10 / _20 / _30 / _40 columns found in the augmented transactions.
-- IF NOT EXISTS means an already existing table is left untouched on re-runs.
CREATE TABLE IF NOT EXISTS medallion_architecture.aggregated.products AS
SELECT DISTINCT
  PROD_CODE,
  PROD_CODE_10,
  PROD_CODE_20,
  PROD_CODE_30,
  PROD_CODE_40
FROM medallion_architecture.aumented.transactions
;

In [0]:
%sql
-- Baskets table: one row per distinct combination of the basket attributes and
-- the shopping week, date and hour found in the augmented transactions.
-- IF NOT EXISTS means an already existing table is left untouched on re-runs.
CREATE TABLE IF NOT EXISTS medallion_architecture.aggregated.baskets AS
SELECT DISTINCT
  BASKET_ID,
  BASKET_SIZE,
  BASKET_PRICE_SENSITIVITY,
  BASKET_TYPE,
  BASKET_DOMINANT_MISSION,
  SHOP_WEEK,
  SHOP_DATE,
  SHOP_HOUR
FROM medallion_architecture.aumented.transactions
;

In [0]:
%sql
-- Stores table: one row per distinct (STORE_CODE, STORE_FORMAT, STORE_REGION)
-- combination found in the augmented transactions table.
-- IF NOT EXISTS means an already existing table is left untouched on re-runs.
CREATE TABLE IF NOT EXISTS medallion_architecture.aggregated.stores AS
SELECT DISTINCT
  STORE_CODE,
  STORE_FORMAT,
  STORE_REGION
FROM medallion_architecture.aumented.transactions
;

In [0]:
%sql
-- Fact table: the four code/id columns plus the QUANTITY and SPEND measures,
-- copied row by row from the augmented transactions (no grouping or
-- de-duplication is applied).
-- IF NOT EXISTS means an already existing table is left untouched on re-runs.
CREATE TABLE IF NOT EXISTS medallion_architecture.aggregated.fact_table AS
SELECT
  PROD_CODE,
  CUST_CODE,
  BASKET_ID,
  STORE_CODE,
  QUANTITY,
  SPEND
FROM medallion_architecture.aumented.transactions
;

In [0]:
%sql
-- Disabled statement, presumably kept as a manual reset helper (confirm with the author).
-- The baskets cell uses CREATE TABLE IF NOT EXISTS, so an existing baskets table
-- is only rebuilt after it has been dropped.
--DROP TABLE IF EXISTS medallion_architecture.aggregated.baskets;