## ETL DIM_CUSTOMER

In [47]:
import pandas as pd
import  numpy as np
from config.db_config import conn, source

## Extracción

In [48]:
# Extract the customer, address, city and country source tables, one DataFrame
# per table. The queries run in the order listed and each one reads every
# column of its table through the `source` connection.
df_customers, df_addres, df_city, df_country = (
    pd.read_sql_query(query, source)
    for query in (
        'select * from customer',
        'select * from address',
        'select * from city',
        'select * from country',
    )
)

## Transformación

In [49]:
# Denormalise the customer record by walking the foreign keys
# customer -> address -> city -> country. Every join is an inner join, so a
# row without a match on any of the keys is dropped from the result.
# The suffixes only apply to column names that exist on both sides of a join.
merged_df = (
    df_customers
    .merge(df_addres, on='address_id', how='inner', suffixes=('_customers', '_address'))
    .merge(df_city, on='city_id', how='inner', suffixes=('_merged', '_city'))
    .merge(df_country, on='country_id', how='inner', suffixes=('_merged', '_country'))
)

In [50]:
# Build the customer's display name ("first last") and keep only the
# attributes the dimension needs.
# `assign` returns a new frame, so the joined frame is not mutated just to be
# sliced right afterwards. The former `merged_df.columns = [...]` statement was
# dropped: it re-assigned the exact labels the selection already produces.
merged_df = merged_df.assign(
    full_name=merged_df['first_name'] + ' ' + merged_df['last_name']
)[['full_name', 'address', 'city', 'country']]

In [51]:
# Surrogate key: number the rows of merged_df consecutively, starting at 1.
# NOTE(review): the key follows the current row order of merged_df, and the
# extraction queries show no ORDER BY — confirm the order is stable between
# runs, because the load step upserts on this key.
surrogate_ids = np.arange(1, len(merged_df) + 1)
merged_df['id'] = surrogate_ids

# Dimension layout: key first, then the descriptive attributes.
dim_customer_columns = ['id', 'full_name', 'address', 'city', 'country']
df_dim_customer = merged_df[dim_customer_columns]

In [52]:
def _title_case_or_none(values):
    """Return the entries of ``values`` as title-cased text, keeping gaps as None.

    Calling ``astype(str)`` first would turn a missing value (None/NaN) into the
    literal text 'None'/'nan', which title-casing rewrites to 'None'/'Nan' — a
    placeholder that would then be stored as if it were real data. Missing
    entries are therefore passed through as ``None``; every other entry is
    converted with ``str()`` and title-cased exactly as before.
    """
    return [None if pd.isna(value) else str(value).title() for value in values]


# Normalise the column types: integer key, title-cased text attributes.
# `assign` builds a new frame instead of writing column by column into
# df_dim_customer, which was sliced out of merged_df; existing columns keep
# their position.
df_dim_customer = df_dim_customer.assign(
    id=df_dim_customer['id'].astype(int),
    **{
        column: _title_case_or_none(df_dim_customer[column])
        for column in ('full_name', 'address', 'city', 'country')
    },
)

In [53]:
# Map the working column names onto the column names of the dim_customer table.
dim_customer_column_names = {
    'id': 'id_customer',
    'full_name': 'name_customer',
    'address': 'address_customer',
    'city': 'city_customer',
    'country': 'country_customer',
}
df_dim_customer = df_dim_customer.rename(columns=dim_customer_column_names)

## Carga

In [54]:
# Load: upsert every row of df_dim_customer into dim_customer in a single
# transaction. A row whose id_customer already exists has its descriptive
# columns overwritten with the incoming values.
cursor = conn.cursor()
try:
    table_name = 'dim_customer'  # not used by the statement below; kept as a notebook-level name
    insert_query = """
        INSERT INTO dim_customer (id_customer, name_customer, address_customer, city_customer, country_customer)
        VALUES (%s, %s, %s, %s, %s)
        ON CONFLICT (id_customer) DO UPDATE
        SET name_customer = EXCLUDED.name_customer, address_customer = EXCLUDED.address_customer, city_customer = EXCLUDED.city_customer, country_customer = EXCLUDED.country_customer;
    """
    filas_insertadas = 0
    # itertuples(index=False, name=None) yields one plain tuple per row, in
    # column order, without building a Series per row the way iterrows() does.
    records = list(df_dim_customer.itertuples(index=False, name=None))
    # One executemany call runs the statement once per parameter tuple.
    cursor.executemany(insert_query, records)
    filas_insertadas = len(records)
    conn.commit()
    print(f"Se afectaron {filas_insertadas} filas exitosamente.")
except Exception as e:
    # Undo the partial load. The error is reported but not re-raised, so the
    # notebook keeps running after a failed load.
    conn.rollback()
    print("Error durante la insercion:", e)
finally:
    cursor.close()
    

Se afectaron 599 filas exitosamente.
