In [0]:
# AUTO-GENERATED FROM MAGE
# Pipeline: user_address_bq

In [0]:
# Declare a text widget named "params" whose default value is the empty string.
dbutils.widgets.text("params","")

In [0]:
# Read the current value of the "params" widget; it is parsed as JSON in a later cell.
params_str = dbutils.widgets.get("params")

In [0]:
import json

# The "params" widget defaults to an empty string, which json.loads rejects with an
# opaque "Expecting value" error. Fail early with an actionable message instead.
# ValueError is the base class of json.JSONDecodeError, so handlers that caught the
# old failure as ValueError still match.
if not params_str.strip():
    raise ValueError(
        'Widget "params" is empty; pass a JSON object, e.g. '
        '{"dataset": "...", "table_name": "..."}'
    )

# Keyword arguments for the pipeline, decoded from the widget's JSON text.
params = json.loads(params_str)

In [0]:
import sys
import os

# Absolute path to the mage-ai source tree that must be importable.
# Because the path is absolute it is used directly: joining it onto a notebook
# directory would have no effect (os.path.join drops every component that
# precedes an absolute one).
mage_ai_path = '/Volumes/dealshare_prod/default/mage-ai/mage-ai/mage-ai-master/'

print(f"Adding to sys.path: {mage_ai_path}")
# Guard against duplicate entries when the cell is re-run in the same session.
if mage_ai_path not in sys.path:
    sys.path.insert(0, mage_ai_path)

# Import eagerly so a wrong path fails here rather than in a later cell.
import mage_ai
print(f"Successfully imported mage_ai from: {mage_ai.__file__}")

In [0]:
# Pipeline identifier used in the start/completion log banners.
PIPELINE_NAME = "user_address_bq"
print(f"-----------Starting pipeline: {PIPELINE_NAME}-----------")

%md
**Mage block:** bq_std_generic_last_modified
**Type:** data_loader

In [0]:
from mage_ai.settings.repo import get_repo_path
from mage_ai.io.bigquery import BigQuery
from mage_ai.io.config import ConfigFileLoader
from mage_ai.io.mysql import MySQL
from os import path
from datetime import datetime,timedelta
import pandas as pd
from datetime import datetime,timedelta
if 'data_loader' not in globals():
    from mage_ai.data_preparation.decorators import data_loader

@data_loader
def load_data_from_big_query(*args, **kwargs):
    """
    Fetch the newest `modified_date` from a BigQuery table, shifted back 30 seconds.

    Keyword Args:
        dataset: Dataset name used to qualify the table in the query.
        table_name: Table whose `modified_date` column is aggregated.

    Returns:
        The shifted value formatted as ``YYYY-MM-DD HH:MM:SS``.

    Raises:
        KeyError: If `dataset` or `table_name` is not supplied.
    """
    watermark_sql = f"SELECT max(modified_date) as dt FROM {kwargs['dataset']}.{kwargs['table_name']}"

    io_config = ConfigFileLoader(
        '/Volumes/dealshare_prod/default/io_config/io_config.yaml',
        'default',
    )
    result = BigQuery.with_config(io_config).load(watermark_sql)

    # The query yields a single aggregate row; take its `dt` value and step
    # back 30 seconds before formatting.
    latest_modified = result['dt'][0]
    watermark = latest_modified - timedelta(seconds=30)
    return watermark.strftime("%Y-%m-%d %H:%M:%S")

In [0]:
# Run the BigQuery loader with the widget-supplied params; the resulting timestamp
# string is the lower bound passed to the MySQL extract.
last_modified_date = load_data_from_big_query(**params)

**Mage block:** user_address_extract
**Type:** data_loader

In [0]:
if 'data_loader' not in globals():
    from mage_ai.data_preparation.decorators import data_loader
import pandas as pd

@data_loader
def load_data_from_mysql(last_modified_date, *args, **kwargs):
    """
    Load `user_address` rows modified at or after `last_modified_date` from MySQL.

    Args:
        last_modified_date: Value interpolated into the `modified_date >=` filter.

    Returns:
        Whatever the MySQL loader's `load` returns for the query.
    """
    incremental_sql = f"""SELECT * FROM user_address WHERE modified_date>='{last_modified_date}'"""

    io_config = ConfigFileLoader(
        '/Volumes/dealshare_prod/default/io_config/io_config.yaml',
        'happy_offer',
    )

    # The context manager scopes the loader to this single query.
    with MySQL.with_config(io_config) as mysql:
        return mysql.load(incremental_sql)

In [0]:
# Extract the rows changed since the watermark computed above.
source_df = load_data_from_mysql(last_modified_date)

**Mage block:** user_address_reformat
**Type:** data_transform

In [0]:
from mage_ai.data_cleaner.transformer_actions.base import BaseAction
from mage_ai.data_cleaner.transformer_actions.constants import ActionType, Axis
from mage_ai.data_cleaner.transformer_actions.utils import build_transformer_action
from pandas import DataFrame
import pandas as pd

if 'transformer' not in globals():
    from mage_ai.data_preparation.decorators import transformer
if 'test' not in globals():
    from mage_ai.data_preparation.decorators import test


@transformer
def execute_transformer_action(df: DataFrame, *args, **kwargs) -> DataFrame:
    """
    Coerce the `state` and `city` columns to integers.

    Each column is parsed with `pd.to_numeric(errors='coerce')`, so values that
    cannot be parsed become NaN; NaN is then replaced with 0 and the column is
    cast to `int`.

    The input frame is left untouched: a new DataFrame is returned, so the
    caller's variable still holds the data exactly as it was extracted.

    Args:
        df: Frame containing `state` and `city` columns.

    Returns:
        A new DataFrame with the same columns, where `state` and `city` are ints.

    Raises:
        KeyError: If `state` or `city` is missing from `df`.
    """
    coerced = {
        column: pd.to_numeric(df[column], errors='coerce').fillna(0).astype(int)
        for column in ('state', 'city')
    }
    # assign() returns a new frame instead of writing into the caller's object.
    return df.assign(**coerced)


@test
def test_output(output, *args) -> None:
    """
    Check that the block produced an output at all.

    Raises:
        AssertionError: If `output` is None.
    """
    has_output = output is not None
    assert has_output, 'The output is undefined'


In [0]:
# Apply the state/city integer coercion to the extracted rows.
transformed_df = execute_transformer_action(source_df)

**Mage block:** user_address_bq_export
**Type:** data_exporter


In [0]:
import time

if 'data_exporter' not in globals():
    from mage_ai.data_preparation.decorators import data_exporter

@data_exporter
def export_data_to_big_query(df, **kwargs):
    """
    Upsert `df` into `oms_analytics.dim_user_address` through a staging table.

    The frame is first written to `staging_raw.dim_user_address_staging` with
    `if_exists='replace'`. A MERGE joined on `id` then updates every column of
    matching target rows and inserts the rows that have no match.

    Args:
        df: DataFrame to export. Its column names drive the generated UPDATE
            and INSERT column lists; the MERGE joins on an `id` column.
        **kwargs: Accepted for interface compatibility; not used.
    """
    staging_table = "staging_raw.dim_user_address_staging"
    target_table = "oms_analytics.dim_user_address"
    config_path = '/Volumes/dealshare_prod/default/io_config/io_config.yaml'
    config_profile = 'default'

    loader = ConfigFileLoader(config_path, config_profile)

    # Backtick-quote every column so that names colliding with BigQuery reserved
    # words remain valid identifiers in the generated SQL.
    quoted_columns = [f"`{column}`" for column in df.columns.tolist()]

    update_columns = ",\n".join(f"t.{column} = s.{column}" for column in quoted_columns)
    insert_columns = ",\n".join(quoted_columns)
    insert_values = ",\n".join(f"s.{column}" for column in quoted_columns)

    # Table names come from the constants above so the staging export and the
    # MERGE can never point at different tables.
    merge_query = f"""
    MERGE `{target_table}` t
    USING `{staging_table}` s
    ON t.id = s.id
    WHEN MATCHED THEN UPDATE SET {update_columns}
    WHEN NOT MATCHED THEN
    INSERT ({insert_columns})
    VALUES ({insert_values})
    """

    # Step 1: overwrite the staging table with the current batch.
    BigQuery.with_config(loader).export(
        df,
        staging_table,
        if_exists='replace'
    )
    # Step 2: fold the staged batch into the target table.
    BigQuery.with_config(loader).execute(merge_query)

In [0]:
# Stage the transformed rows and merge them into the BigQuery target table.
export_data_to_big_query(transformed_df)

In [0]:
# Reached only when every preceding cell ran without raising.
print(f"-----------Pipeline {PIPELINE_NAME} completed successfully-----------")