In [None]:
import sys
import os

# Make the parent of the current working directory importable so the project
# packages used below (e.g. 'data_sources') can be resolved from this notebook.
# Per the original note, the notebook is run from 'dags/' and its parent is 'dagster/'.
dagster_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if dagster_path not in sys.path:
    # Prepend (rather than append) so this directory is searched first.
    sys.path[:0] = [dagster_path]

print(f"✅ 'dagster' path added to sys.path: {dagster_path}")

In [None]:
# ✅ Cell 2: Import the Dagster context builder, the ingestion loaders, and the preprocessing entry point
from dagster import build_op_context
from data_sources.external_db_ingest import external_data
from data_sources.csv_mapping_ingest import load_csv_data
from data_preprocessing.transaction_clean_merge import run_full_preprocessing

# Define DB config
#
# The password is read from the AIM_DB_PASSWORD environment variable so that no
# credential is stored in the notebook (or in its version history / outputs).
# NOTE(review): the password that used to be hardcoded here has been exposed
# and should be rotated.
db_password = os.environ.get("AIM_DB_PASSWORD", "")
if not db_password:
    # Surface the problem in this cell instead of only at connection time.
    print(
        "WARNING: AIM_DB_PASSWORD is not set; "
        "the database login in the loader cell is expected to fail."
    )

db_config = {
    "database": "admin_aim_masterfile",
    "host": "162.241.4.11",
    "password": db_password,
    "port": 3306,
    # One query per source table; every query is restricted to the
    # 'OLA' and 'BRLN' branches.
    "queries": [
        # rd5000: rows with date after 2022-01-01
        """
        SELECT data_id, branch, pos, transdate, ite_code, quantity, dep_code, date, time, type, delivery
        FROM admin_aim_masterfile.rd5000
        WHERE date > '2022-01-01' AND branch in ('OLA', 'BRLN');
        """,
        # rd5500: item-level columns (code, description, unit price)
        """
        SELECT data_id, incode, ite_desc, dep_code, unit_prc, branch, pos
        FROM admin_aim_masterfile.rd5500
        WHERE branch in ('OLA', 'BRLN');
        """,
        # rd1800: department-level columns (code, name)
        """
        SELECT data_id, dept_code, dept_name, branch, pos
        FROM admin_aim_masterfile.rd1800
        WHERE branch in ('OLA', 'BRLN');
        """
    ],
    "user": "admin_aim_sandbox"
}

# Locations of the CSV mapping files, given relative to the notebook's
# working directory; passed as config to the CSV loader.
csv_config = dict(
    department_filepath="../../../csv/department_category_mapping.csv",
    item_filepath="../../../csv/item_category_mapping.csv",
)

In [None]:
# Build one simulated Dagster execution context per loader configuration
# (database config first, then CSV config) so the loaders can be called
# directly from the notebook.
db_context, csv_context = (
    build_op_context(config=loader_config)
    for loader_config in (db_config, csv_config)
)

# Invoke the loaders with their contexts. They are kept as separate statements
# so a failure in the CSV loader does not discard the database result.
external_outputs = external_data(db_context)
csv_outputs = load_csv_data(csv_context)

In [None]:
# Feed both loader outputs through the preprocessing entry point
# (described in the original as the full cleaning and enrichment pipeline).
final_df = run_full_preprocessing(external_outputs, csv_outputs)

# Report the size of the result, then leave the preview as the cell's last
# expression so the notebook renders it.
print(f"✅ Final dataset shape: {final_df.shape}")
final_df.head(n=10)