In [1]:
import tomllib
import pandas as pd
import time
from my_pipeline.extract import extract_data
from my_pipeline.transform import transform_data
from my_pipeline.outliers import remove_outliers
from my_pipeline.normalize import normalize_data
from my_pipeline.encode import encode_categorical
from my_pipeline.load import save_data
from my_pipeline.progress import StepProgress
from my_pipeline.profiler import profile_step
from my_pipeline.logger import get_logger
from pathlib import Path

# ---------------------------------------------------
# 1. Load TOML config
# ---------------------------------------------------
CONFIG_PATH = "config/settings.toml"

# tomllib.load() requires a file object opened in binary mode.
with Path(CONFIG_PATH).open("rb") as f:
    config = tomllib.load(f)

# NOTE: a bare `config` expression here would not be displayed, because a
# notebook cell only echoes its *last* expression. Inspect `config` in a
# cell of its own if you need to see the parsed settings.

# ---------------------------------------------------
# 2. Setup logger
# ---------------------------------------------------
# The log level is read from the [logging] table of the config file.
logger = get_logger(name="pipeline_notebook", level=config["logging"]["level"])
logger.info("Notebook pipeline started")


19:24:46 | INFO | Notebook pipeline started


In [2]:
# Registry of pipeline steps: maps each step name that may appear in the
# config's "steps" list to the callable that implements it.
STEP_FUNCTIONS = dict(
    extract=extract_data,
    transform=transform_data,
    outliers=remove_outliers,
    normalize=normalize_data,
    encode=encode_categorical,
    load=save_data,
)

In [3]:
def _call_step(step, func, df, params):
    """Invoke a single step function with the arguments from its config section.

    Args:
        step: Name of the step being run (a key of ``STEP_FUNCTIONS``).
        func: Callable registered for ``step``.
        df: Data produced by the previous step, or ``None`` if nothing ran yet.
        params: The config section for this step (may be empty).

    Returns:
        The data to hand to the next step. The return value of the ``load``
        step is ignored, so the incoming data is passed through unchanged.

    Raises:
        ValueError: If a data-consuming step runs before any data exists.
        NotImplementedError: If ``step`` is registered but has no call
            convention defined here.
        KeyError: If a required key is missing from ``params``.
    """
    if step == "extract":
        # extract is the only step that creates data instead of consuming it.
        return func(params["input_path"])

    if df is None:
        raise ValueError(
            f"Step '{step}' needs input data, but no earlier step produced any; "
            "make sure 'extract' comes first in config['steps']."
        )

    if step == "transform":
        return func(
            df,
            method=params["method"],
            fill_value=params.get("fill_value"),
        )

    if step == "outliers":
        return func(
            df,
            method=params["method"],
            threshold=params["threshold"],
        )

    if step == "normalize":
        return func(df, method=params["method"])

    if step == "encode":
        return func(
            df,
            method=params["method"],
            target_column=params.get("target_column"),
        )

    if step == "load":
        func(df, output_path=params["output_path"])
        return df

    # Registered in STEP_FUNCTIONS but not wired up above: fail loudly rather
    # than silently skipping the step and logging it as finished.
    raise NotImplementedError(f"No call convention defined for step '{step}'")


def run_pipeline_from_config(config):
    """Run the pipeline steps listed in ``config["steps"]``, in order.

    For every step the function logs its start, advances the progress
    display, calls the matching entry of ``STEP_FUNCTIONS`` with parameters
    taken from ``config[<step name>]`` (an empty dict if that section is
    absent), and logs how long the call took.

    Args:
        config: Mapping with a ``"steps"`` list of step names and, optionally,
            one section per step holding that step's parameters.

    Returns:
        The data returned by the last data-producing step, or ``None`` if no
        step produced any.

    Raises:
        KeyError: If ``"steps"`` is missing, a step name is not registered in
            ``STEP_FUNCTIONS``, or a step's required parameter is missing.
        ValueError: If a data-consuming step runs before any data exists.
    """
    steps = config["steps"]

    # Validate every step name before running anything, so a typo near the
    # end of the list cannot leave the pipeline half executed.
    unknown = [step for step in steps if step not in STEP_FUNCTIONS]
    if unknown:
        raise KeyError(
            f"Unknown pipeline step(s) {unknown}; "
            f"valid steps are {list(STEP_FUNCTIONS)}"
        )

    progress = StepProgress(total=len(steps))
    df = None

    for step in steps:
        logger.info(f"➡ Running step: {step}")

        params = config.get(step, {})
        func = STEP_FUNCTIONS[step]

        # The progress display is advanced before the step executes.
        progress.update(step)

        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        df = _call_step(step, func, df, params)
        elapsed = time.perf_counter() - start

        logger.info(f"⏱ Step {step} finished in {elapsed:.3f}s")

    progress.finish()
    return df


In [4]:
# Execute every step listed in the config, then preview the first rows
# of the resulting frame.
df_final = run_pipeline_from_config(config=config)
df_final.head(n=5)

19:24:48 | INFO | ➡ Running step: extract


▶️  Step 1/5 [██████------------------------]  20.0% | extract | elapsed 0.0s

19:24:48 | INFO | ⏱ Step extract finished in 0.004s
19:24:48 | INFO | ➡ Running step: transform


▶️  Step 2/5 [████████████------------------]  40.0% | transform | elapsed 0.0s

19:24:48 | INFO | ⏱ Step transform finished in 0.002s
19:24:48 | INFO | ➡ Running step: normalize


Running transform step...
Filling missing numeric values with mean...
✔ Missing value handling done
▶️  Step 3/5 [██████████████████------------]  60.0% | normalize | elapsed 0.0s

19:24:48 | INFO | ⏱ Step normalize finished in 0.004s
19:24:48 | INFO | ➡ Running step: encode


Applying Min-Max Normalization...
▶️  Step 4/5 [████████████████████████------]  80.0% | encode | elapsed 0.0s

19:24:48 | INFO | ⏱ Step encode finished in 0.004s
19:24:48 | INFO | ➡ Running step: load


Target Encoding mainroad using target 'price'
Target Encoding guestroom using target 'price'
Target Encoding basement using target 'price'
Target Encoding hotwaterheating using target 'price'
Target Encoding airconditioning using target 'price'
Target Encoding prefarea using target 'price'
Target Encoding furnishingstatus using target 'price'
▶️  Step 5/5 [██████████████████████████████] 100.0% | load | elapsed 0.0s

19:24:48 | INFO | ⏱ Step load finished in 0.007s



Completed all 5 steps in 0.03s


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,1.0,0.396564,0.6,0.333333,0.666667,0.280673,0.23998,0.237892,0.257887,0.36911,0.666667,0.350558,0.324303
1,0.909091,0.502405,0.6,1.0,1.0,0.280673,0.23998,0.237892,0.257887,0.36911,1.0,0.228544,0.324303
2,0.909091,0.571134,0.4,0.333333,0.333333,0.280673,0.23998,0.300633,0.257887,0.211423,0.666667,0.350558,0.271719
3,0.906061,0.402062,0.6,0.09515,0.333333,0.280673,0.23998,0.300633,0.257887,0.36911,1.0,0.350558,0.324303
4,0.836364,0.396564,0.6,0.09515,0.333333,0.280673,0.350034,0.300633,0.257887,0.36911,0.666667,0.228544,0.324303
