In [None]:

def _clip_log1p(x):
    """Return ``log1p`` of *x* after clipping negative values to zero."""
    # Named module-level function (rather than a lambda) so that a pipeline
    # containing it can be serialized with the standard pickle/joblib tools.
    return np.log1p(np.clip(x, 0, None))


def make_preprocess_pipeline(feature_df: pd.DataFrame,
                             cat_cols=("city","branch"),
                             force_drop=("MEMBERNBR","snapshot_month")):
    """Build an unfitted ``ColumnTransformer`` for the columns of *feature_df*.

    Columns are routed to three branches:

    * ``log_num`` -- numeric columns whose name contains one of the
      money-like keywords below: values are clipped at zero, passed through
      ``log1p`` and then scaled with ``RobustScaler``.
    * ``num`` -- every other numeric column: ``RobustScaler`` only.
    * ``cat`` -- the requested categorical columns that exist in the frame:
      ``OneHotEncoder(handle_unknown="ignore")``.

    Any column not selected by one of the branches is dropped.

    Args:
        feature_df: Frame whose column names and dtypes decide the routing.
            Only the schema is inspected; the data is neither copied nor
            modified, and nothing is fitted here.
        cat_cols: Candidate categorical column names; names missing from
            ``feature_df`` are ignored.
        force_drop: Column names that must never be used as numeric features
            (identifiers and similar).

    Returns:
        The unfitted ``ColumnTransformer``.
    """
    log_keywords = ("deposit", "loan", "income", "pos_", "bill_",
                    "payroll", "otherdep", "net_worth")

    cat_cols = [c for c in cat_cols if c in feature_df.columns]

    # A categorical column stored with a numeric dtype (e.g. an integer code)
    # must not also be treated as a numeric feature, otherwise it would be
    # both scaled and one-hot encoded.
    excluded = set(force_drop) | set(cat_cols)
    num_cols = [
        c for c in feature_df.select_dtypes(include=[np.number]).columns
        if c not in excluded
    ]

    # log-transform for heavy-tailed money-ish cols; str() keeps the keyword
    # test from raising on non-string column labels.
    log_cols = [c for c in num_cols
                if any(k in str(c) for k in log_keywords)]
    log_col_set = set(log_cols)
    passthrough_cols = [c for c in num_cols if c not in log_col_set]

    log_pipe = Pipeline([
        ("log1p", FunctionTransformer(_clip_log1p)),
        ("scaler", RobustScaler())
    ])

    num_pipe = Pipeline([
        ("scaler", RobustScaler())
    ])

    pre = ColumnTransformer(
        transformers=[
            ("log_num", log_pipe, log_cols),
            ("num", num_pipe, passthrough_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ],
        remainder="drop"
    )
    return pre