In [8]:
# Step A: Load Dataset
import pandas as pd

# Download the seaborn copy of the Titanic table and immediately narrow it to
# the five columns used by the rest of the notebook (four inputs + the
# "survived" target), so `df` is fully rebuilt every time this cell runs.
df = pd.read_csv(
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv"
).loc[:, ["age", "fare", "sex", "embarked", "survived"]]


In [9]:
# Step B: Feature Groups
# Input columns grouped by kind; each group is routed to its own
# preprocessing branch when the ColumnTransformer is assembled.
num_features, cat_features = ["age", "fare"], ["sex", "embarked"]


In [10]:
# Step C: Transformation Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import PowerTransformer, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Numeric branch: median imputation -> Yeo-Johnson power transform -> scaling.
# NOTE(review): PowerTransformer standardizes its output by default
# (standardize=True), so the trailing StandardScaler looks redundant --
# confirm before removing, since dropping it would also remove the "scaler" step name.
numeric_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("power", PowerTransformer(method="yeo-johnson")),
    ("scaler", StandardScaler()),
])

# Categorical branch: most-frequent imputation -> one-hot encoding.
# handle_unknown="ignore" is set so categories not seen during fit do not
# raise at transform time.
categorical_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore")),
])

# Route each feature group to its branch. Columns not listed in either group
# are not passed to any branch here.
preprocessor = ColumnTransformer([
    ("num", numeric_pipeline, num_features),
    ("cat", categorical_pipeline, cat_features),
])


In [11]:
# Step D: Model Pipeline
from sklearn.linear_model import LogisticRegression

# End-to-end estimator: the column-wise preprocessing feeds a logistic
# regression. max_iter is set to 1000 for the solver.
pipeline = Pipeline([
    ("preprocessing", preprocessor),
    ("model", LogisticRegression(max_iter=1000)),
])


In [12]:
# Step E: Train & Evaluate
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Feature matrix (numeric columns first, then categorical) and the target.
X = df.loc[:, num_features + cat_features]
y = df.loc[:, "survived"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit on the training split, then predict the held-out rows in one chain
# (fit returns the fitted pipeline itself).
preds = pipeline.fit(X_train, y_train).predict(X_test)

# Last expression of the cell, so the held-out accuracy is displayed.
accuracy_score(y_test, preds)


0.7821229050279329

In [13]:
from sklearn.inspection import permutation_importance
# Permutation importance of the fitted pipeline on the held-out split, scored
# by accuracy with 10 repeats per feature and a fixed seed. Because the whole
# pipeline is passed, the columns of X_test (the raw inputs) are what get scored.
result = permutation_importance(
    pipeline, X_test, y_test,
    scoring="accuracy", n_repeats=10, random_state=42,
)


In [14]:
# Display the raw result object: per-feature mean and std of the importance
# plus the individual per-repeat values.
# NOTE(review): rows presumably follow X_test's column order
# (age, fare, sex, embarked) -- confirm before labelling them.
result

{'importances_mean': array([0.00391061, 0.00111732, 0.22513966, 0.00055866]),
 'importances_std': array([0.0025601 , 0.00741145, 0.03516004, 0.00391061]),
 'importances': array([[ 0.        ,  0.00558659,  0.        ,  0.00558659,  0.        ,
          0.00558659,  0.00558659,  0.00558659,  0.00558659,  0.00558659],
        [ 0.00558659,  0.        ,  0.01117318,  0.        ,  0.00558659,
         -0.01675978,  0.00558659,  0.00558659, -0.00558659,  0.        ],
        [ 0.25139665,  0.27932961,  0.24022346,  0.18994413,  0.24581006,
          0.19553073,  0.22346369,  0.16201117,  0.26256983,  0.20111732],
        [ 0.        ,  0.        ,  0.        , -0.00558659, -0.00558659,
          0.00558659,  0.00558659,  0.        ,  0.00558659,  0.        ]])}