In [1]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.datasets import fetch_openml

from dense_feature_mixer import DenseFeatureMixer
from dense_feature_mixer.compose import ColumnTransformerWithNames


# Fixed seed for the train/test split so that a fresh "Restart & Run All"
# reproduces the same partition (and therefore comparable scores).
RANDOM_STATE = 42

# Fetch version 1 of the OpenML "titanic" dataset as pandas objects
# (features in X, target in y).
X, y = fetch_openml("titanic", version=1, return_X_y=True, as_frame=True)

# Remove the listed columns, then discard every row that still has a missing value.
# NOTE(review): dropna() looks at *all* remaining columns, not only the ones
# listed in numeric_vars / categorical_vars below. If other columns remain,
# rows may be discarded because of features the model never uses; consider
# dropna(subset=numeric_vars + categorical_vars) -- confirm before changing.
X = X.drop(["name", "ticket", "cabin", "boat", "body"], axis=1).dropna()

# Keep only the targets of the surviving rows and cast the labels to int.
y = y.reindex(X.index).astype(int)

# Column groups consumed by the preprocessing step of the modelling cell.
numeric_vars = ["age", "fare"]
categorical_vars = ["sex", "pclass", "parch", "embarked"]

# Hold out 20% of the rows for testing; the seed makes the split deterministic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)


In [2]:
# Encode categorical columns as ordinal codes; categories not seen during fit
# are mapped to the sentinel value 999.
ordinal_encoder = OrdinalEncoder(
    handle_unknown="use_encoded_value",
    unknown_value=999,
)

# Preprocessing: standardise the numeric columns, ordinal-encode the
# categorical ones, and drop every other column.
transformers = [
    ("num_transformer", StandardScaler(), numeric_vars),
    ("cat_transformer", ordinal_encoder, categorical_vars),
]
col_transformer = ColumnTransformerWithNames(transformers, remainder="drop")

# The mixer receives the same sentinel (999) that the encoder emits for
# unknown categories -- presumably so it can treat them specially; verify
# against the DenseFeatureMixer documentation.
feature_mixer = DenseFeatureMixer(
    task="classification",
    categorical_vars=categorical_vars,
    unknown_category=999,
)
classifier = LogisticRegression()

# Full model: preprocessing -> DenseFeatureMixer -> logistic regression.
pipe = make_pipeline(col_transformer, feature_mixer, classifier)

# No explicit pipe.fit(...) here: cross_val_score fits the pipeline itself
# on each of the 3 folds of the training split.
cv_score = cross_val_score(pipe, X_train, y_train, cv=3)
print(f"Mean score: {cv_score.mean():.2f} ({cv_score.std():.2f})")

Mean score: 0.80 (0.03)
