In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression

In [2]:
# Synthetic regression problem with four features, all of them informative.
# The seed is pinned so that a fresh kernel + "Run All" regenerates exactly the
# same X and y (and therefore the same CSV and fitted model downstream).
X, y = make_regression(n_features=4, n_informative=4, random_state=0)

In [3]:
# Column labels for the four generated features, in the same order as the
# columns of X.
feature_names = [
    "float_feature",
    "discrete_feature",
    "toggle_feature",
    "categorical_feature",
]

# Wrap the feature matrix in a labelled frame and append the regression
# target as an extra "target" column.
data = pd.DataFrame(X, columns=feature_names).assign(target=y)

In [4]:
# Bucket the continuous values into 4 bins and keep only the integer bin
# codes, turning the column into a small-integer feature.
discrete_bins = pd.cut(data["discrete_feature"], bins=4)
data["discrete_feature"] = discrete_bins.cat.codes

In [5]:
# Split the continuous values into two bins, then relabel the resulting
# bin codes (as strings) to the human-readable "No"/"Yes" values.
toggle_codes = pd.cut(data["toggle_feature"], bins=2).cat.codes
toggle_labels = {"1": "Yes", "0": "No"}
data["toggle_feature"] = toggle_codes.astype(str).replace(toggle_labels)

In [6]:
# Number of categories to derive from the continuous column.
n_cat = 4

# Lookup from stringified bin code to its display label, e.g. "2" -> "Category 2".
cat_map = {str(code): f"Category {code}" for code in range(n_cat)}

# Bin the continuous values into n_cat bins, then swap each bin code for its
# label so the column holds strings.
category_codes = pd.cut(data["categorical_feature"], bins=n_cat).cat.codes
data["categorical_feature"] = category_codes.astype(str).replace(cat_map)

In [7]:
# Persist the prepared frame (features + target) to disk; the row index is
# not written to the file.
data.to_csv(path_or_buf="sample_data.csv", index=False)

In [8]:
from sklearn.compose import make_column_transformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder

In [9]:
# Columns that hold string labels and therefore need encoding before they
# can be fed to the regressor.
cat_features = ["toggle_feature", "categorical_feature"]

# One-hot encode the string-valued columns; every other column is passed
# through to the regressor unchanged.
preprocessor = make_column_transformer(
    (OneHotEncoder(), cat_features),
    remainder="passthrough",
)

# Preprocessing + gradient boosting in a single estimator. The regressor is
# seeded so that repeated runs on the same data yield the same fitted model.
model = make_pipeline(preprocessor, GradientBoostingRegressor(random_state=0))

In [10]:
# Train the pipeline on the feature columns, then display its predictions
# for those same rows (in-sample predictions, not a held-out evaluation).
model.fit(data[feature_names], y)
model.predict(data[feature_names])

array([   2.73728556,   36.0495944 ,   50.92656736,   77.02064548,
        -63.21055864,   79.72731056,    3.66269959,   41.22668421,
        -63.68173402,  -15.27327672,  -64.4331673 ,  -63.90381919,
        -15.15931355,  -66.72321425,  -59.45678202,    4.13832654,
        -74.06209811,   84.71038098,   46.31771328,   -3.43477098,
        -20.31033646,  -49.20306842,   10.53892973,    7.1886117 ,
        -65.08113177,  -24.56996591,  -72.11652747, -114.6209501 ,
        -37.77857964,   -2.72790157,  -54.83087553,   59.22510545,
        -39.04089394,  -76.35536024,   41.53536006,  -59.28802088,
        -31.21587188,  -55.85260059,   10.24631662,   55.93973626,
        -79.86232649,   60.18004734,  -81.89593646,  -38.91580174,
         37.13965914,  -43.19698786,  -35.79236718,  -40.18329807,
         86.757876  ,   41.66936052,   46.31771328,   31.76366777,
          6.61227481, -130.64122922,  102.3604229 ,   15.9482643 ,
         45.86102665,    2.00336267,   98.5649919 ,   10.98104