In [1]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

### Dataset

In [2]:
# Toy dataset for exercising the pipeline end to end (swap in your own data).
# The numeric column contains one missing entry (None).
data = {
    "numeric_feature": [1, 2, None, 4, 5],
    "categorical_feature": ["A", "B", "A", "B", "A"],
    "label": [0, 1, 1, 0, 1],
}
df = pd.DataFrame(data=data)

### Separate Labels

In [3]:
# Feature matrix: every column except the target.
X = df.drop(columns=["label"])
# Target vector.
y = df["label"]

### Train/Test Split

In [4]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Preprocessing Steps
#### Define the Steps

In [5]:
# Define preprocessing steps
# Numeric columns: fill missing values with the column mean, then standardize.
numeric_features = ["numeric_feature"]
numeric_transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="mean")), ("scaler", StandardScaler())]
)

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode.
categorical_features = ["categorical_feature"]
categorical_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        # handle_unknown="ignore" encodes a category that was not present at fit
        # time as an all-zero row; the default ("error") would raise during
        # predict/score as soon as the test split contains an unseen category.
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

#### Gather All Steps

In [6]:
# Route each column group through its own transformer. Columns not listed in
# either group are dropped (ColumnTransformer's default remainder behaviour).
preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

### Create ML Pipeline
1. Preprocessing Steps
2. ML Model to Apply

In [7]:
# Chain preprocessing and the estimator so that every fit/predict call applies
# the same transformations. To try another model, replace RandomForestClassifier
# (the step name "classifier" can stay as is).
# random_state is pinned because random forests are stochastic; without it the
# fitted model, and therefore the reported accuracy, can change on every run.
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", RandomForestClassifier(random_state=42)),
    ]
)

### Fit the Model

In [8]:
# Train the whole pipeline (preprocessing + classifier) on the training split.
pipeline.fit(X=X_train, y=y_train)

### Evaluate the Model

In [9]:
# Mean accuracy on the held-out split. With the 5-row sample data and
# test_size=0.2 the test set is a single row, so this can only be 0.0 or 1.0.
accuracy = pipeline.score(X=X_test, y=y_test)
print(f"Model Accuracy: {accuracy}")

Model Accuracy: 0.0
