In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Toy dataset: two numeric columns (each with one missing entry) and one
# categorical column, plus a binary label for every row.
data = {
    'age': [25, 30, None, 35, 40],
    'income': [50000, 60000, 55000, 80000, None],
    'city': ['New York', 'Los Angeles', 'Chicago', 'New York', 'Los Angeles'],
}
df = pd.DataFrame(data=data)
y = [0, 1, 0, 1, 0]

# Hold out 20% of the five rows for evaluation; the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df, y, test_size=0.2, random_state=42
)

# Column groups, routed to different preprocessing branches below.
numerical_features = ['age', 'income']
categorical_features = ['city']

# Numeric branch: fill gaps with the column mean, then standardize.
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown='ignore' keeps transform() from raising when a
# category shows up that was not present during fit.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each branch to its own columns and concatenate the results.
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

# Preprocessing and classifier live in one estimator, so the imputers,
# scaler and encoder are fitted on the training rows only.
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
])

# fit() returns the pipeline itself, so prediction can be chained onto it.
y_pred = model_pipeline.fit(X_train, y_train).predict(X_test)

# Fraction of held-out labels predicted correctly.
# NOTE(review): with only five rows the held-out set is tiny, so this score
# is a smoke test of the pipeline rather than a meaningful estimate.
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)


0.0
