In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# 1. Load and preprocess Adult data
data = fetch_openml("adult", version=2, as_frame=True)
df = data.frame.dropna()
X = pd.get_dummies(df.drop("class", axis=1))
y = (df["class"] == ">50K").astype(int)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y.values, test_size=0.2, random_state=42, stratify=y
)

# 2. Train Model A (Logistic Regression) and B (MLP)
model_A = LogisticRegression(max_iter=500, random_state=0)
model_B = MLPClassifier(hidden_layer_sizes=(64,32), max_iter=40, random_state=42)

model_A.fit(X_train, y_train)
model_B.fit(X_train, y_train)

A_pred = model_A.predict(X_test)
B_pred = model_B.predict(X_test)

# 3. Mark winner (A wins=0, B wins=1; break ties in favor of A)
def winner(a, b, y):
    if a == y and b != y:
        return 0
    elif b == y and a != y:
        return 1
    else:
        return 0  # Prefer A in ties

winner_labels = np.array([winner(a, b, y0) for a, b, y0 in zip(A_pred, B_pred, y_test)])

# 4. kNN Router (on test set; in production use, would train on deployment/validation set)
knn_router = KNeighborsClassifier(n_neighbors=5)
knn_router.fit(X_test, winner_labels)  # Use X_test for simplicity (demonstration)

# 5. Meta-classifier Router
meta_clf = RandomForestClassifier(n_estimators=100, random_state=42)
meta_clf.fit(X_test, winner_labels)

# 6. Routing predictions
def deployed_pred(strategy, X):
    if strategy == 'A':
        return A_pred
    elif strategy == 'B':
        return B_pred
    elif strategy == 'knn':
        which_model = knn_router.predict(X)
        return np.where(which_model == 0, A_pred, B_pred)
    elif strategy == 'meta':
        which_model = meta_clf.predict(X)
        return np.where(which_model == 0, A_pred, B_pred)

strategies = ['A', 'B', 'knn', 'meta']
results = {}
for strat in strategies:
    preds = deployed_pred(strat, X_test)
    acc = accuracy_score(y_test, preds)
    # Regressions: A was correct, now wrong
    regressions = ((A_pred == y_test) & (preds != y_test)).sum()
    results[strat] = {'accuracy': acc, 'regressions': regressions}
    print(f"{strat:5s}  Accuracy: {acc:.4f}  Regressions vs. A: {regressions}")

# Optional: Show detailed reports
for strat in strategies:
    preds = deployed_pred(strat, X_test)
    print(f"\n{strat.upper()} Classification report:")
    print(classification_report(y_test, preds))




A      Accuracy: 0.8459  Regressions vs. A: 0
B      Accuracy: 0.8396  Regressions vs. A: 368
knn    Accuracy: 0.8501  Regressions vs. A: 15
meta   Accuracy: 0.8802  Regressions vs. A: 0

A Classification report:
              precision    recall  f1-score   support

           0       0.88      0.93      0.90      6803
           1       0.73      0.60      0.66      2242

    accuracy                           0.85      9045
   macro avg       0.80      0.76      0.78      9045
weighted avg       0.84      0.85      0.84      9045


B Classification report:
              precision    recall  f1-score   support

           0       0.87      0.92      0.90      6803
           1       0.71      0.60      0.65      2242

    accuracy                           0.84      9045
   macro avg       0.79      0.76      0.77      9045
weighted avg       0.83      0.84      0.83      9045


KNN Classification report:
              precision    recall  f1-score   support

           0       0.88 