# In [None]:

# ── 4) Fix mixed‐type categoricals
# Object/category columns may hold a mix of strings, numbers and missing
# values. Real values are cast to str so each column has one uniform type,
# but missing entries are kept as NaN: a blanket .astype(str) would turn them
# into the literal strings "nan"/"None", and a later imputation step would
# then have nothing left to fill.
# NOTE(review): cat_cols is derived from X_par only — this assumes X_lon has
# the same columns; confirm against the cell that builds X_lon.
cat_cols = X_par.select_dtypes(include=["object", "category"]).columns


def _stringify_categoricals(frame, columns):
    """Return ``frame[columns]`` with non-missing values cast to ``str``.

    Missing entries (as reported by ``notna``) come back as NaN instead of
    being stringified, so they remain detectable as missing downstream.
    """
    block = frame[columns]
    # .where keeps the stringified value where the original was present and
    # falls back to its default (NaN) everywhere else.
    return block.astype(str).where(block.notna())


X_par[cat_cols] = _stringify_categoricals(X_par, cat_cols)
X_lon[cat_cols] = _stringify_categoricals(X_lon, cat_cols)

# ── 5) London train/test split
# Hold out 20% of the London rows for evaluation; the fixed random_state keeps
# the partition reproducible between runs.
X_L_train, X_L_test, y_L_train, y_L_test = train_test_split(
    X_lon,
    y_lon,
    test_size=0.20,
    random_state=42,
)

# ── 6) Build two separate preprocessors
# NOTE(review): only int64/float64 columns are treated as numeric; any other
# dtype that is not in cat_cols (e.g. int32, float32, bool) is silently
# discarded by remainder="drop" — confirm that is intended.
num_cols = X_par.select_dtypes(include=["int64", "float64"]).columns


def _make_preprocessor(numeric_columns, categorical_columns):
    """Build a fresh, unfitted ColumnTransformer for the given column sets.

    Numeric columns are median-imputed and then standardised. Categorical
    columns have missing values replaced by the constant "missing" and are
    then one-hot encoded, with categories unseen at fit time ignored at
    transform time. Columns in neither set are dropped.

    Each call returns new estimator objects, so the returned transformers
    share no fitted state with one another.
    """
    numeric_steps = Pipeline([
        ("imp", SimpleImputer(strategy="median")),
        ("scale", StandardScaler()),
    ])
    categorical_steps = Pipeline([
        ("imp", SimpleImputer(strategy="constant", fill_value="missing")),
        ("oh", OneHotEncoder(handle_unknown="ignore")),
    ])
    return ColumnTransformer(
        [
            ("num", numeric_steps, numeric_columns),
            ("cat", categorical_steps, categorical_columns),
        ],
        remainder="drop",
    )


# Same recipe, two independent instances: one is fitted inside the teacher
# pipeline, the other inside the student pipeline.
preprocess_parent = _make_preprocessor(num_cols, cat_cols)
preprocess_london = _make_preprocessor(num_cols, cat_cols)

# ── 7) Teacher: Ridge on parent markets
# Preprocessing and the regressor are chained so the transformers are fitted
# on exactly the data the Ridge model sees.
teacher = Pipeline(
    steps=[
        ("prep", preprocess_parent),
        ("reg", Ridge(alpha=10.0)),
    ]
)
print("Training teacher on parent markets…")
teacher.fit(X_par, y_par)

# ── 8) Soft-labels: teacher predictions on London train
z_T = teacher.predict(X_L_train)

# ── 9) Pseudo-labels for joint loss
# Convex blend of the true London targets (weight alpha) and the teacher's
# predictions (weight 1 - alpha). This alpha is the blend weight only; it is
# unrelated to the `alpha` regularisation argument passed to Ridge.
alpha = 0.5
y_pseudo = (1 - alpha) * z_T + alpha * y_L_train.values

# ── 10) Student: Ridge on London with pseudo-labels
# The student gets its own preprocessor instance and is fitted against the
# blended targets rather than the raw London targets.
student = Pipeline(
    steps=[
        ("prep", preprocess_london),
        ("reg", Ridge(alpha=1.0)),
    ]
)
print("Training student on London (joint‐loss pseudo-labels)…")
student.fit(X_L_train, y_pseudo)

# ── 11) Evaluate on London test
def report(name, model):
    """Print RMSE and R² for ``model`` on the held-out London test split.

    Args:
        name: Label shown at the start of the printed line (left-aligned,
            padded to 10 characters).
        model: Fitted estimator exposing ``predict``.

    Reads the module-level ``X_L_test`` and ``y_L_test``; returns nothing.
    """
    y_hat = model.predict(X_L_test)
    root_mse = np.sqrt(mean_squared_error(y_L_test, y_hat))
    r_squared = r2_score(y_L_test, y_hat)
    print(f"{name:<10} | RMSE: {root_mse:,.0f}  R²: {r_squared:.3f}")

print("\n=== Distillation Results on London Test ===")
# Teacher first, then student, both scored on the same London test split.
for label, fitted in (("Teacher", teacher), ("Student", student)):
    report(label, fitted)