In [2]:
import json
from pathlib import Path

import cloudpickle
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split


In [None]:
# Test partial input: score a single sample for which only four features are supplied.
# NOTE: unpickling can execute arbitrary code; only load bundles from a trusted source.
bundle_path = "mushroom_lr_artifacts/mushroom_predict_bundle.pkl"
with Path(bundle_path).open("rb") as f:
    pp = cloudpickle.load(f)

# Features that are not listed here are simply absent from the dict.
input_data = dict(
    [
        ("cap-shape", "f"),
        ("cap-surface", "f"),
        ("cap-color", "n"),
        ("bruises", "f"),
    ]
)

# The value returned by the bundle is printed with four decimals.
prob = pp.predict_from_partial(input_data)
print(f"{prob:.4f}")

In [None]:
# Monte Carlo evaluation of the saved predictor on partially observed test rows:
# sample rows from the held-out split, blank out a random subset of each row's
# features, score the row with the bundle, and report accuracy / ROC-AUC.

# === 1) Load dataset ===
df = pd.read_csv("data/mushrooms.csv")

# === 1.1) Load kept feature list from JSON ===
with open("mushroom_lr_artifacts/kept_features.json", "r") as f:
    kept_features = json.load(f)

# === 1.2) Keep only those features ===
df = df[kept_features + ["class"]]

# Target encoding: "e" -> 0, "p" -> 1
y = df["class"].map({"e": 0, "p": 1})
if y.isna().any():
    # Series.map() silently turns unmapped labels into NaN; fail here with a clear
    # message instead of letting NaNs break the stratified split or the metrics.
    unexpected = sorted(df.loc[y.isna(), "class"].astype(str).unique())
    raise ValueError(f"Unexpected values in 'class' column: {unexpected}")
X = df.drop(columns=["class"])

# === 2) Split dataset ===
# NOTE(review): test_size / random_state / stratify must match the training run for
# X_test to be genuinely held out -- the training code is not visible here, confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# === 3) Load the saved model bundle ===
# NOTE: unpickling can execute arbitrary code; only load bundles from a trusted source.
with open("mushroom_lr_artifacts/mushroom_predict_bundle.pkl", "rb") as f:
    pp = cloudpickle.load(f)

# === 4) Monte Carlo partial-input testing ===
rng = np.random.default_rng(42)  # fixed seed: sampled rows and masks are reproducible
missing_ratio = 0.5              # probability that each feature is hidden (drawn independently)
n_samples = 300                  # requested number of test rows

# Sampling without replacement raises ValueError when more rows are requested
# than the test split contains, so never ask for more than are available.
n_tested = min(n_samples, len(X_test))

preds, probs, trues = [], [], []
test_indices = rng.choice(len(X_test), size=n_tested, replace=False)

for idx in test_indices:
    row = X_test.iloc[idx].to_dict()
    y_true = y_test.iloc[idx]

    # Randomly mask features: a hidden feature is passed as an empty string.
    # NOTE(review): presumably predict_from_partial treats "" as "missing" -- confirm
    # against the bundle's implementation.
    partial_input = {
        k: (v if rng.random() > missing_ratio else "")
        for k, v in row.items()
    }

    # Score the partially observed row and threshold at 0.5 for the hard label.
    prob = pp.predict_from_partial(partial_input)
    pred = 1 if prob >= 0.5 else 0

    probs.append(prob)
    preds.append(pred)
    trues.append(y_true)

# === 5) Evaluate ===
acc = accuracy_score(trues, preds)
auc = roc_auc_score(trues, probs)

print("\n=== Partial Input Test Results ===")
print(f"Missing ratio: {missing_ratio:.0%}")
print(f"Samples tested: {n_tested}")
print(f"Accuracy: {acc:.4f}")
print(f"ROC-AUC:  {auc:.4f}")


=== Partial Input Test Results ===
Missing ratio: 50%
Samples tested: 300
Accuracy: 0.9833
ROC-AUC:  0.9984
