In [2]:
from pathlib import Path

import json
import pandas as pd
import numpy as np
import cloudpickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix


In [3]:
# Load the serialized prediction bundle from the artifacts directory.
# NOTE(review): unpickling can execute arbitrary code — only load bundles from a trusted source.
bundle_path = "mushroom_lr_artifacts/mushroom_predict_bundle.pkl"
with open(bundle_path, mode="rb") as bundle_file:
    pp = cloudpickle.load(bundle_file)

# Smoke test: score an input in which only the "odor" feature is supplied.
input_data = {"odor": "a"}

prob = pp.predict_from_partial(input_data)
print(format(prob, ".4f"))

0.2535


In [5]:
import cloudpickle

# --- Load the serialized prediction bundle ---
# NOTE(review): unpickling can execute arbitrary code — only load bundles from a trusted source.
bundle_path = "mushroom_lr_artifacts/mushroom_predict_bundle.pkl"
with open(bundle_path, mode="rb") as bundle_file:
    pp = cloudpickle.load(bundle_file)

# --- Candidate values to probe, keyed by feature name ---
feature_values = {
    "odor": ["a", "c", "f", "l", "m", "n", "p", "s", "y"],
    "gill-color": ["b", "e", "g", "h", "k", "n", "o", "p", "r", "u", "w", "y"],
    "gill-size": ["b", "n"],
    "spore-print-color": ["b", "h", "k", "n", "o", "r", "u", "w", "y"],
    "ring-type": ["e", "f", "l", "n", "p"],
}

# --- Probe the predictor with exactly one known feature at a time ---
# Each call passes a single feature/value pair and nothing else, and the
# returned probability is printed next to the pair that produced it.
for feature in feature_values:
    print(f"\n=== Feature: {feature} ===")
    for val in feature_values[feature]:
        prob = pp.predict_from_partial({feature: val})
        print(f"{feature} = {val} → predicted probability = {prob:.4f}")



=== Feature: odor ===
odor = a → predicted probability = 0.2535
odor = c → predicted probability = 0.6192
odor = f → predicted probability = 0.9271
odor = l → predicted probability = 0.2767
odor = m → predicted probability = 0.4988
odor = n → predicted probability = 0.0635
odor = p → predicted probability = 0.6486
odor = s → predicted probability = 0.7916
odor = y → predicted probability = 0.7703

=== Feature: gill-color ===
gill-color = b → predicted probability = 0.8172
gill-color = e → predicted probability = 0.3267
gill-color = g → predicted probability = 0.5748
gill-color = h → predicted probability = 0.6056
gill-color = k → predicted probability = 0.3077
gill-color = n → predicted probability = 0.2322
gill-color = o → predicted probability = 0.3059
gill-color = p → predicted probability = 0.4110
gill-color = r → predicted probability = 0.5002
gill-color = u → predicted probability = 0.2428
gill-color = w → predicted probability = 0.2863
gill-color = y → predicted probability = 0

# Test partial input: Monte Carlo evaluation with randomly masked features

In [4]:
# --- Dataset: read the imputed CSV ---
df = pd.read_csv("data/output/mushrooms_new_imputed.csv")

# --- Feature list: names of the columns to keep, stored as JSON in the artifacts directory ---
with Path("mushroom_lr_artifacts/kept_features.json").open("r") as features_file:
    kept_features = json.load(features_file)

# --- Restrict the frame to the kept features plus the "class" column ---
selected_columns = kept_features + ["class"]
df = df[selected_columns]

# --- Model: load the saved prediction bundle ---
# NOTE(review): unpickling can execute arbitrary code — only load bundles from a trusted source.
with Path("mushroom_lr_artifacts/mushroom_predict_bundle.pkl").open("rb") as bundle_file:
    pp = cloudpickle.load(bundle_file)


In [5]:
def Monte_Carlo_test(missing_ratio=0.5, n_samples=300, threshold=0.3, seed=42):
    """Evaluate the loaded predictor on randomly masked rows of the held-out split.

    Reads the notebook-level ``df`` (feature columns plus ``"class"``) and ``pp``
    (the loaded prediction bundle). On every call the data is split with the
    same ``train_test_split`` settings, a random subset of test rows is drawn,
    each feature value is independently replaced by ``""`` with probability
    ``missing_ratio``, and every masked row is scored with
    ``pp.predict_from_partial``. Accuracy and ROC-AUC are printed.

    Parameters
    ----------
    missing_ratio : float, default 0.5
        Probability that any single feature value is blanked out.
    n_samples : int, default 300
        Number of test rows to score. Clamped to the size of the test split.
    threshold : float, default 0.3
        A row is predicted as class 1 when its probability is >= this value.
    seed : int, default 42
        Seed for the generator that picks the rows and the masked features, so
        calls with the same arguments are repeatable.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Target encoding: "e" -> 0, "p" -> 1.
    y = df["class"].map({"e": 0, "p": 1})
    X = df.drop(columns=["class"])

    # Only the held-out part is needed here.
    # NOTE(review): presumably these settings match the split used at training time — confirm.
    _, X_test, _, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42, stratify=y
    )

    rng = np.random.default_rng(seed)

    # rng.choice(..., replace=False) raises when asked for more rows than exist,
    # so never request more than the test split holds.
    n_drawn = min(n_samples, len(X_test))
    test_indices = rng.choice(len(X_test), size=n_drawn, replace=False)

    preds, probs, trues = [], [], []
    for idx in test_indices:
        row = X_test.iloc[idx].to_dict()

        # Keep a value when the draw exceeds missing_ratio; otherwise blank it.
        partial_input = {
            k: (v if rng.random() > missing_ratio else "")
            for k, v in row.items()
        }

        prob = pp.predict_from_partial(partial_input)

        probs.append(prob)
        preds.append(1 if prob >= threshold else 0)
        trues.append(y_test.iloc[idx])

    acc = accuracy_score(trues, preds)
    # ROC-AUC is undefined (roc_auc_score raises) when only one class was drawn,
    # which can happen for small n_samples; report NaN in that case.
    auc = roc_auc_score(trues, probs) if len(set(trues)) > 1 else float("nan")

    print("\n=== Partial Input Test Results ===")
    print(f"Missing ratio: {missing_ratio:.0%}")
    print(f"Samples tested: {n_drawn}")
    print(f"Accuracy: {acc:.4f}")
    print(f"ROC-AUC:  {auc:.4f}")

In [6]:
# Sweep the masking ratio from 0% to 100% in 11 evenly spaced steps.
# linspace fixes both endpoints and the number of points explicitly, whereas
# arange with a float step derives its length from a floating-point division.
for missing_ratio in np.linspace(0.0, 1.0, 11):
    Monte_Carlo_test(missing_ratio)


=== Partial Input Test Results ===
Missing ratio: 0%
Samples tested: 300
Accuracy: 0.6933
ROC-AUC:  0.8051


KeyboardInterrupt: 