# Notebook 5: Inferencing with Deployed Model

### In this notebook, we deploy the model previously logged to MLflow and make some predictions to demonstrate that it works. We also transform some of the data in the test set to practice model monitoring in Databricks.

In [5]:
import mlflow
import mlflow.sklearn
import pandas as pd
import os

# ----------------------------
# 1. Load registered model
# ----------------------------
# Name and version identifying the registered model to load
model_name = "rf_ames_model"
model_version = 1

# MLflow model URIs take the form "models:/<name>/<version>"
model_uri = "models:/{}/{}".format(model_name, model_version)

# Load through MLflow's scikit-learn flavor so .predict() is available directly
loaded_model = mlflow.sklearn.load_model(model_uri)
print("Model loaded for inference!")



Model loaded for inference!


In [6]:
# ----------------------------
# 2. Load test data for evaluation
# ----------------------------

# Relative paths keep the notebook runnable for anyone who clones the repo
X_test_path = "../data/X_test.csv"
y_test_path = "../data/y_test.csv"

# Load test features and target
X_test = pd.read_csv(X_test_path)
# Squeeze only the column axis: a bare .squeeze() would also collapse a
# single-row file into a scalar, which breaks .shape / .head() below.
y_test = pd.read_csv(y_test_path).squeeze("columns")

# Fail fast if features and target are misaligned rather than scoring garbage
assert len(X_test) == len(y_test), (
    f"X_test has {len(X_test)} rows but y_test has {len(y_test)}"
)

# Verification
print(f"Loaded X_test from: {os.path.abspath(X_test_path)}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")
print(X_test.head())
print(y_test.head())

Loaded X_test from: /Users/iankidwell/Documents/Housing-Price-MLops/data/X_test.csv
X_test shape: (586, 55), y_test shape: (586,)
   Order        PID  MS SubClass  Lot Frontage  Lot Area  Overall Qual  \
0   1358  903427090           70          68.0      5100             8   
1   2368  527450460          160          21.0      1890             6   
2   2823  908128100           60          62.0      7162             7   
3   2127  907135180           20          60.0      8070             4   
4   1545  910200080           30          50.0      7000             6   

   Overall Cond  Year Built  Year Remod/Add  Mas Vnr Area  ...  Open Porch SF  \
0             7        1925            1996           0.0  ...             63   
1             7        1972            1972         380.0  ...              0   
2             5        2003            2004         190.0  ...             57   
3             5        1994            1995           0.0  ...              0   
4             8     

In [7]:
import numpy as np
from sklearn.metrics import mean_squared_error

# ----------------------------
# 3. Make predictions on test set
# ----------------------------
# The model's raw output is treated as log1p(SalePrice)
preds_log = loaded_model.predict(X_test)

# Undo the log1p transform on both predictions and targets so the error
# is measured in SalePrice units
preds = np.expm1(preds_log)
y_test_actual = np.expm1(y_test)

# RMSE = square root of the mean squared error
mse = mean_squared_error(y_test_actual, preds)
rmse = np.sqrt(mse)
print(f"Test RMSE on deployed model: {rmse:.2f}")

# Show the first 5 predictions, numbered from 1
for example_no, predicted_price in enumerate(preds[:5], start=1):
    print(f"Example {example_no} predicted SalePrice: ${predicted_price:.2f}")


Test RMSE on deployed model: 3305.82
Example 1 predicted SalePrice: $161847.65
Example 2 predicted SalePrice: $116013.15
Example 3 predicted SalePrice: $196344.90
Example 4 predicted SalePrice: $124188.16
Example 5 predicted SalePrice: $125872.61


In [9]:
import mlflow
import pandas as pd
import os

# ----------------------------
# 4. Log test evaluation to MLflow
# ----------------------------
# A plain experiment name (instead of a hardcoded workspace path) keeps the
# notebook portable; mlflow.set_experiment() creates it if it doesn't exist.
experiment_name = "ames_housing_test_eval"
mlflow.set_experiment(experiment_name)

# Start a run in this experiment
with mlflow.start_run(run_name="deployed_model_test_eval") as run:

    # `rmse` is computed in the prediction cell above
    mlflow.log_metric("test_rmse", rmse)

    # Both columns must be on the same scale. `preds` has already been
    # inverted with expm1, so pair it with `y_test_actual` (also inverted in
    # the prediction cell) rather than the log-scale `y_test`.
    eval_df = pd.DataFrame({
        "y_true": y_test_actual,
        "y_pred": preds,
    })

    # Save next to the other data files rather than in the notebook folder
    eval_csv_path = "../data/test_predictions.csv"
    eval_df.to_csv(eval_csv_path, index=False)

    # Attach the CSV to the run as an artifact
    mlflow.log_artifact(eval_csv_path)

    print(f"Logged test evaluation to MLflow run {run.info.run_id}")
    print(f"Saved predictions to: {os.path.abspath(eval_csv_path)}")

Logged test evaluation to MLflow run a1efa3f0a87e4e3aa2fe8adb77eb6a8f
Saved predictions to: /Users/iankidwell/Documents/Housing-Price-MLops/data/test_predictions.csv


# Feature Changes

## This is done to test out the model monitoring dashboard in Databricks


In [None]:
# Print every column of X_test, numbered from 1
# NOTE(review): the saved output of this cell lists "SalePrice" among the
# columns — confirm the target is not leaking into the features fed to the model.
print("Features in the dataset:")
for position, column_name in enumerate(X_test.columns, start=1):
    print(f"{position}. {column_name}")


Features in the dataset:
1. MS SubClass
2. Lot Frontage
3. Lot Area
4. Overall Qual
5. Overall Cond
6. Year Built
7. Year Remod/Add
8. Mas Vnr Area
9. Exter Qual
10. Exter Cond
11. Bsmt Qual
12. Bsmt Cond
13. Bsmt Exposure
14. BsmtFin Type 1
15. BsmtFin SF 1
16. BsmtFin Type 2
17. BsmtFin SF 2
18. Bsmt Unf SF
19. Total Bsmt SF
20. Heating QC
21. 1st Flr SF
22. 2nd Flr SF
23. Low Qual Fin SF
24. Gr Liv Area
25. Bsmt Full Bath
26. Bsmt Half Bath
27. Full Bath
28. Half Bath
29. Bedroom AbvGr
30. Kitchen AbvGr
31. Kitchen Qual
32. TotRms AbvGrd
33. Functional
34. Fireplaces
35. Fireplace Qu
36. Garage Yr Blt
37. Garage Finish
38. Garage Cars
39. Garage Area
40. Garage Qual
41. Garage Cond
42. Paved Drive
43. Wood Deck SF
44. Open Porch SF
45. Enclosed Porch
46. 3Ssn Porch
47. Screen Porch
48. Pool Area
49. Pool QC
50. Misc Val
51. Mo Sold
52. Yr Sold
53. SalePrice


In [10]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import mlflow

# Work on a copy so the original test features stay untouched
X_test_mutated = X_test.copy()

# Columns that receive synthetic noise
feature_1 = "Lot Area"
feature_2 = "Fireplaces"

# Seed the global NumPy RNG so the perturbation is repeatable, then add
# standard-normal noise to each column in turn (feature_1 is drawn first)
np.random.seed(42)
n_rows = X_test_mutated.shape[0]
for noisy_col in (feature_1, feature_2):
    noise = np.random.normal(0, 1, size=n_rows)
    X_test_mutated[noisy_col] = X_test_mutated[noisy_col] + noise

# Score the perturbed features with the loaded model and undo the log transform
mutated_preds_log = loaded_model.predict(X_test_mutated)
mutated_preds = np.expm1(mutated_preds_log)

# RMSE against the back-transformed targets (`y_test` comes from the data-loading cell)
y_test_actual = np.expm1(y_test)
mutated_mse = mean_squared_error(y_test_actual, mutated_preds)
mutated_rmse = mutated_mse ** 0.5
print(f"Test RMSE on mutated test data: {mutated_rmse:.2f}")

# ----------------------------
# Log evaluation with MLflow
# ----------------------------

# Same experiment name as the unperturbed evaluation so both runs sit side by side
experiment_name = "ames_housing_test_eval"
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name="mutated_test_eval") as run:
    mlflow.log_metric("mutated_test_rmse", mutated_rmse)
    print(f"Logged RMSE to MLflow run {run.info.run_id}")

Test RMSE on mutated test data: 3369.46
Logged RMSE to MLflow run a426a1daf6c04e159aae3b835b57d28c
