# House Price Prediction Model Analysis

This notebook demonstrates how to use the trained models and analyze their performance.

In [None]:
# Setup imports
import sys
from pathlib import Path

# Make the project's top-level packages importable from this notebook.
# The project root is taken to be the parent of the current working directory,
# so this presumably expects the kernel to start inside a sub-folder of the
# project (e.g. a notebooks directory) — verify if imports below fail.
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from models.model_loader import get_model_instance
from models.train_model import generate_synthetic_data

# Configure the global plot appearance used by every figure drawn below.
# Order matters: the matplotlib style sheet is applied first and the seaborn
# palette second, so the palette call is the last one to touch the defaults.
# NOTE(review): presumably the style sheet defines its own color cycle, which
# the "husl" palette then replaces — confirm before reordering these calls.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

In [None]:
# Obtain the model registered under the 'random_forest' key and report its state.
model = get_model_instance('random_forest')

# Whether the model object considers itself loaded.
model_is_loaded = model.is_loaded()
print(f"Model loaded: {model_is_loaded}")

# Whatever descriptive information the model exposes about itself.
model_info = model.get_model_info()
print(f"Model info: {model_info}")

In [None]:
# Build a synthetic evaluation set.
# The argument 100 is presumably the number of samples to generate — confirm
# against generate_synthetic_data's signature.
test_data = generate_synthetic_data(100)

data_shape = test_data.shape
print(f"Test data shape: {data_shape}")

# Last expression of the cell: displays the first rows of the generated data.
test_data.head()

In [None]:
# Run the model over every row of the test set.
# Columns handed to the model, in this order.
feature_columns = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'grade']
features = test_data[feature_columns]

# model.predict is called once per row with a {column name: value} dict.
predictions = [
    model.predict(record.to_dict())
    for _, record in features.iterrows()
]

# Stored as a new column on test_data; the plotting cell below reads it
# alongside the 'price' column.
test_data['predicted_price'] = predictions
print(f"Predictions completed for {len(predictions)} samples")

In [None]:
# Scatter of actual vs. predicted prices with a y = x reference line.
actual_price = test_data['price']
predicted_price = test_data['predicted_price']
price_min, price_max = actual_price.min(), actual_price.max()

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(actual_price, predicted_price, alpha=0.6)
# Dashed red diagonal spanning the range of actual prices: points on this
# line are predictions that equal the actual value.
ax.plot([price_min, price_max], [price_min, price_max], 'r--', lw=2)
ax.set_xlabel('Actual Price ($)')
ax.set_ylabel('Predicted Price ($)')
ax.set_title('Actual vs Predicted House Prices')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Feature importance analysis.
# The code below treats the result of get_feature_importance() as a
# {feature name: score} mapping (it calls .items() on it); a falsy result
# (e.g. None or an empty mapping) means there is nothing to plot.
importance = model.get_feature_importance()
if importance:
    importance_df = pd.DataFrame(list(importance.items()), columns=['Feature', 'Importance'])
    # Ascending order: barh draws the first row at the bottom, so the
    # highest-scoring feature ends up at the top of the chart.
    importance_df = importance_df.sort_values('Importance', ascending=True)

    plt.figure(figsize=(10, 6))
    plt.barh(importance_df['Feature'], importance_df['Importance'])
    plt.xlabel('Feature Importance')
    plt.title('Feature Importance in House Price Prediction')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Text ranking, highest score first. Iterating the two columns directly
    # avoids building a Series per row as iterrows() does.
    print("\nFeature Importance Rankings:")
    ranked = importance_df.sort_values('Importance', ascending=False)
    for feature, score in zip(ranked['Feature'], ranked['Importance']):
        print(f"{feature}: {score:.3f}")
else:
    # Make the skip visible instead of leaving the cell with no output.
    print("No feature importance available for this model.")