In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings('ignore')

In [None]:

def load_and_prepare_data(test_size=0.2, random_state=42):
    """Load the California Housing data, split it, and standardise the features.

    Args:
        test_size: Forwarded to ``train_test_split`` to size the held-out set.
            Defaults to 0.2, the value this function previously hard-coded.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible. Defaults to 42, the previously hard-coded value.

    Returns:
        A 5-tuple ``(X_train_scaled, X_test_scaled, y_train, y_test,
        feature_names)``: the two feature sets after ``StandardScaler``,
        the matching target Series (named ``'PRICE'``), and
        ``housing.feature_names`` from the dataset bunch.
    """
    housing = fetch_california_housing()

    # Build features and target directly instead of adding the target as a
    # column and dropping it again; the resulting X and y are the same.
    X = pd.DataFrame(housing.data, columns=housing.feature_names)
    y = pd.Series(housing.target, name='PRICE')

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training split only, then reuse its statistics
    # for the test split so no test information leaks into preprocessing.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return X_train_scaled, X_test_scaled, y_train, y_test, housing.feature_names

def train_model(X_train, y_train, n_estimators=100, max_depth=10, random_state=42):
    """Fit a ``RandomForestRegressor`` on the given training data.

    Args:
        X_train: Training features, passed straight to ``model.fit``.
        y_train: Training targets, passed straight to ``model.fit``.
        n_estimators: Number of trees in the forest. Defaults to 100.
        max_depth: Maximum depth of each tree. Defaults to 10.
        random_state: Seed for the forest, for reproducible fits.
            Defaults to 42.

    The keyword defaults are the values this function previously hard-coded,
    so existing two-argument calls behave exactly as before.

    Returns:
        The fitted ``RandomForestRegressor`` instance.
    """
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=random_state,
    )
    model.fit(X_train, y_train)
    return model

def evaluate_model(model, X_test, y_test):
    """Score ``model`` on the held-out data.

    Calls ``model.predict(X_test)`` and compares the result with ``y_test``.

    Returns:
        dict with keys ``'RMSE'`` (square root of the mean squared error),
        ``'MAE'`` (mean absolute error) and ``'R2 Score'``, in that order.
    """
    predictions = model.predict(X_test)

    scores = {}
    # RMSE is derived from the MSE rather than computed by a dedicated call.
    scores['RMSE'] = np.sqrt(mean_squared_error(y_test, predictions))
    scores['MAE'] = mean_absolute_error(y_test, predictions)
    scores['R2 Score'] = r2_score(y_test, predictions)
    return scores

def print_feature_importance(model, feature_names):
    """Print each feature's importance, largest first.

    Reads ``model.feature_importances_``, labels the values with
    ``feature_names``, sorts them in descending order and prints one
    ``name: value`` line per feature with four decimal places, preceded by
    a blank line and a ``Feature Importance:`` header.
    """
    ranked = pd.Series(model.feature_importances_, index=feature_names)
    ranked = ranked.sort_values(ascending=False)

    print("\nFeature Importance:")
    for name, score in zip(ranked.index, ranked.to_numpy()):
        print(f"{name}: {score:.4f}")

def main():
    """Run the whole pipeline and report the results on stdout.

    Steps, in order: load and prepare the data, train the model, evaluate
    it on the test split, print every metric to four decimal places, then
    print the feature importances.
    """
    print("Loading and preparing data...")
    prepared = load_and_prepare_data()
    X_train, X_test, y_train, y_test, feature_names = prepared

    print("Training model...")
    forest = train_model(X_train, y_train)

    print("Evaluating model...")
    scores = evaluate_model(forest, X_test, y_test)

    print("\nModel Performance Metrics:")
    for name in scores:
        print(f"{name}: {scores[name]:.4f}")

    print_feature_importance(forest, feature_names)

In [2]:
# Entry point: run the full load/train/evaluate pipeline when this cell
# executes as __main__; the guard skips it if the code is imported instead.
if __name__ == "__main__":
    main() 

Loading and preparing data...
Training model...
Evaluating model...

Model Performance Metrics:
RMSE: 0.5443
MAE: 0.3663
R2 Score: 0.7739

Feature Importance:
MedInc: 0.5938
AveOccup: 0.1398
Latitude: 0.0766
Longitude: 0.0761
HouseAge: 0.0479
AveRooms: 0.0315
Population: 0.0173
AveBedrms: 0.0170
