In [2]:
import pickle
import warnings
from collections import Counter

import gradio as gr
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore')

def get_default_values():
    """Build a fresh dict of default values for every raw input feature.

    The result lists the numeric features first, followed by the
    categorical (string-valued) features. A new dict is created on each
    call, so callers may mutate the result freely.
    """
    # Integer-valued defaults (ids, areas, counts, years, sale date).
    numeric_defaults = {
        'Id': 0, 'MSSubClass': 60, 'LotFrontage': 70, 'LotArea': 8500,
        'OverallQual': 7, 'OverallCond': 5,
        'YearBuilt': 1990, 'YearRemodAdd': 1990,
        'MasVnrArea': 100,
        'BsmtFinSF1': 800, 'BsmtFinSF2': 0, 'BsmtUnfSF': 200, 'TotalBsmtSF': 1000,
        '1stFlrSF': 1200, '2ndFlrSF': 800, 'LowQualFinSF': 0, 'GrLivArea': 2000,
        'BsmtFullBath': 1, 'BsmtHalfBath': 0, 'FullBath': 2, 'HalfBath': 1,
        'BedroomAbvGr': 3, 'KitchenAbvGr': 1, 'TotRmsAbvGrd': 8,
        'Fireplaces': 1,
        'GarageYrBlt': 1990, 'GarageCars': 2, 'GarageArea': 480,
        'WoodDeckSF': 100, 'OpenPorchSF': 50, 'EnclosedPorch': 0,
        '3SsnPorch': 0, 'ScreenPorch': 0, 'PoolArea': 0,
        'MiscVal': 0, 'MoSold': 6, 'YrSold': 2024,
    }

    # String-valued defaults (category codes).
    categorical_defaults = {
        'MSZoning': 'RL', 'Street': 'Pave', 'LotShape': 'Reg',
        'LandContour': 'Lvl', 'Utilities': 'AllPub', 'LotConfig': 'Inside',
        'LandSlope': 'Gtl', 'Neighborhood': 'NAmes',
        'Condition1': 'Norm', 'Condition2': 'Norm',
        'BldgType': '1Fam', 'HouseStyle': '2Story',
        'RoofStyle': 'Gable', 'RoofMatl': 'CompShg',
        'Exterior1st': 'VinylSd', 'Exterior2nd': 'VinylSd',
        'MasVnrType': 'None',
        'ExterQual': 'TA', 'ExterCond': 'TA',
        'Foundation': 'PConc',
        'BsmtQual': 'TA', 'BsmtCond': 'TA', 'BsmtExposure': 'No',
        'BsmtFinType1': 'GLQ', 'BsmtFinType2': 'Unf',
        'Heating': 'GasA', 'HeatingQC': 'Ex', 'CentralAir': 'Y',
        'Electrical': 'SBrkr', 'KitchenQual': 'Gd', 'Functional': 'Typ',
        'GarageType': 'Attchd', 'GarageFinish': 'Fin',
        'GarageQual': 'TA', 'GarageCond': 'TA',
        'PavedDrive': 'Y', 'SaleType': 'WD', 'SaleCondition': 'Normal',
    }

    # Merge while keeping numeric keys ahead of categorical ones.
    return {**numeric_defaults, **categorical_defaults}

# Read the three pickled artifacts the app needs from the working directory
def load_model_and_features():
    """Unpickle and return ``(model, preprocessor, selected_features)``.

    The artifacts are read, in this order, from ``xgboost_model_rf_selection.pkl``,
    ``preprocessor.pkl`` and ``selected_features.pkl``, resolved relative to
    the current working directory.
    """
    artifact_paths = (
        'xgboost_model_rf_selection.pkl',
        'preprocessor.pkl',
        'selected_features.pkl',
    )
    artifacts = []
    for path in artifact_paths:
        # NOTE: pickle.load executes code embedded in the file; only load
        # artifacts that come from a trusted source.
        with open(path, 'rb') as handle:
            artifacts.append(pickle.load(handle))
    model, preprocessor, selected_features = artifacts
    return model, preprocessor, selected_features

# Load the model, preprocessor and selected-feature list once, when this module
# is executed, so predict_price can reuse the same objects on every call.
xgb_model, preprocessor, selected_features = load_model_and_features()

def predict_price(OverallQual, GrLivArea, GarageCars, TotalBsmtSF, FullBath, YearBuilt,
                  Neighborhood, KitchenQual, GarageType, ExterQual):
    """Estimate a house price from ten user-supplied features.

    Every other raw feature takes its value from ``get_default_values()``.
    The row is extended with engineered features, run through the
    module-level ``preprocessor``, reduced to the columns named in
    ``selected_features`` and passed to the module-level ``xgb_model``.
    The model output is mapped back with ``np.expm1``.

    NOTE(review): the user-facing text below says "Stacked Regressor" while
    the code predicts with ``xgb_model`` -- confirm which label is correct.

    Args:
        OverallQual, GrLivArea, GarageCars, TotalBsmtSF, FullBath, YearBuilt:
            Numeric inputs. ``None`` (e.g. a cleared form field) keeps the
            default value for that feature.
        Neighborhood, KitchenQual, GarageType, ExterQual:
            Category codes. ``None`` keeps the default value.

    Returns:
        str: A message containing the formatted price and the number of
        selected features that were used.

    Raises:
        ValueError: If none of the selected features can be matched to a
            column produced by the preprocessor.
    """
    input_dict = get_default_values()

    user_values = {
        'OverallQual': OverallQual,
        'GrLivArea': GrLivArea,
        'GarageCars': GarageCars,
        'TotalBsmtSF': TotalBsmtSF,
        'FullBath': FullBath,
        'YearBuilt': YearBuilt,
        'Neighborhood': Neighborhood,
        'KitchenQual': KitchenQual,
        'GarageType': GarageType,
        'ExterQual': ExterQual,
    }
    # An empty form field arrives as None; overwriting the default with it
    # would break the arithmetic below, so such fields keep their default.
    input_dict.update(
        {name: value for name, value in user_values.items() if value is not None}
    )

    # The default remodel/garage years are fixed at 1990. If the user picks a
    # later build year, lift them so they never precede construction (which
    # would also wrongly flag the house as remodeled).
    for dependent_year in ('YearRemodAdd', 'GarageYrBlt'):
        input_dict[dependent_year] = max(input_dict[dependent_year],
                                         input_dict['YearBuilt'])

    # Single-row frame holding every raw column.
    input_data = pd.DataFrame([input_dict])

    # Engineered features.
    # NOTE(review): the reference year 2024 is hard-coded (it equals the
    # default 'YrSold'); confirm it matches how 'Age' was built for training.
    input_data['Age'] = 2024 - input_data['YearBuilt']
    input_data['Remodeled'] = (
        input_data['YearBuilt'] != input_data['YearRemodAdd']
    ).astype(int)
    input_data['TotalSF'] = (
        input_data['TotalBsmtSF'] + input_data['1stFlrSF'] + input_data['2ndFlrSF']
    )
    input_data['TotalBath'] = (
        input_data['FullBath'] + 0.5 * input_data['HalfBath']
        + input_data['BsmtFullBath'] + 0.5 * input_data['BsmtHalfBath']
    )
    input_data['Qual*Area'] = input_data['GrLivArea'] * input_data['OverallQual']

    input_transformed = preprocessor.transform(input_data)
    all_feature_names = preprocessor.get_feature_names_out().tolist()

    # Map each output column name to its first position once, instead of
    # scanning the list for every selected feature.
    column_position = {}
    for position, name in enumerate(all_feature_names):
        column_position.setdefault(name, position)

    # Translate the stored feature names to the preprocessor's naming scheme:
    # names containing '_' get the 'cat__' prefix, all others 'num__'.
    adjusted_selected_features = [
        f'cat__{feat}' if '_' in feat else f'num__{feat}'
        for feat in selected_features
    ]

    valid_selected_features = [
        feat for feat in adjusted_selected_features if feat in column_position
    ]
    if len(valid_selected_features) != len(adjusted_selected_features):
        print(f"Warning: Some adjusted features not found. Expected {len(adjusted_selected_features)}, "
              f"found {len(valid_selected_features)} features.")
        print("Adjusted features:", adjusted_selected_features)
        print("All feature names:", all_feature_names)

    if not valid_selected_features:
        raise ValueError("No valid features found for prediction after adjustment.")

    selected_indices = [column_position[feat] for feat in valid_selected_features]
    input_selected = input_transformed[:, selected_indices]

    # expm1 inverts a log1p transform (presumably applied to the training
    # target -- confirm against the training code).
    prediction = np.expm1(xgb_model.predict(input_selected))[0]

    output_text = (
        f"Predicted House Price (Stacked Regressor with RF Feature Selection):\n"
        f"${prediction:,.2f}\n\n"
        f"Using {len(valid_selected_features)} of top 30 features selected by Random Forest"
    )

    return output_text

# Create the Gradio interface around predict_price.
# The ten input components are listed in the same order as predict_price's
# parameters, and each row in `examples` supplies ten values in that order.
iface = gr.Interface(
    fn=predict_price,
    inputs=[
        # Numeric inputs: OverallQual, GrLivArea, GarageCars, TotalBsmtSF,
        # FullBath, YearBuilt.
        gr.Slider(1, 10, value=7, step=1, label="Overall Quality (1-10)"),
        gr.Number(value=2000, label="Above Ground Living Area (sq ft)"),
        gr.Number(value=2, label="Garage Cars Capacity"),
        gr.Number(value=1000, label="Total Basement Area (sq ft)"),
        gr.Number(value=2, label="Full Bathrooms"),
        gr.Number(value=1990, label="Year Built"),
        # Categorical inputs: Neighborhood, KitchenQual, GarageType, ExterQual.
        gr.Dropdown(
            choices=['NAmes', 'CollgCr', 'OldTown', 'Edwards', 'Somerst', 'NridgHt'],
            value='NAmes',
            label="Neighborhood"
        ),
        gr.Dropdown(
            choices=['Ex', 'Gd', 'TA', 'Fa', 'Po'],
            value='Gd',
            label="Kitchen Quality"
        ),
        gr.Dropdown(
            choices=['Attchd', 'Detchd', 'BuiltIn', 'CarPort', 'None'],
            value='Attchd',
            label="Garage Type"
        ),
        gr.Dropdown(
            choices=['Ex', 'Gd', 'TA', 'Fa', 'Po'],
            value='TA',
            label="Exterior Quality"
        )
    ],
    # predict_price returns a plain string, shown in a text output.
    outputs="text",
    # NOTE(review): the title and description say "Stacked Regressor", while
    # predict_price calls the object named xgb_model -- confirm the wording.
    title="House Price Predictor (Stacked Regressor with RF Feature Selection)",
    description="""
    Adjust the key features below to get an estimated house price using Stacked Regressor 
    with Random Forest feature selection (top 30 features). All other features 
    are set to typical values for a standard house.
    """,
    examples=[
        [7, 2000, 2, 1000, 2, 1990, 'NAmes', 'Gd', 'Attchd', 'TA'],
        [8, 2500, 3, 1200, 3, 2000, 'NridgHt', 'Ex', 'BuiltIn', 'Gd'],
        [6, 1800, 2, 900, 2, 1975, 'OldTown', 'TA', 'Detchd', 'TA']
    ]
)

# Launch the app only when this file is executed directly.
# NOTE: share=True requests a public share URL in addition to the local one,
# so anyone with the link can reach the app while it is running.
if __name__ == "__main__":
    iface.launch(share=True)



class KNN:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    Each query sample is labelled by majority vote among the ``k`` training
    samples closest to it. When several labels share the highest count, the
    one that occurs first among the neighbours (nearest first) wins.

    Requires ``Counter`` from ``collections`` to be imported at module level.
    """

    def __init__(self, k=3):
        """Store the neighbour count.

        Args:
            k: Number of neighbours that vote; must be a positive integer.

        Raises:
            ValueError: If ``k`` is not an integer of at least 1.
        """
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples and their labels.

        Args:
            X: 2-D array-like of training samples, one row per sample.
            y: 1-D array-like of labels, one per row of ``X``.

        Returns:
            KNN: ``self``, so calls can be chained.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        X_train = np.array(X)
        y_train = np.array(y)
        if len(X_train) == 0:
            raise ValueError("training data must not be empty")
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X and y must have the same length, got {len(X_train)} and {len(y_train)}"
            )
        self.X_train = X_train
        self.y_train = y_train
        return self

    def predict(self, X):
        """Return an array holding the predicted label for each row of ``X``."""
        X = np.array(X)
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        """Return the majority label among the ``k`` nearest training samples."""
        # Euclidean distance from x to every training sample.
        distances = np.linalg.norm(self.X_train - x, axis=1)
        # A stable sort keeps equally distant samples in training order, so
        # the neighbour set (and therefore the vote) is deterministic.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_nearest_labels = self.y_train[k_indices]
        # Majority vote.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://9bbd4d0a6e97b4bd1f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
