In [None]:
# # Save models that work correctly
# import joblib

# working_models = {}

# # Linear Regression
# try:
#     joblib.dump(linearRegression, 'linear_regression_model.pkl')
#     working_models['linear_regression'] = linearRegression
#     print("✓ Linear Regression saved")
# except Exception as e:
#     print(f"✗ Error saving Linear Regression: {e}")

# # Random Forest (from GridSearchCV)
# try:
#     joblib.dump(randomForest.best_estimator_, 'random_forest_model.pkl')
#     working_models['random_forest'] = randomForest.best_estimator_
#     print("✓ Random Forest saved")
# except Exception as e:
#     print(f"✗ Error saving Random Forest: {e}")

# # XGBoost
# try:
#     joblib.dump(xgb, 'xgboost_model.pkl')
#     working_models['xgboost'] = xgb
#     print("✓ XGBoost saved")
# except Exception as e:
#     print(f"✗ Error saving XGBoost: {e}")

# # LightGBM
# try:
#     joblib.dump(lgbm_regressor, 'lightgbm_model.pkl')
#     working_models['lightgbm'] = lgbm_regressor
#     print("✓ LightGBM saved")
# except Exception as e:
#     print(f"✗ Error saving LightGBM: {e}")

# # For tuned models, we need to extract just the best estimator
# # XGBoost Tuned
# try:
#     if hasattr(xgb_tuned, 'best_estimator_'):
#         joblib.dump(xgb_tuned.best_estimator_, 'xgboost_tuned_model.pkl')
#         working_models['xgboost_tuned'] = xgb_tuned.best_estimator_
#         print("✓ XGBoost Tuned saved")
#     else:
#         print("✗ XGBoost Tuned doesn't have best_estimator_")
# except Exception as e:
#     print(f"✗ Error saving XGBoost Tuned: {e}")

# # LightGBM Tuned
# try:
#     if hasattr(lgbm_random_search, 'best_estimator_'):
#         joblib.dump(lgbm_random_search.best_estimator_, 'lightgbm_tuned_model.pkl')
#         working_models['lightgbm_tuned'] = lgbm_random_search.best_estimator_
#         print("✓ LightGBM Tuned saved")
#     else:
#         print("✗ LightGBM Tuned doesn't have best_estimator_")
# except Exception as e:
#     print(f"✗ Error saving LightGBM Tuned: {e}")

In [None]:
# performance = pd.DataFrame({
#     'Metrics':['MAE','MAPE','R²'],
#     'Linear Regression':[lr_mae,lr_mape,lr_r2],
#     'Random Forest Regression':[rf_mae,rf_mape,rf_r2],
#     'XGB Regression':[xgb_mae,xgb_mape,xgb_r2],
#     'XGB Tuned Regression':[xgb_tuned_mae,xgb_tuned_mape,xgb_tuned_r2],
#     'LightGBM Regression':[lgbm_mae,lgbm_mape,lgbm_r2],
#     'LightGBM Tuned Regression':[lgbmt_mae,lgbmt_mape,lgbmt_r2],
# })

# performance.to_csv('model_performance.csv',index=False)

In [11]:
import pandas as pd
import numpy as np
import joblib
from sklearn.pipeline import Pipeline

class RentPricePredictor:
    """
    A class to load and use pre-trained rent price prediction models.
    
    Usage:
        predictor = RentPricePredictor()
        predicted_price = predictor.predict(model_name='random_forest', 
                                           property_data=property_df)
    """
    
    def __init__(self, models_dir='.'):
        """
        Initialize the predictor.
        
        Parameters:
        models_dir (str): Directory where model files are stored
        """
        self.models_dir = models_dir
        self.loaded_models = {}
        self.model_features = {}  # Store expected features for each model
        
    def load_model(self, model_name):
        """
        Load a specific model into memory.
        
        Parameters:
        model_name (str): Name of the model to load
        
        Returns:
        model: Loaded model object
        """
        try:
            model_path = f"{self.models_dir}/{model_name}_model.pkl"
            model = joblib.load(model_path)
            self.loaded_models[model_name] = model
            
            # Try to extract feature names from the model
            try:
                # For pipeline models with a preprocessor
                if hasattr(model, 'named_steps') and 'preprocessor' in model.named_steps:
                    preprocessor = model.named_steps['preprocessor']
                    
                    # Get numerical features
                    num_features = []
                    if 'num' in preprocessor.named_transformers_:
                        num_features = preprocessor.named_transformers_['num'].feature_names_in_
                    
                    # Get categorical features
                    cat_features = []
                    if 'cat' in preprocessor.named_transformers_:
                        cat_features = preprocessor.named_transformers_['cat'].feature_names_in_
                    
                    # Combine all features
                    self.model_features[model_name] = list(num_features) + list(cat_features)
                else:
                    # For non-pipeline models, try to get feature names
                    if hasattr(model, 'feature_names_in_'):
                        self.model_features[model_name] = model.feature_names_in_
            except Exception as e:
                print(f"Could not extract feature names for {model_name}: {e}")
                self.model_features[model_name] = None
            
            print(f"✓ {model_name} model loaded successfully")
            return model
        except FileNotFoundError:
            print(f"Model file '{model_name}_model.pkl' not found.")
            return None
        except Exception as e:
            print(f"Error loading {model_name} model: {e}")
            return None
    
    def prepare_input_data(self, property_data, model_name):
        """
        Prepare input data by ensuring it has all the features the model expects.
        
        Parameters:
        property_data (DataFrame): Input property data
        model_name (str): Name of the model being used
        
        Returns:
        DataFrame: Prepared data with all expected features
        """
        # If we don't know the expected features, return as-is
        if model_name not in self.model_features or self.model_features[model_name] is None:
            return property_data
        
        expected_features = self.model_features[model_name]
        input_features = property_data.columns.tolist()
        
        # Add missing features with default values
        missing_features = [f for f in expected_features if f not in input_features]
        
        if missing_features:
            print(f"Adding missing features with default values: {missing_features}")
            
            # Create a copy of the data
            prepared_data = property_data.copy()
            
            # Add missing features with appropriate default values
            for feature in missing_features:
                # Use 0 for numerical features, 'unknown' for categorical
                # You might need to adjust this based on your specific features
                if any(keyword in feature for keyword in ['count', 'num', 'size', 'sqft']):
                    prepared_data[feature] = 0  # Default for numerical features
                else:
                    prepared_data[feature] = 'unknown'  # Default for categorical features
        
            # Reorder columns to match expected feature order
            prepared_data = prepared_data[expected_features]
            return prepared_data
        
        # If no features are missing, just return the data
        return property_data
    
    def predict(self, model_name, property_data):
        """
        Predict rent price for a property using the specified model.
        
        Parameters:
        model_name (str): Name of the model to use
        property_data (DataFrame): Property features as a DataFrame
        
        Returns:
        float: Predicted rent price in BDT
        """
        # Load the model if not already loaded
        if model_name not in self.loaded_models:
            model = self.load_model(model_name)
            if model is None:
                return None
        else:
            model = self.loaded_models[model_name]
        
        try:
            # Prepare the input data (add missing features if needed)
            prepared_data = self.prepare_input_data(property_data, model_name)
            
            # Make prediction (model returns log(price))
            log_prediction = model.predict(prepared_data)
            
            # Convert back to original scale
            predicted_price = np.exp(log_prediction)
            
            return predicted_price[0]  # Return the first (and only) prediction
            
        except Exception as e:
            print(f"Error making prediction with {model_name}: {e}")
            return None
    
    def get_available_models(self):
        """
        Get a list of available models.
        
        Returns:
        list: Available model names
        """
        # This is a simple implementation - you might want to scan the directory
        # for actual model files
        try:
            performance_df = pd.read_csv(f'{self.models_dir}/model_performance.csv')
            return performance_df.keys()
        except FileNotFoundError:
            print("Performance metrics file not found.")
            return None
    
    def get_model_performance(self):
        """
        Load and return model performance metrics.
        
        Returns:
        DataFrame: Performance metrics for all models
        """
        try:
            performance_df = pd.read_csv(f'{self.models_dir}/model_performance.csv')
            return performance_df
        except FileNotFoundError:
            print("Performance metrics file not found.")
            return None

###Predictions

In [12]:
# Create multiple properties
properties = pd.DataFrame({
    'num_bed_rooms': [2, 4, 1],
    'num_bath_rooms': [1, 3, 1],
    'area': [800.0, 6000.0, 600.0],
    'locality': ['Mirpur', 'Gulshan', 'Uttara'],
    'building_type': ['Apartment', 'Apartment', 'Apartment'],
})

# Make predictions for all properties
predictor = RentPricePredictor()
results = []

for i, property_row in properties.iterrows():
    price = predictor.predict('random_forest', pd.DataFrame([property_row]))
    if price is not None:
        results.append(price)
        print(f"Property {i+1}: {price:.2f} BDT")
    else:
        results.append(None)
        print(f"Property {i+1}: Prediction failed")

# Add predictions to the DataFrame
properties['predicted_price'] = results

✓ random_forest model loaded successfully
Adding missing features with default values: ['relaxation_amenity_count', 'security_amenity_count', 'maintenance_or_cleaning_amenity_count', 'social_amenity_count', 'expendable_amenity_count', 'service_staff_amenity_count', 'unclassify_amenity_count', 'building_nature']
Property 1: 13657.19 BDT
Adding missing features with default values: ['relaxation_amenity_count', 'security_amenity_count', 'maintenance_or_cleaning_amenity_count', 'social_amenity_count', 'expendable_amenity_count', 'service_staff_amenity_count', 'unclassify_amenity_count', 'building_nature']
Property 2: 153661.80 BDT
Adding missing features with default values: ['relaxation_amenity_count', 'security_amenity_count', 'maintenance_or_cleaning_amenity_count', 'social_amenity_count', 'expendable_amenity_count', 'service_staff_amenity_count', 'unclassify_amenity_count', 'building_nature']
Property 3: 14714.46 BDT


In [72]:
predictor.get_model_performance()

Unnamed: 0,Metrics,Linear Regression,Random Forest Regression,XGB Regression,XGB Tuned Regression,LightGBM Regression,LightGBM Tuned Regression
0,MAE,12422.314655,8940.260022,9016.968969,8951.411936,9137.795403,9180.068013
1,MAPE,0.250604,0.196969,0.189897,0.188689,0.195288,0.194325
2,R²,-1.149289,0.808631,0.782324,0.788256,0.782442,0.781586


In [74]:
predictor.get_available_models()

Index(['Metrics', 'Linear Regression', 'Random Forest Regression',
       'XGB Regression', 'XGB Tuned Regression', 'LightGBM Regression',
       'LightGBM Tuned Regression'],
      dtype='object')