In [1]:

# Import libraries
import numpy as np
import pandas as pd
import os
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, r2_score

# Load dataset
DATA_PATH = "../data/Bengaluru_House_Data.csv"
df = pd.read_csv(DATA_PATH)

# Basic cleaning: drop the "society" column if the file has one
# (errors="ignore" keeps this a no-op when it is absent).
df = df.drop(columns=["society"], errors="ignore")

# Convert size -> number of rooms by pulling the first run of digits out of
# each entry (presumably values such as "2 BHK" -- confirm against the CSV).
# The pattern is a raw string because "\d" inside a normal string literal is
# an invalid escape sequence and makes Python emit a warning; expand=False
# makes extract() return a Series instead of a one-column DataFrame.
df["size"] = df["size"].str.extract(r"(\d+)", expand=False).astype(float)
# Convert total_sqft ranges to average
def convert_sqft(value):
    """Convert one raw ``total_sqft`` entry to a float.

    Parameters
    ----------
    value : str or number
        Either a single figure (``"1200"``), a range written as
        ``"low - high"`` (``"1000 - 1200"``), or an already-numeric value.

    Returns
    -------
    float
        The parsed figure, the midpoint of a range, or ``np.nan`` when the
        entry cannot be interpreted as either.
    """
    if not isinstance(value, str):
        # Numeric input passes straight through instead of being turned into
        # NaN; None and other non-convertible objects still become NaN.
        try:
            return float(value)
        except (TypeError, ValueError):
            return np.nan

    try:
        if "-" in value:
            low, high = value.split("-")
            return (float(low) + float(high)) / 2
        return float(value)
    except ValueError:
        # Raised by float() on non-numeric text and by the tuple unpacking
        # when the entry does not split into exactly two parts. Catching only
        # ValueError lets unrelated errors (e.g. KeyboardInterrupt) propagate.
        return np.nan

# Parse total_sqft. Every entry is cast to text first so that convert_sqft
# always receives a string, whatever dtype pandas inferred for the column.
df["total_sqft"] = (
    df["total_sqft"]
    .astype(str)
    .apply(convert_sqft)
)

# Discard every row that still has a missing value in any column.
df = df.dropna()

# Target is the price column; all remaining columns are kept in X.
y = df["price"]
X = df.drop(columns=["price"])

# Columns handed to the model, grouped by how they are preprocessed.
categorical_features = ["location", "availability"]
numeric_features = ["total_sqft", "bath", "balcony", "size"]

# Preprocessing: one-hot encode the categorical columns and standardise the
# numeric ones. handle_unknown="ignore" lets transform() accept categories
# that were not present when the encoder was fitted instead of raising.
categorical_transformer = OneHotEncoder(handle_unknown="ignore")
numeric_transformer = StandardScaler()

# Columns of X that appear in neither feature list are not passed on
# (ColumnTransformer's default remainder is "drop").
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features),
])

# Full model: preprocessing followed by an ordinary least-squares regressor.
model = Pipeline([
    ("preprocessing", preprocessor),
    ("regressor", LinearRegression()),
])

# Train-test split: hold out 20% of the rows for evaluation; the fixed
# random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model
model.fit(X_train, y_train)

# Evaluation: predictions for the held-out rows (scored in the report).
y_pred = model.predict(X_test)


# Save model
MODEL_PATH = "../models/house_price_model.pkl"
# joblib.dump does not create missing parent directories, so make sure the
# target folder exists; exist_ok=True keeps re-runs from failing.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(model, MODEL_PATH)


# Reload the persisted pipeline from MODEL_PATH (assigned where the model is
# saved) so the prediction below exercises the file on disk rather than the
# in-memory object.
loaded_model = joblib.load(MODEL_PATH)


# Describe one new house as a single-row frame whose column names match the
# feature columns the pipeline selects.
new_house = pd.DataFrame([
    {
        "total_sqft": 1200,
        "bath": 2,
        "balcony": 1,
        "size": 2,
        "location": "Whitefield",
        "availability": "Ready To Move",
    }
])

# Predict price for that house; the result is an array with one element.
predicted_price = loaded_model.predict(new_house)

# ==============================
# Evaluation Report
# ==============================
# NOTE: the emoji, superscript and currency characters in the strings below
# were stored as mis-decoded byte sequences; they are restored to the
# characters those bytes encode in UTF-8.
print("\n" + "="*50)
print("        BENGALURU HOUSE PRICE PREDICTION REPORT")
print("="*50)

# Score the held-out predictions against the true prices.
print("\n📊 Model Performance Metrics")
print("-"*50)
print(f"Mean Absolute Error (MAE) : {mean_absolute_error(y_test, y_pred):.2f} Lakhs")
print(f"R² Score                 : {r2_score(y_test, y_pred):.3f}")

# These lines are printed unconditionally; reaching this point means the
# earlier dump/load calls did not raise.
print("\n💾 Model Status")
print("-"*50)
print("Model saved successfully!")
print("Model loaded successfully!")

# ==============================
# Prediction Report
# ==============================
# .iloc[0] reads the first (only) row by position, so the report does not
# depend on the frame's index labels.
print("\n🏠 House Details for Prediction")
print("-"*50)
print(f"Location      : {new_house['location'].iloc[0]}")
print(f"Total Sqft    : {new_house['total_sqft'].iloc[0]}")
print(f"BHK           : {new_house['size'].iloc[0]}")
print(f"Bathrooms     : {new_house['bath'].iloc[0]}")
print(f"Balconies     : {new_house['balcony'].iloc[0]}")
print(f"Availability  : {new_house['availability'].iloc[0]}")

print("\n💰 Predicted House Price")
print("-"*50)
print(f"Estimated Price : ₹ {predicted_price[0]:.2f} Lakhs")

print("\n" + "="*50)
print("        END OF REPORT")
print("="*50)






  df["size"] = df["size"].str.extract("(\d+)").astype(float)



        BENGALURU HOUSE PRICE PREDICTION REPORT

📊 Model Performance Metrics
--------------------------------------------------
Mean Absolute Error (MAE) : 39.15 Lakhs
R² Score                 : 0.497

💾 Model Status
--------------------------------------------------
Model saved successfully!
Model loaded successfully!

🏠 House Details for Prediction
--------------------------------------------------
Location      : Whitefield
Total Sqft    : 1200
BHK           : 2
Bathrooms     : 2
Balconies     : 1
Availability  : Ready To Move

💰 Predicted House Price
--------------------------------------------------
Estimated Price : ₹ 82.01 Lakhs

        END OF REPORT
