In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from xgboost import XGBRegressor
import joblib
import matplotlib.pyplot as plt
import os

In [None]:
# Show which datasets are mounted. The listing has to be printed explicitly:
# a bare expression is only displayed when it is the last line of a cell.
print("Available inputs:", os.listdir("/kaggle/input"))

# Load the raw table and report its size before previewing the first rows.
df = pd.read_csv("/kaggle/input/housedata/data.csv")
print("Dataset Shape:",df.shape)
df.head()

In [None]:
# Print the column/dtype summary, then display the number of missing values
# per column as the cell output.
df.info()
df.isna().sum()

In [None]:
# Keep only rows in which every column is populated.
df = df.dropna(axis=0, how="any")
print("After Cleaning:", df.shape)

In [None]:
# Target is the "price" column; every other column is a candidate feature.
y = df["price"]
X = df.drop(columns="price")

In [None]:
# One-hot encode the non-numeric columns so the feature matrix is fully numeric.
X = pd.get_dummies(X)
n_features = X.shape[1]
print("Total Features:", n_features)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Hyper-parameters for the gradient-boosted regressor, collected in one place.
xgb_params = {
    "n_estimators": 300,
    "max_depth": 6,
    "learning_rate": 0.05,
    "objective": "reg:squarederror",
}

# Build the regressor and fit it on the training split only.
model = XGBRegressor(**xgb_params)
model.fit(X_train, y_train)
print("Model trained successfully")

In [None]:
# Score the held-out split and report mean absolute error and R^2.
pred = model.predict(X_test)
mae = mean_absolute_error(y_test, pred)
r2 = r2_score(y_test, pred)

for label, value in (("MAE:", mae), ("R2 Score:", r2)):
    print(label, value)

In [None]:
# Feature importances reported by the fitted model, labelled with the
# encoded feature names. (matplotlib is already imported in the first cell.)
imp = pd.Series(model.feature_importances_, index=X.columns)
top_imp = imp.sort_values(ascending=False).head(15)

fig, ax = plt.subplots()
top_imp.plot(kind="barh", ax=ax)
# A horizontal bar chart draws the first row at the bottom; flip the axis so
# the most important feature is shown at the top.
ax.invert_yaxis()
ax.set_title("Top 15 Important Features")
ax.set_xlabel("Importance")
plt.show()

In [None]:
# Persist the fitted model together with the exact column order it was
# trained on, so new inputs can be aligned to the same layout later.
feature_names = list(X.columns)
joblib.dump(model, "property_model.pkl")
joblib.dump(feature_names, "features.pkl")

In [None]:
# A single hand-written listing to price with the trained model.
sample={
    "locality":"Vaishali Nagar",
    "area_sqft":1600,
    "bedrooms":3,
    "bathrooms":2,
    "property_type":"Apartment",
    "age_years":4,
    "floor":3,
    "total_floors":10,
    "amenities_score":7
}

# Encode the row the same way as the training matrix.
sample_df=pd.get_dummies(pd.DataFrame([sample]))

# reindex() below silently discards any column that is not part of the
# training layout (e.g. a misspelt key or a category never seen in training).
# Report those columns so a prediction built from zero-filled features does
# not go unnoticed.
unknown_cols=sample_df.columns.difference(X.columns)
if len(unknown_cols) > 0:
    print("Warning: sample columns not seen in training are ignored:", list(unknown_cols))

# Align to the training column order; features absent from the sample become 0.
sample_df=sample_df.reindex(columns=X.columns, fill_value=0)

price=model.predict(sample_df)[0]
print("Predicted Price: $", round(price))

In [None]:
# Predict for every cleaned row (this includes rows the model was trained on)
# and export the table with the predictions attached.
# NOTE: this adds a column to `df`; re-running the cell that builds X from
# `df` afterwards would turn "predicted_price" into an input feature.
all_predictions = model.predict(X)
df["predicted_price"] = all_predictions

output_path = "predicted_prices.csv"
df.to_csv(output_path, index=False)
print("File saved: predicted_prices.csv")

In [None]:
# Save a second copy of the fitted model under a generic file name.
# joblib is already imported in the notebook's first cell, so it is not
# re-imported here.
joblib.dump(model,"model.pkl")
print("saved")