In [None]:
#  Import Libraries
import numpy as np
import pandas as pd
from xgboost import XGBRegressor


In [None]:
#  Load Data
# Single place to change the input folder when running outside Kaggle.
DATA_DIR = "/kaggle/input/house-prices-advanced-regression-techniques"

# Both frames are read straight from CSV with pandas' default parsing.
train = pd.read_csv(f"{DATA_DIR}/train.csv")
test = pd.read_csv(f"{DATA_DIR}/test.csv")

In [None]:
#  Put the target on a log1p scale to reduce skewness.
#  NOTE: the column is overwritten in place, so running this cell a second
#  time without reloading the data applies the transform twice.
train["SalePrice"] = train["SalePrice"].pipe(np.log1p)

In [None]:

#  Feature Engineering
# Component columns of each derived feature, declared once so that train and
# test are guaranteed to be built from exactly the same formula.
AREA_COLS = ["TotalBsmtSF", "1stFlrSF", "2ndFlrSF"]
BATH_WEIGHTS = {
    "FullBath": 1.0,
    "HalfBath": 0.5,
    "BsmtFullBath": 1.0,
    "BsmtHalfBath": 0.5,
}
PORCH_COLS = ["OpenPorchSF", "EnclosedPorch", "3SsnPorch", "ScreenPorch"]


def add_engineered_features(df):
    """Return a copy of ``df`` with TotalSF, TotalBath and TotalPorchSF appended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in AREA_COLS, BATH_WEIGHTS and
        PORCH_COLS.

    Returns
    -------
    pandas.DataFrame
        New frame; the three derived columns are appended in the order
        TotalSF, TotalBath, TotalPorchSF. ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If any of the component columns is absent from ``df``.

    Notes
    -----
    Row sums skip NaN (pandas' default ``skipna=True``), so a missing
    component counts as 0 instead of turning the whole derived value into NaN.
    With plain ``+`` a single missing component made the total NaN, and a
    later mean-imputation step would then replace it with an unrelated
    average. Rows with no missing components give the same values as before.
    """
    bath_weights = pd.Series(BATH_WEIGHTS)
    return df.assign(
        TotalSF=df[AREA_COLS].sum(axis=1),
        # Half baths count as 0.5; weights are aligned on the column labels.
        TotalBath=df[bath_weights.index].mul(bath_weights, axis=1).sum(axis=1),
        TotalPorchSF=df[PORCH_COLS].sum(axis=1),
    )


# Rebinding (rather than mutating in place) keeps this cell safe to re-run.
train = add_engineered_features(train)
test = add_engineered_features(test)

In [None]:

#  Stack train and test features into one frame so both get the same preprocessing
train_id = train["Id"]
test_id = test["Id"]
combined = pd.concat(
    [
        train.drop(columns=["SalePrice", "Id"]),  # target and row id are not features
        test.drop(columns="Id"),
    ],
    axis="index",
)

In [None]:
#  Handle Missing Values
#  float64/int64 columns: fill gaps with the column mean over all rows of `combined`.
numeric_cols = combined.select_dtypes(include=["float64", "int64"]).columns
combined[numeric_cols] = combined[numeric_cols].pipe(
    lambda frame: frame.fillna(frame.mean())
)

#  object columns: fill gaps with the most frequent value (first mode on ties).
categorical_cols = combined.select_dtypes(include=["object"]).columns
combined[categorical_cols] = combined[categorical_cols].pipe(
    lambda frame: frame.fillna(frame.mode().iloc[0])
)


In [None]:
#  One-hot encode: replace each categorical column with indicator columns
combined = combined.pipe(pd.get_dummies)

In [None]:
#  Split back: the first len(train) rows of `combined` go to X, the remainder to X_test
X, X_test = combined.iloc[: len(train)], combined.iloc[len(train):]
y = train["SalePrice"]

In [None]:
#  Train Model on Full Data (every row of X is used for fitting; nothing is held out)
xgb_params = {
    "n_estimators": 500,
    "learning_rate": 0.03,
    "max_depth": 5,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,  # fixed seed
    "n_jobs": -1,
}
model = XGBRegressor(**xgb_params)
model.fit(X, y)

In [None]:
# 📈 Predict for X_test, then apply expm1 to reverse log1p
final_preds = np.expm1(test_preds := model.predict(X_test))


In [None]:
#  Submission
# Two-column frame: the test ids next to their back-transformed predictions.
submission = test_id.to_frame(name="Id").assign(SalePrice=final_preds)
# index=False keeps the row index out of the CSV.
submission.to_csv("submission.csv", index=False)