In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb

In [None]:
# --- File locations ---------------------------------------------------------
TRAINING_DATA_PATH = "train(1).xlsx"             # rows that include `price`
DATA_PATH = "processed_housing_dataset.xlsx"     # rows to generate predictions for
OUTPUT_PATH = "tabular_price_predictions.csv"    # where predictions are written

# --- Load both spreadsheets -------------------------------------------------
df1 = pd.read_excel(TRAINING_DATA_PATH)
df2 = pd.read_excel(DATA_PATH)

# Columns that are never used as model inputs. Any of them may be absent from
# a given frame (e.g. `price` may not exist in the test set), so they are
# dropped only where present.
cols_to_remove = ["id", "price", "date"]

# Regression target: log1p(price).
y_log = np.log1p(df1["price"].values)

# Training matrix: every remaining numeric column, missing values set to 0.
train_features = df1.drop(columns=cols_to_remove, errors="ignore")
X = train_features.select_dtypes(include="number").fillna(0).values

In [None]:
# Hyperparameters for the boosted-tree regressor, gathered in one place so
# they are easy to read and tweak.
xgb_params = dict(
    n_estimators=1500,
    learning_rate=0.02,
    max_depth=6,
    n_jobs=-1,
    random_state=42,  # fixed seed
)

model = xgb.XGBRegressor(**xgb_params)

# Fit on the numeric training matrix against the log1p-transformed price.
model.fit(X, y_log)


In [None]:
# Keep the ids so every prediction can be matched back to its input row.
ids = df2["id"].values

# Re-derive the feature columns exactly the way the training matrix X is built
# (drop the non-feature columns, keep numeric dtypes). The model is fitted on a
# bare array, so it cannot match features by name: picking numeric columns from
# df2 independently could yield a different set or order of columns and either
# fail at predict time or silently feed features into the wrong positions.
feature_cols = (
    df1.drop(columns=[c for c in cols_to_remove if c in df1.columns])
    .select_dtypes(include=[np.number])
    .columns
)

# Fail loudly if the scoring data cannot supply a training feature.
missing_cols = [c for c in feature_cols if c not in df2.columns]
if missing_cols:
    raise KeyError(
        f"Scoring data is missing training feature columns: {missing_cols}"
    )

# Same columns, same order as training; extra columns in df2 are ignored.
test_features = df2.loc[:, feature_cols]

# A training feature that was read as non-numeric here would otherwise turn
# the whole matrix into an object array.
non_numeric_cols = list(test_features.select_dtypes(exclude=[np.number]).columns)
if non_numeric_cols:
    raise TypeError(
        f"Scoring data has non-numeric values in feature columns: {non_numeric_cols}"
    )

# Same missing-value handling as the training matrix.
X_test = test_features.fillna(0).values

In [None]:
def _log_to_price(log_values):
    """Invert the log1p transform that was applied to the training target."""
    return np.expm1(log_values)


# The model was fitted on log1p(price), so its raw output is on that scale.
preds_log = model.predict(X_test)

# Map the predictions back to the original price scale.
preds = _log_to_price(preds_log)


In [None]:
# Create output DataFrame
submission = pd.DataFrame({
    "id": ids,
    "predicted_price": preds
})

# Save file
submission.to_csv(OUTPUT_PATH, index=False)

print(f"Predictions saved to: {OUTPUT_PATH}")
