# 🏡 Housing Price Prediction using XGBoost
This project uses the Boston Housing dataset and XGBoost to build a regression model that predicts housing prices.

In [None]:
# 📦 Install Required Libraries (run only once)
!pip install pandas numpy matplotlib seaborn scikit-learn xgboost

## 📥 Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

## 📊 Step 2: Load and Explore the Data

In [None]:
# Location of the Boston Housing CSV (raw file hosted on GitHub).
url = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"

# Download the file and parse it into a DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows; as the last expression it becomes the cell output.
df.head(n=5)

In [None]:
# Column dtypes, non-null counts and memory usage.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
df.info()

# Summary statistics of the numeric columns (rendered as the cell output).
df.describe()

## ✅ Step 3: Preprocess the Data

In [None]:
# Separate the predictors from the target column `medv`.
X = df.drop(columns='medv')
y = df['medv']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features (not required by XGBoost, kept for consistency).
# The scaling statistics are learned from the training rows only and then
# applied to both splits, so no information from the test set leaks in.
# With default settings the scaler returns NumPy arrays, so X_train / X_test
# are no longer DataFrames after this point.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## 🚀 Step 4: Train the XGBoost Model

In [None]:
# Hyperparameters of the gradient-boosted regressor, gathered in one place.
xgb_params = {
    'objective': 'reg:squarederror',  # squared-error regression objective
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 4,
    'random_state': 42,               # fixed seed for reproducibility
}
model = XGBRegressor(**xgb_params)

# Fit on the scaled training split; as the last expression, the fitted
# estimator is also shown as the cell output.
model.fit(X_train, y_train)

## 📈 Step 5: Evaluate the Model

In [None]:
# Predict on the held-out split.
y_pred = model.predict(X_test)

# Compare predictions with the true targets: coefficient of determination
# and mean squared error.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R2 Score: {r2:.2f}")

## 📌 Step 6: Predict the Price for a Single Test Sample

In [None]:
# Take the first row of the scaled test matrix. Slicing (rather than indexing)
# keeps it two-dimensional, i.e. a batch holding a single sample.
sample = X_test[:1]
predicted_price = model.predict(sample)

# The prediction is multiplied by 1000 for display as a dollar amount.
print(f"Predicted house price: ${predicted_price[0]*1000:.2f}")

## 📉 Step 7: Visualize Actual vs Predicted Prices

In [None]:
# Scatter of actual vs. predicted prices on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=y_test, y=y_pred, ax=ax)

ax.set_xlabel("Actual Prices ($1000s)")
ax.set_ylabel("Predicted Prices ($1000s)")
ax.set_title("Actual vs Predicted House Prices")

# Dashed red diagonal marking perfect predictions (predicted == actual),
# drawn across the range of the actual values.
lo, hi = y_test.min(), y_test.max()
ax.plot([lo, hi], [lo, hi], '--r')
plt.show()