In [7]:
# House Price Prediction

# Step 1: Import required libraries
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Step 2: Build a small in-memory sample dataset (seven rows, one per house)
data = dict(
    Area=[1400, 1600, 1700, 1875, 1100, 1550, 2350],
    Bedrooms=[3, 3, 3, 4, 2, 3, 4],
    Age=[10, 5, 3, 8, 20, 18, 4],
    Price=[245000, 312000, 279000, 308000, 199000, 219000, 405000],
)
df = pd.DataFrame(data)

# Step 3: Separate the predictors from the value being predicted
X = df.loc[:, ['Area', 'Bedrooms', 'Age']]   # Features (independent variables)
y = df.loc[:, 'Price']                       # Target (dependent variable)

# Step 4: Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# Step 5: Fit an ordinary least-squares linear regression on the training rows
model = LinearRegression().fit(X_train, y_train)

# Step 6: Predict prices for the held-out rows
y_pred = model.predict(X_test)

# Step 7: Report the fitted parameters and the test-set error metrics.
# NOTE: the sample dataset is tiny, so these metrics are illustrative only and
# should not be read as a reliable estimate of model quality.
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared Error:", test_mse)
print("R2 Score:", test_r2)


Coefficients: [   1720. -825000.  -35500.]
Intercept: 667000.0000000021
Mean Squared Error: 494414833333.333
R2 Score: -172.72271023658925


In [None]:
# What Are Ensemble Models?
# Ensemble models combine predictions from multiple base
# models to improve performance, reduce overfitting, and increase accuracy.

# | Ensemble Method | Core Idea                                                             | Examples                             |
# | --------------- | --------------------------------------------------------------------- | ------------------------------------ |
# | **Bagging**     | Train multiple models in parallel on different subsets of data        | Random Forest                        |
# | **Boosting**    | Train models sequentially, where each corrects errors of the previous | AdaBoost, Gradient Boosting, XGBoost |
# | **Stacking**    | Combine predictions of multiple models using a meta-model             | Blend of LR + RF + GBM               |


In [None]:
# 1. Bagging (Bootstrap Aggregation)
# Multiple models are trained on random samples drawn with replacement

# Final output: majority vote (classification) / average (regression)

In [2]:
from sklearn.ensemble import RandomForestRegressor

# Bagging example: a random forest of 100 trees whose predictions are averaged.
# random_state pins the bootstrap sampling so the predictions are identical on
# every "Restart & Run All", consistent with the seeded train/test split.
model = RandomForestRegressor(n_estimators=100, random_state=0)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
predictions

array([284240., 260850., 257070.])

In [3]:
# 2. Boosting
# Models are trained sequentially

# Each new model focuses on correcting the mistakes of the previous one

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Boosting example: 100 trees fitted sequentially, each contribution scaled by
# learning_rate. random_state fixes the estimator's internal randomness so
# reruns reproduce the same predictions.
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=0)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
predictions

array([307998.26686872, 254111.06412722, 254111.06412722])

In [4]:
# XGBoost (Extreme Gradient Boosting)

In [None]:
from xgboost import XGBRegressor

# XGBoost regressor: 100 boosting rounds with a learning rate of 0.1,
# fitted on the training split and then used to predict the held-out rows.
model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
).fit(X_train, y_train)
predictions = model.predict(X_test)
predictions

array([307613.72, 244986.66, 244986.66], dtype=float32)

In [5]:
# 3. Stacking
# Combines predictions from multiple models (level 0) using another model (level 1 or meta-model)
# Powerful when different models perform well on different parts of the dataset

In [6]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold # Import KFold

# Level-0 (base) learners whose cross-validated predictions feed the meta-model.
# Both are seeded so the stacked predictions are reproducible across reruns.
base_models = [
    ('dt', DecisionTreeRegressor(random_state=0)),
    ('rf', RandomForestRegressor(random_state=0)),
]

# Level-1 (meta) learner that combines the base-model predictions.
meta_model = RidgeCV()

# KFold requires n_splits <= number of training samples. X_train has 4 samples,
# so n_splits=4 puts exactly one sample in each validation fold (leave-one-out);
# the default 5-fold strategy would not fit a training set this small.
kf = KFold(n_splits=4)

# Pass the cross-validation strategy to the StackingRegressor
model = StackingRegressor(estimators=base_models, final_estimator=meta_model, cv=kf)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
predictions

array([328142.54540525, 257249.79013719, 264353.6132995 ])