In [1]:
# ==========================
# Imports
# ==========================
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# ==========================
# Load Data
# ==========================
# Column of the raw training file that is used as the regression target.
TARGET_COL = "HotelValue"

# Input locations. Each one can be overridden with an environment variable so
# the notebook also runs outside the original author's machine; when the
# variable is unset the original location is used unchanged.
PROCESSED_TRAIN_PATH = os.environ.get(
    "HOTEL_PROCESSED_TRAIN_CSV",
    "processed_train_data.csv",
)
RAW_TRAIN_PATH = os.environ.get(
    "HOTEL_RAW_TRAIN_CSV",
    "C:/Users/Mihir S Kagalkar/OneDrive - iiit-b/SEM-5/ML/PROJECT/Hotel-Property-Value-Dataset/train.csv",
)

df = pd.read_csv(PROCESSED_TRAIN_PATH)  # used as the feature table below
train_df = pd.read_csv(RAW_TRAIN_PATH)  # only TARGET_COL is read from it here

# Features and target come from two different files and nothing here joins them
# on a key, so they are assumed to be row-aligned.
# TODO(review): confirm the preprocessing step keeps the raw file's row order.
# A differing row count means that assumption is certainly broken, so fail fast
# with a clear message.
if len(df) != len(train_df):
    raise ValueError(
        f"Row count mismatch: {len(df)} rows in {PROCESSED_TRAIN_PATH!r} vs "
        f"{len(train_df)} rows in {RAW_TRAIN_PATH!r}; features and target "
        "cannot be paired by position."
    )

# Guard against target leakage: if the processed file still carries the target
# column, remove it from the features. This is a no-op when it is absent.
X = df.drop(columns=[TARGET_COL], errors="ignore")
y = train_df[TARGET_COL]


In [3]:
#===========================
# Define Models
# ==========================
# Base learners: a gradient-boosted tree ensemble (seeded) and an ordinary
# least-squares regression.
tree_model = GradientBoostingRegressor(random_state=42)
linear_model = LinearRegression()

# Stacked ensemble: the two base learners feed a second linear regression
# that combines their predictions.
base_estimators = [
    ('tree', tree_model),
    ('linear', linear_model),
]
hybrid_model = StackingRegressor(
    estimators=base_estimators,
    final_estimator=LinearRegression(),
)

# ==========================
# Pipeline (with Scaling)
# ==========================
# The scaler is a pipeline step, so it is refit on each training split during
# cross-validation rather than on the full data set.
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('model', hybrid_model),
]
pipeline = Pipeline(pipeline_steps)

# ==========================
# Cross-Validation Setup
# ==========================
# Five shuffled folds with a fixed seed so the split is repeatable.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)


def _rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    return np.sqrt(mean_squared_error(y_true, y_pred))


# greater_is_better=False makes the scorer return the NEGATED RMSE, so the
# sign is flipped back before printing below.
rmse_scorer = make_scorer(_rmse, greater_is_better=False)

# ==========================
# Cross-Validate
# ==========================
cv_scores = cross_val_score(
    pipeline,
    X,
    y,
    scoring=rmse_scorer,
    cv=kfold,
)

print("RMSE for each fold:", -cv_scores)
print("Average RMSE:", -np.mean(cv_scores))

RMSE for each fold: [27252.70315106 25783.95015133 21892.41840719 26536.72487855
 36487.52781216]
Average RMSE: 27590.66488005746
