In [None]:
import pandas as pd
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Read the housing dataset from the Excel workbook.
# Reading .xlsx files needs openpyxl installed (`pip install openpyxl`).
df = pd.read_excel('./synthetic_housing_data.xlsx')

# Predictor columns, listed in the order they are handed to the models.
features = [
    'Age',
    'Adults',
    'Children',
    'Rent',
    'IsStudent',
    'Distance_to_New_Tenancy',
    'Total_Rooms',
    'Area_m2',
    'Hospital_distance',
    'Gym_distance',
    'School_distance',
    'Supermarket_distance',
    'Distance_to_University',
]

# Design matrix and regression target ('Label' can be swapped for another target column).
X = df[features]
y = df['Label']

# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42, test_size=0.2)



# Base learner 1: gradient-boosted trees from XGBoost, wrapped in a one-step pipeline.
xgb_regressor = XGBRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=6,
    random_state=42,
)
xgb_model = Pipeline(steps=[("regressor", xgb_regressor)])

# Base learner 2: LightGBM with the same tree count and learning rate.
# max_depth=-1 is LightGBM's setting for "no depth limit".
lgbm_regressor = LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=-1,
    random_state=42,
)
lgbm_model = Pipeline(steps=[("regressor", lgbm_regressor)])



# Stack the two boosted-tree pipelines under a linear meta-learner.
# With cv=5 the meta-learner is trained on 5-fold cross-validated
# predictions of the base learners.
base_learners = [("xgb", xgb_model), ("lightgbm", lgbm_model)]
stacked_model = StackingRegressor(
    estimators=base_learners,
    final_estimator=LinearRegression(),
    cv=5,
)

# Fit on the training split, then predict the held-out validation rows.
stacked_model.fit(X_train, y_train)
y_pred = stacked_model.predict(X_val)

# Score the validation predictions against the held-out targets.
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)

# Report both metrics, one per line, rounded to two decimals.
print(
    f"Mean Absolute Error: {mae:.2f}",
    f"Mean Squared Error: {mse:.2f}",
    sep="\n",
)

In [None]:
# One example row to score, keyed by the feature names the model was trained on.
sample_row = {
    'Age': 35,
    'Adults': 2,
    'Children': 1,
    'Rent': 20,
    'IsStudent': False,
    'Distance_to_New_Tenancy': 1,
    'Total_Rooms': 3,
    'Area_m2': 90,
    'Hospital_distance': 1,
    'Gym_distance': 3,
    'School_distance': 2,
    'Supermarket_distance': 1,
    'Distance_to_University': 5,
}

# Wrap each value in a one-element list so the frame has exactly one row.
new_data = pd.DataFrame({column: [value] for column, value in sample_row.items()})

# Run the fitted stacking ensemble on the single row and show the prediction.
predicted_desirability = stacked_model.predict(new_data)
print(predicted_desirability)

[0.69596019]
