In [102]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the cleaned dataset. `df` is kept unmodified so this cell can be re-run
# safely and the raw values stay available for inspection.
df = pd.read_excel('../../../Data/Cleaned_Combined.xlsx')

columns_to_scale = ['num_of_rooms', 'area']

# Features / target. The scaled columns are placed first, followed by the
# remaining features in their original order.
feature_columns = columns_to_scale + [
    col for col in df.columns if col not in columns_to_scale and col != 'price'
]
# Cast the columns that will be scaled to float up front so the scaled values
# can be written back without any dtype conflict.
X = df[feature_columns].astype({col: 'float64' for col in columns_to_scale})
y = df['price']

# Split BEFORE fitting the scaler: fitting MinMaxScaler on the full dataset
# would leak the test set's min/max into the training features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the assignments below never write into `X`.
X_train = X_train.copy()
X_test = X_test.copy()

# Learn the scaling range from the training rows only, then apply that same
# transformation to the held-out rows.
scaler = MinMaxScaler()
X_train[columns_to_scale] = scaler.fit_transform(X_train[columns_to_scale])
X_test[columns_to_scale] = scaler.transform(X_test[columns_to_scale])

In [103]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import GridSearchCV

# Registry of candidate models, keyed by display name.
#   "model":  an unfitted estimator instance
#   "params": the hyper-parameter grid handed to GridSearchCV
#             (an empty dict means "fit once with the estimator's defaults")
all_models = {
    "Linear Regression": {
        "model": LinearRegression(),
        "params": {}
    },
    "Gradient Boosting": {
        # Fixed seed so repeated runs of the grid search give the same result;
        # 42 matches the seed used for the train/test split.
        "model": GradientBoostingRegressor(random_state=42),
        "params": {
            'learning_rate': [0.1, 0.05, 0.01],
            'n_estimators': [100, 200, 300],
            'max_depth': [3, 4, 5]
        }
    },
}

# Grid search and evaluation for each model: tune on the training split with
# 5-fold CV (scored by negative MAE), then report metrics on the test split.
for name, model_data in all_models.items():
    model = model_data["model"]
    params = model_data["params"]

    # Scaling is opt-in through an optional "scale" key on the model entry.
    # The previous check (`"Regressor" in name`) never matched either display
    # name ("Linear Regression", "Gradient Boosting"), so it was dead code.
    if model_data.get("scale", False):
        # make_pipeline names each step after its lower-cased class name, and
        # GridSearchCV addresses pipeline parameters as "<step>__<param>", so
        # the grid keys must be prefixed before wrapping the estimator.
        step_name = type(model).__name__.lower()
        params = {f"{step_name}__{key}": values for key, values in params.items()}
        model = make_pipeline(RobustScaler(), model)

    grid_search = GridSearchCV(model, params, cv=5, scoring='neg_mean_absolute_error')
    grid_search.fit(X_train, y_train)

    # best_estimator_ is refit on the whole training split by GridSearchCV.
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    # Take the square root explicitly: the `squared=False` argument of
    # mean_squared_error is deprecated and removed in recent scikit-learn.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    r2 = r2_score(y_test, y_pred)

    print(f"Model: {name}")
    print("Best Parameters:", grid_search.best_params_)
    print(f"Mean Absolute Error: {mae}")
    print(f"Root Mean Squared Error: {rmse}")
    print(f"R-squared: {r2}")
    print("-" * 30)

Model: Linear Regression
Best Parameters: {}
Mean Absolute Error: 533667266.85258967
Root Mean Squared Error: 717743223.7766279
R-squared: 0.3509294363557114
------------------------------
