# All regression models combined

Excludes polynomial regression, XGBoost, CatBoost, and LightGBM.

In [None]:
import os, sys, pathlib

# Name of the shared helper folder, located beside the current working directory.
UTILS_FOLDER = "S00 - Utils"

# Resolve the working directory, its parent, and the utils folder under that parent.
curPath = os.getcwd()
parPath = pathlib.Path(curPath).parent
utilPath = os.path.join(parPath, UTILS_FOLDER)

# Make the three locations importable. Entries already on sys.path are skipped
# so that re-running this cell does not keep growing the module search path.
for p in [curPath, str(parPath), utilPath]:
    if p not in sys.path:
        sys.path.append(p)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from utils import plot_reg, plot_res, store_results
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor



In [None]:
# Read the housing spreadsheet that lives in the utils folder
DATA_FILE = f"{utilPath}/housing_data.xlsx"
df = pd.read_excel(DATA_FILE)

# One predictor column (the list selector keeps X two-dimensional) and the target column
FEATURES, TARGET = ["LSTAT"], "MEDV"
X = df[FEATURES].values
y = df[TARGET].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
split = train_test_split(X, y, test_size=0.3, random_state=0)
X_train, X_test, y_train, y_test = split

In [None]:
# Seed shared by the tree-based / ensemble estimators so the comparison is
# reproducible from run to run (the train/test split is already seeded with 0).
RANDOM_STATE = 0

# One entry per regressor to compare:
#   "model"  - the estimator class (instantiated later as Model(**params))
#   "params" - keyword arguments passed to that class's constructor
modelSets = [
    {"model": LinearRegression, "params": {}},
    {"model": Ridge, "params": {"alpha": 1}},
    {"model": Lasso, "params": {"alpha": 1}},
    {"model": ElasticNet, "params": {"alpha": 1, "l1_ratio": 0.5}},
    {"model": SVR, "params": {"kernel": "rbf", "C": 1, "epsilon": 0.1}},
    {"model": DecisionTreeRegressor, "params": {"max_depth": 3, "random_state": RANDOM_STATE}},
    {
        "model": RandomForestRegressor,
        "params": {"n_estimators": 100, "max_depth": 3, "random_state": RANDOM_STATE},
    },
    {
        "model": AdaBoostRegressor,
        # estimator=None selects AdaBoost's default base estimator.
        # NOTE(review): the keyword is `estimator` in scikit-learn >= 1.2;
        # older releases name it `base_estimator` - confirm the installed version.
        "params": {
            "estimator": None,
            "n_estimators": 50,
            "learning_rate": 0.1,
            "random_state": RANDOM_STATE,
        },
    },
    {
        "model": GradientBoostingRegressor,
        "params": {
            "n_estimators": 50,
            "learning_rate": 1.0,
            "max_depth": 1,
            "random_state": RANDOM_STATE,
        },
    },
    {"model": KNeighborsRegressor, "params": {"n_neighbors": 5}},
]

In [None]:
# Accumulator handed to store_results for every model
# (presumably one record is added per call - confirm in utils.store_results).
results = []

for spec in modelSets:
    estimator_cls, hyperparams = spec["model"], spec["params"]

    # Build the estimator from its configured keyword arguments and fit it
    # on the training split (scikit-learn's fit() returns the estimator itself).
    model = estimator_cls(**hyperparams).fit(X_train, y_train)

    # Predictions on both splits, training split first
    predictions = {
        "train": model.predict(X_train),
        "test": model.predict(X_test),
    }

    # Record this model's outcome under its class name
    store_results(
        results=results,
        model_name=estimator_cls.__name__,
        model=model,
        y_train=y_train,
        y_test=y_test,
        y_train_pred=predictions["train"],
        y_test_pred=predictions["test"],
        params=hyperparams,
    )

    # Regression plot for this model; the residual plot (plot_res) is left disabled.
    plot_reg(X_train, X_test, y_train, y_test, predictions["train"], predictions["test"])

# Tabulate the collected records, showing '-' wherever a model has no value
# for a column. Note that this rebinds `df`, replacing the loaded dataset.
df = pd.DataFrame.from_records(results).fillna('-')
display(df)