In [1]:
import pandas as pd
import lightgbm as lgb
import numpy as np
from hyperopt import fmin, tpe, hp, Trials


In [23]:
!pip install lazypredict

Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.12


In [24]:
from lazypredict.Supervised import LazyRegressor
from sklearn.metrics import mean_squared_error

In [2]:
X_train = pd.read_csv("X_train.csv")
X_test = pd.read_csv("X_test.csv")
y_train = pd.read_csv("y_train.csv")
y_test = pd.read_csv("y_test.csv")

In [25]:
regressor = LazyRegressor(ignore_warnings=False, custom_metric=None)

# Fit and predict using LazyRegressor
models, predictions = regressor.fit(X_train, X_test, y_train, y_test)

# Print the models and their performance
print(models["RMSE"])

 74%|███████▍  | 31/42 [00:29<00:06,  1.63it/s]

QuantileRegressor model failed to execute
Solver interior-point is not anymore available in SciPy >= 1.11.0.


 98%|█████████▊| 41/42 [00:39<00:01,  1.13s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001543 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4126
[LightGBM] [Info] Number of data points in the train set: 2223, number of used features: 69
[LightGBM] [Info] Start training from score 3.706928


100%|██████████| 42/42 [00:39<00:00,  1.05it/s]

Model
LGBMRegressor                            0.61
HistGradientBoostingRegressor            0.61
GradientBoostingRegressor                0.61
RandomForestRegressor                    0.62
ExtraTreesRegressor                      0.63
HuberRegressor                           0.64
TransformedTargetRegressor               0.64
LinearRegression                         0.64
Ridge                                    0.64
LassoLarsIC                              0.64
RidgeCV                                  0.64
LassoCV                                  0.65
LassoLarsCV                              0.65
LarsCV                                   0.65
ElasticNetCV                             0.65
BayesianRidge                            0.65
XGBRegressor                             0.65
LinearSVR                                0.65
AdaBoostRegressor                        0.65
SVR                                      0.66
OrthogonalMatchingPursuit                0.67
OrthogonalMatchingPursuitCV 




In [7]:
X_train.columns = [str(i) for i in range(1, len(X_train.columns) + 1)]

# Rename columns in X_test with numerical names
X_test.columns = [str(i) for i in range(1, len(X_test.columns) + 1)]

In [32]:
import warnings

# Turn off all warnings
# Define the objective function to be minimized
def objective(params):
    # Convert some hyperparameters to integers
    params['num_leaves'] = int(params['num_leaves'])
    params['max_depth'] = int(params['max_depth'])

    # Define the LightGBM dataset
    train_data = lgb.Dataset(X_train, label=y_train)

    # Set hyperparameters
    params['objective'] = 'regression'
    params['metric'] = 'rmse'
    params['verbose'] = -1

    # Train the LightGBM model
    model = lgb.train(params, train_data, num_boost_round=100)

    # Make predictions on the validation set
    predictions = model.predict(X_test)
    # print(y_test)

    # Calculate RMSE (you can replace this with your own evaluation metric)
    rmse = np.sqrt(np.mean((predictions - y_test['score']) ** 2))

    # Return the value to be minimized (in this case, RMSE)
    return rmse

# Define the search space for hyperparameters
space = {
    'learning_rate': hp.loguniform('learning_rate', -5, 0),
    'num_leaves': hp.quniform('num_leaves', 20, 100, 1),
    'max_depth': hp.quniform('max_depth', 5, 20, 1),
    'subsample': hp.uniform('subsample', 0.5, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
}

# Specify the optimization algorithm (Tree-structured Parzen Estimator)
tpe_algorithm = tpe.suggest

# Create Trials object to store optimization results
trials = Trials()

# Run the optimization
best = fmin(fn=objective,
            space=space,
            algo=tpe_algorithm,
            trials=trials,
            max_evals=100)  # You can adjust the number of evaluations

# Print the best hyperparameters
print("Best Hyperparameters:", best)


100%|██████████| 100/100 [00:36<00:00,  2.77trial/s, best loss: 0.5980044944652307]
Best Hyperparameters: {'colsample_bytree': 0.5157206647734114, 'learning_rate': 0.05003214030615743, 'max_depth': 14.0, 'num_leaves': 57.0, 'subsample': 0.6297720913065918}


In [9]:
X_train.shape

(2223, 146)

In [10]:
X_test.shape

(248, 146)

In [26]:
from hyperopt import fmin, tpe, hp, Trials
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingRegressor, GradientBoostingRegressor, RandomForestRegressor, ExtraTreesRegressor
from sklearn.metrics import mean_squared_error

In [29]:
# Define the objective function to be minimized
def objective(params, model):
    # Set hyperparameters
    params['max_depth'] = int(params['max_depth'])
    params['max_iter'] = int(params['max_iter'])
    model.set_params(**params)

    # Train the model
    model.fit(X_train, y_train)

    # Make predictions on the validation set
    predictions = model.predict(X_test)

    # Calculate RMSE (you can replace this with your own evaluation metric)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))

    # Return the value to be minimized (in this case, RMSE)
    return rmse

# Define the search space for hyperparameters
space = {
    'learning_rate': hp.loguniform('learning_rate', -5, 0),
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'subsample': hp.uniform('subsample', 0.5, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
}

# Specify the optimization algorithm (Tree-structured Parzen Estimator)
tpe_algorithm = tpe.suggest

# Create Trials object to store optimization results
trials = Trials()

# Hyperparameter tuning for HistGradientBoostingRegressor
hist_params = {
    'learning_rate': hp.loguniform('learning_rate', -5, 0),
    'max_depth': hp.quniform('max_depth', 3, 20, 1),
    'max_iter': hp.quniform('max_iter', 50, 200, 1),
}
best_hist = fmin(fn=lambda params: objective(params, HistGradientBoostingRegressor()),
                 space=hist_params,
                 algo=tpe_algorithm,
                 trials=trials,
                 max_evals=50)

# Hyperparameter tuning for GradientBoostingRegressor
gb_params = {
    'learning_rate': hp.loguniform('learning_rate', -5, 0),
    'max_depth': hp.quniform('max_depth', 3, 20, 1),
    'n_estimators': hp.quniform('n_estimators', 50, 200, 1),
}
best_gb = fmin(fn=lambda params: objective(params, GradientBoostingRegressor()),
               space=gb_params,
               algo=tpe_algorithm,
               trials=trials,
               max_evals=50)

# Hyperparameter tuning for RandomForestRegressor
rf_params = {
    'n_estimators': hp.quniform('n_estimators', 50, 200, 1),
    'max_depth': hp.quniform('max_depth', 3, 20, 1),
}
best_rf = fmin(fn=lambda params: objective(params, RandomForestRegressor()),
               space=rf_params,
               algo=tpe_algorithm,
               trials=trials,
               max_evals=50)

# Hyperparameter tuning for ExtraTreesRegressor
et_params = {
    'n_estimators': hp.quniform('n_estimators', 50, 200, 1),
    'max_depth': hp.quniform('max_depth', 3, 20, 1),
}
best_et = fmin(fn=lambda params: objective(params, ExtraTreesRegressor()),
               space=et_params,
               algo=tpe_algorithm,
               trials=trials,
               max_evals=50)

# Print the best hyperparameters for each regressor
print("Best Hyperparameters for HistGradientBoostingRegressor:", best_hist)
print("Best Hyperparameters for GradientBoostingRegressor:", best_gb)
print("Best Hyperparameters for RandomForestRegressor:", best_rf)
print("Best Hyperparameters for ExtraTreesRegressor:", best_et)

100%|██████████| 50/50 [01:34<00:00,  1.89s/trial, best loss: 0.6006490179879249]
100%|██████████| 50/50 [00:00<?, ?trial/s, best loss=?]
100%|██████████| 50/50 [00:00<?, ?trial/s, best loss=?]
100%|██████████| 50/50 [00:00<?, ?trial/s, best loss=?]
Best Hyperparameters for HistGradientBoostingRegressor: {'learning_rate': 0.0959216871441787, 'max_depth': 5.0, 'max_iter': 171.0}
Best Hyperparameters for GradientBoostingRegressor: {'learning_rate': 0.0959216871441787, 'max_depth': 5.0, 'max_iter': 171.0}
Best Hyperparameters for RandomForestRegressor: {'learning_rate': 0.0959216871441787, 'max_depth': 5.0, 'max_iter': 171.0}
Best Hyperparameters for ExtraTreesRegressor: {'learning_rate': 0.0959216871441787, 'max_depth': 5.0, 'max_iter': 171.0}
