In [None]:
import mlflow
from hyperopt import tpe, hp, fmin, STATUS_OK, Trials
from hyperopt.pyll import scope
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression

# Make "boston prediction" the active MLflow experiment so that the run
# started later in this notebook is recorded under it.
mlflow.set_experiment("boston prediction")

In [None]:
# Search space
# Hyperopt search space for the LightGBM parameters tuned by `objective`.
# `hp.quniform` yields floats, so integer-valued parameters are wrapped in
# `scope.int` before they reach LightGBM.
space = {
    'boosting_type': hp.choice('boosting_type', ['gbdt', 'goss']),
    # Single-option choice: the metric is fixed, not searched.
    'metric': hp.choice('metric', ['rmse']),
    'max_depth': scope.int(hp.quniform('max_depth', 2, 16, 1)),
    # LightGBM documents `min_child_samples` as an alias of `min_data_in_leaf`.
    # Only the canonical name is searched: supplying both gave the optimiser a
    # second, conflicting dimension (20-500, uncast float) for the same setting.
    'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 30, 150, 1)),
    'num_leaves': scope.int(hp.quniform('num_leaves', 30, 150, 1)),
    # Log-uniform so small learning rates are sampled as densely as large ones.
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'reg_alpha': hp.uniform('reg_alpha', 0.0, 10),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 10),
    # Label now matches the dict key, so the `best` dict returned by `fmin`
    # uses the real LightGBM parameter name.
    'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0),
    # NOTE(review): presumably disabled because `min_data_in_leaf` changes
    # between trials on a shared Dataset -- confirm against the Dataset setup.
    "feature_pre_filter": hp.choice("feature_pre_filter", [False])
}

In [None]:
# Objective
def eval_metrics(actual, pred):
    """Score predictions against the ground truth.

    Parameters
    ----------
    actual : array-like
        Observed target values.
    pred : array-like
        Predicted values, aligned with ``actual``.

    Returns
    -------
    tuple
        ``(rmse, mae, r2)`` -- root mean squared error, mean absolute error
        and the R^2 score, in that order.
    """
    # RMSE is the square root of scikit-learn's mean squared error.
    root_mean_sq_err = np.sqrt(mean_squared_error(actual, pred))
    mean_abs_err = mean_absolute_error(actual, pred)
    r_squared = r2_score(actual, pred)
    return root_mean_sq_err, mean_abs_err, r_squared
def objective(params, n_folds = N_FOLDS):
    """Objective function for Light Gradient Boosting Machine Hyperparameter Tuning.

    Cross-validates one hyperparameter configuration on the module-level
    ``train_set`` and reports its best mean RMSE to hyperopt.

    Parameters
    ----------
    params : dict
        LightGBM parameters sampled from the search space.
    n_folds : int
        Number of cross-validation folds.

    Returns
    -------
    dict
        ``{'loss': <best mean CV RMSE>, 'params': params, 'status': STATUS_OK}``
        in the format hyperopt's ``fmin`` expects.
    """
    # Early stopping is passed as a callback: the `early_stopping_rounds`
    # keyword was removed from `lgb.cv` in LightGBM 4.0, while the callback
    # form is accepted by both 3.x and 4.x.
    cv_results = lgb.cv(
        params,
        train_set,
        nfold=n_folds,
        num_boost_round=400,
        stratified=False,
        callbacks=[lgb.early_stopping(stopping_rounds=30, verbose=False)],
        seed=50,
    )

    # LightGBM 4.x prefixes CV result keys with the dataset name
    # ('valid rmse-mean'); 3.x used the bare 'rmse-mean'.
    rmse_key = 'valid rmse-mean' if 'valid rmse-mean' in cv_results else 'rmse-mean'

    # Extract the best score
    best_score = min(cv_results[rmse_key])

    # One concise line per trial instead of dumping the full per-iteration
    # CV history into the notebook output.
    print(params)
    print('best cv rmse:', best_score)

    # Dictionary with information for evaluation
    return {'loss': best_score, 'params': params, 'status': STATUS_OK}

In [None]:
# Number of hyperparameter configurations the optimiser evaluates.
MAX_EVALS = 10

# Records the result dict returned by `objective` for every trial.
bayes_trials = Trials()

print('optimization starting')
# Tree-structured Parzen Estimator search over `space`.
best = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=MAX_EVALS,
    trials=bayes_trials,
)
print('optimization complete')

# Take the trial with the lowest loss and reuse its parameters: the result
# dict carries the fully evaluated values that were passed to `objective`.
trial_losses = [trial_result['loss'] for trial_result in bayes_trials.results]
best_model = bayes_trials.results[np.argmin(trial_losses)]
params = best_model['params']

In [None]:
# Train the final model with the tuned parameters and record the run in MLflow.
# The `with` block ends the run on exit (including on error), so no explicit
# `mlflow.end_run()` is needed afterwards.
with mlflow.start_run() as run:
    # Training LightGBM model. Early stopping is passed as a callback: the
    # `early_stopping_rounds` keyword was removed from `lgb.train` in
    # LightGBM 4.0, while the callback form works on both 3.x and 4.x.
    gbm = lgb.train(
        params,
        lgb_train,
        num_boost_round=500,
        valid_sets=lgb_eval,
        callbacks=[lgb.early_stopping(stopping_rounds=30)],
    )

    # Predict with the iteration selected by early stopping.
    y_test['pred'] = gbm.predict(x_test, num_iteration=gbm.best_iteration)

    # NOTE(review): column 3 is presumably the same column as 'target' used
    # below -- confirm, and prefer the label over the positional index.
    (rmse, mae, r2) = eval_metrics(y_test.iloc[:, 3], y_test['pred'])

    # Per-row absolute error (stored under the column name 'mae').
    y_test['mae'] = abs(y_test['target'] - y_test['pred'])

    # tracking run parameters
    mlflow.log_param("hyper-parameters", params)
    # Log a plain list: the repr of a pandas Index carries dtype noise and
    # is elided for long indexes.
    mlflow.log_param("features", list(x_train.columns))
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)

    # `gbm` is a LightGBM Booster, so it is logged with the LightGBM flavor
    # (matching the `mlflow.lightgbm.save_model` call below).
    mlflow.lightgbm.log_model(gbm, "model")

    # NOTE(review): saving to an existing path may fail when the cell is
    # re-run with the same `mod`/`valdate` -- confirm.
    modelpath = "micro_{}_{}".format(mod, valdate)
    mlflow.lightgbm.save_model(gbm, modelpath)