In [44]:
from hyperopt import tpe
from hyperopt import STATUS_OK
from hyperopt import Trials
from hyperopt import hp 
from hyperopt import fmin
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split 
import pandas as pd  
import mlflow
import numpy as np



df = pd.read_csv("../data/training_data/training_data.csv")
df.head()

df = df.astype('float32')
X = df.iloc[:,:-1]
Y = df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.33,
                                                    random_state=4284, stratify=Y)

## Define Objective function

In [45]:
N_FOLDS = 4
MAX_EVALS = 10

def objective(params, n_folds = N_FOLDS):
    '''Objective function for logistic regression hyperparameter tuning'''
    print(params)
    mlflow.sklearn.autolog()
    with mlflow.start_run(nested=True):
        clf = LogisticRegression(**params, random_state=0)
        scores = cross_val_score(clf, X_train, y_train, cv=N_FOLDS, scoring='f1_macro')
        
        # Extract the best score
        best_score = max(scores)
        
        # loss to be minimized
        loss  = 1 - best_score
        
        # Dictionary with information for evaluation
        return {'loss':loss, 'params':params, 'status':STATUS_OK}

## Define parameter space

In [46]:
space = {
    'warm_start' : hp.choice('warm_start', [True, False]),
    'fit_intercept' : hp.choice('fit_intercept', [True, False]),
    'tol' : hp.uniform('tol', 0.00001, 0.0001),
    'C' : hp.uniform('C', 0.05, 3),
    'solver' : hp.choice('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
    'max_iter' : hp.choice('max_iter', range(5,1000))
}

## Create experiment

In [47]:
mlflow.set_experiment("Hyperopt_Optimization")

## Define Optimization Trials

In [48]:
# Algorithm
tpe_algorithm = tpe.suggest

# Trials object to track progress 
bayes_trials = Trials()

with mlflow.start_run(run_name='hyper_opt_logistic') as run:
    best = fmin(fn=objective, space=space, algo=tpe_algorithm, max_evals=MAX_EVALS, trials=bayes_trials)
    best = {k:float(v) for k,v in best.items()}
    mlflow.log_dict(best, "best_params.json")

{'C': 2.0993097487826495, 'fit_intercept': True, 'max_iter': 960, 'solver': 'newton-cg', 'tol': 6.984187884538946e-05, 'warm_start': False}
 10%|█         | 1/10 [00:00<00:04,  2.00trial/s, best loss: 0.5227875683321228]









{'C': 0.14923719483551007, 'fit_intercept': False, 'max_iter': 823, 'solver': 'sag', 'tol': 6.07286466774121e-05, 'warm_start': False}
{'C': 2.258591403382529, 'fit_intercept': False, 'max_iter': 919, 'solver': 'saga', 'tol': 6.353163011008352e-05, 'warm_start': False}
{'C': 2.3019050187954977, 'fit_intercept': False, 'max_iter': 603, 'solver': 'saga', 'tol': 8.997156375937445e-05, 'warm_start': True}
{'C': 1.9913033020793907, 'fit_intercept': True, 'max_iter': 126, 'solver': 'liblinear', 'tol': 9.579168073201182e-05, 'warm_start': True}
{'C': 2.35842968178011, 'fit_intercept': True, 'max_iter': 903, 'solver': 'liblinear', 'tol': 5.576281255921629e-05, 'warm_start': False}
{'C': 2.143730187957073, 'fit_intercept': False, 'max_iter': 512, 'solver': 'saga', 'tol': 6.200720649242242e-05, 'warm_start': True}
{'C': 1.8413762668401492, 'fit_intercept': True, 'max_iter': 34, 'solver': 'sag', 'tol': 2.769311295457482e-05, 'warm_start': True}
 80%|████████  | 8/10 [00:02<00:00,  3.89trial/s, be







{'C': 0.33478496714041767, 'fit_intercept': False, 'max_iter': 627, 'solver': 'liblinear', 'tol': 9.886002068644733e-05, 'warm_start': True}
{'C': 1.7398555698652765, 'fit_intercept': False, 'max_iter': 41, 'solver': 'newton-cg', 'tol': 1.2318958277358274e-05, 'warm_start': False}
100%|██████████| 10/10 [00:02<00:00,  3.40trial/s, best loss: 0.5188949938949939]












{'C': 2.4097104981817594,
 'fit_intercept': 0.0,
 'max_iter': 549.0,
 'solver': 2.0,
 'tol': 7.741948719282952e-05,
 'warm_start': 1.0}

In [25]:
hyper  = dict(mlflow.get_experiment_by_name("Hyperopt_Optimization"))
experiment_id=hyper['experiment_id']
df = mlflow.search_runs([experiment_id], order_by=["metrics.rmse DESC"])
df

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.f1_score-3_X_test-3,metrics.training_score,metrics.training_accuracy_score,metrics.training_f1_score,...,params.C,params.class_weight,tags.estimator_class,tags.mlflow.parentRunId,tags.mlflow.source.type,tags.estimator_name,tags.mlflow.source.name,tags.mlflow.user,tags.mlflow.log-model.history,tags.mlflow.runName
0,eb05d31ad7364d08a2b3fad167540be7,3,FINISHED,/data/artifacts/3/eb05d31ad7364d08a2b3fad16754...,2022-04-03 07:40:26.824000+00:00,2022-04-03 07:40:34.244000+00:00,0.477212,0.566318,0.566318,0.528268,...,2.170177838396091,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""eb05d31ad7364d08a2b3fad167540be7""...",
1,5c20531c99b1478fab7cc97fc0d3325b,3,FINISHED,/data/artifacts/3/5c20531c99b1478fab7cc97fc0d3...,2022-04-03 07:40:19.246000+00:00,2022-04-03 07:40:26.666000+00:00,0.479156,0.568935,0.568935,0.525302,...,0.5879326820391236,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""5c20531c99b1478fab7cc97fc0d3325b""...",
2,a423d102a9e3445eb038a808d27aa4ec,3,FINISHED,/data/artifacts/3/a423d102a9e3445eb038a808d27a...,2022-04-03 07:40:11.984000+00:00,2022-04-03 07:40:19.091000+00:00,0.481105,0.565445,0.565445,0.527052,...,2.358100538696023,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""a423d102a9e3445eb038a808d27aa4ec""...",
3,715638f8283c4fd4a523ef13e668aff6,3,FINISHED,/data/artifacts/3/715638f8283c4fd4a523ef13e668...,2022-04-03 07:40:04.440000+00:00,2022-04-03 07:40:11.824000+00:00,0.477206,0.570681,0.570681,0.528931,...,1.7887263194843137,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""715638f8283c4fd4a523ef13e668aff6""...",
4,f9237ceb0fab4dd9986517d1cbf6c412,3,FINISHED,/data/artifacts/3/f9237ceb0fab4dd9986517d1cbf6...,2022-04-03 07:39:56.949000+00:00,2022-04-03 07:40:04.257000+00:00,0.477206,0.570681,0.570681,0.528931,...,1.650397168666255,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""f9237ceb0fab4dd9986517d1cbf6c412""...",
5,45a3b36399364e1fa5d7d05b80b1cf47,3,FINISHED,/data/artifacts/3/45a3b36399364e1fa5d7d05b80b1...,2022-04-03 07:39:49.212000+00:00,2022-04-03 07:39:56.792000+00:00,0.477206,0.569808,0.569808,0.528255,...,2.591358911104831,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""45a3b36399364e1fa5d7d05b80b1cf47""...",
6,1d8258e8bcc74ab9a76927af96ccf8d3,3,FINISHED,/data/artifacts/3/1d8258e8bcc74ab9a76927af96cc...,2022-04-03 07:39:41.812000+00:00,2022-04-03 07:39:49.060000+00:00,0.477212,0.566318,0.566318,0.528268,...,2.2449974227045306,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""1d8258e8bcc74ab9a76927af96ccf8d3""...",
7,bebf1a8313b84a7b8242b1c0c60d7bf8,3,FINISHED,/data/artifacts/3/bebf1a8313b84a7b8242b1c0c60d...,2022-04-03 07:39:34.225000+00:00,2022-04-03 07:39:41.479000+00:00,0.471371,0.572426,0.572426,0.525611,...,0.1912758306070873,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""bebf1a8313b84a7b8242b1c0c60d7bf8""...",
8,d3fef944c6904fd58d77dce88f2c4d68,3,FINISHED,/data/artifacts/3/d3fef944c6904fd58d77dce88f2c...,2022-04-03 07:39:26.551000+00:00,2022-04-03 07:39:34.069000+00:00,0.477206,0.569808,0.569808,0.528255,...,2.9740757320342928,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""d3fef944c6904fd58d77dce88f2c4d68""...",
9,bc63bc327c1444509be2e145e58ee907,3,FINISHED,/data/artifacts/3/bc63bc327c1444509be2e145e58e...,2022-04-03 07:39:15.305000+00:00,2022-04-03 07:39:26.382000+00:00,0.477147,0.566318,0.566318,0.527194,...,0.7108750750233658,,sklearn.linear_model._logistic.LogisticRegression,e3219a9f7d2d41bda39e0bcdfb7ae91f,LOCAL,LogisticRegression,/opt/conda/lib/python3.8/site-packages/ipykern...,david,"[{""run_id"": ""bc63bc327c1444509be2e145e58ee907""...",
