In [13]:
# diagnostics
import numpy as np
from datetime import datetime, timedelta
# testing models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score
import tests.test_data as test_data
# hyperparameter optimization
import ray.tune as tune
# testing utils
import scripts.utils as utils
# testing write
import joblib
import os

from sklearn.datasets import make_classification
X, y = make_classification(n_samples=11000, n_features=1000, n_informative=50, n_redundant=0, n_classes=2, class_sep=2.5)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

# normalization
normalizer = StandardScaler()
normalizer.fit(X_train)

X_train = normalizer.transform(X_train)
X_test = normalizer.transform(X_test)

# testing hyperopt optimize methods
space = {'max_iter': tune.quniform(10, 10000, 10),
            'tol': tune.loguniform(1e-5, 1e-1),
            'C': tune.loguniform(0.001, 1000.0)
            }
data_dict = {'trainx': X_train,
                'testx': X_test,
                'trainy': y_train,
                'testy': y_test
                }

In [26]:
from sklearn import linear_model
from sklearn.metrics import precision_score, recall_score

def fresh_start(params, data_dict):
    '''
    Required method for hyperopt optimization.
    Trains and tests a fresh logistic regression model
    with given input parameters.
    This method does not overwrite self.model (self.optimize() does).
    Inputs:
    params: dictionary of logistic regression input functions.
        keys max_iter, tol, and C supported.
    data_dict: compact data representation with the four requisite
        data structures used for training and testing a model.
        keys trainx, trainy, testx, and testy required.
    '''

    # unpack data
    trainx = data_dict['trainx']
    trainy = data_dict['trainy']
    testx = data_dict['testx']
    testy = data_dict['testy']
    # supervised logistic regression
    clf = linear_model.LogisticRegression(
            random_state=0,
            max_iter=params['max_iter'],
            tol=params['tol'],
            C=params['C']
            )
    # train and test model
    clf.fit(trainx, trainy)
    # uses balanced_accuracy accounts for class imbalanced data
    pred = clf.predict(testx)
    acc = balanced_accuracy_score(testy, pred)
    rec = recall_score(testy, pred)
    prec = precision_score(testy, pred)

    # loss function minimizes misclassification
    return {'score': acc+rec+prec,
            'loss': (1-acc) + 20*(1-rec)+(1-prec),
            'model': clf,
            'params': params,
            'accuracy': acc,
            'precision': prec,
            'recall': rec}

In [27]:
from functools import partial
from ray.tune.search.hyperopt import HyperOptSearch
from ray.tune.search import ConcurrencyLimiter

algo = HyperOptSearch()
algo = ConcurrencyLimiter(algo, max_concurrent=4)

fmin_objective = partial(fresh_start, data_dict=data_dict)
tuner = tune.Tuner(
    fmin_objective, param_space=space, tune_config=tune.TuneConfig(num_samples=10, metric='score', mode='max', search_alg=algo)
)

In [28]:
results = tuner.fit()

best_result = results.get_best_result()  # Get best result object
best_config = best_result.config  # Get best trial's hyperparameters
best_logdir = best_result.log_dir  # Get best trial's logdir
best_checkpoint = best_result.checkpoint  # Get best trial's best checkpoint
best_metrics = best_result.metrics  # Get best trial's last results
best_result_df = best_result.metrics_dataframe  # Get best result as pandas dataframe

Trial name,status,loc,C,max_iter,tol,iter,total time (s),score,loss,accuracy
fresh_start_b9e48de8,TERMINATED,172.21.93.86:25712,221.308,3540,6.84678e-05,1,4.2281,2.77873,1.70402,0.926372
fresh_start_bcaf0896,TERMINATED,172.21.93.86:25741,0.189275,6920,5.91661e-05,1,0.661553,2.84033,1.22863,0.946824
fresh_start_bdd4f2a8,TERMINATED,172.21.93.86:25748,0.233134,6750,0.0136973,1,0.755042,2.8362,1.26725,0.945461
fresh_start_c083f26a,TERMINATED,172.21.93.86:25804,29.5431,8490,0.000300635,1,2.57576,2.78147,1.68405,0.927281
fresh_start_c16acd84,TERMINATED,172.21.93.86:25833,0.117569,9000,0.00621561,1,1.0258,2.85384,1.1634,0.951373
fresh_start_c3ef62d6,TERMINATED,172.21.93.86:25748,0.850306,7430,5.77503e-05,1,1.59859,2.80747,1.48563,0.935916
fresh_start_c49f36fc,TERMINATED,172.21.93.86:25872,8.65052,2690,4.11847e-05,1,3.10422,2.78562,1.64541,0.928644
fresh_start_c696e676,TERMINATED,172.21.93.86:25804,0.0031039,5100,2.92086e-05,1,0.525298,2.91681,0.652158,0.972277
fresh_start_c7281754,TERMINATED,172.21.93.86:25909,0.0328906,780,0.00156261,1,0.715757,2.87704,1.00227,0.959101
fresh_start_c86b2552,TERMINATED,172.21.93.86:25940,22.7906,5780,0.0027725,1,1.24519,2.78289,1.66538,0.927734




Result for fresh_start_b9e48de8:
  accuracy: 0.9263716574269667
  date: 2022-11-01_17-11-26
  done: false
  experiment_id: da3514c7a8204656a7cb329802368ee6
  hostname: King-George-The-V
  iterations_since_restore: 1
  loss: 1.704023960264476
  model: "LogisticRegression(C=221.3077217918963, max_iter=3540.0, random_state=0,\n\
    \                   tol=6.8467783184126e-05)"
  node_ip: 172.21.93.86
  params:
    C: 221.3077217918963
    max_iter: 3540.0
    tol: 6.8467783184126e-05
  pid: 25712
  precision: 0.9304029304029304
  recall: 0.9219600725952813
  score: 2.7787346604251786
  time_since_restore: 4.228104114532471
  time_this_iter_s: 4.228104114532471
  time_total_s: 4.228104114532471
  timestamp: 1667337086
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: b9e48de8
  warmup_time: 0.003507852554321289
  




Result for fresh_start_b9e48de8:
  accuracy: 0.9263716574269667
  date: 2022-11-01_17-11-26
  done: true
  experiment_id: da3514c7a8204656a7cb329802368ee6
  experiment_tag: 1_C=221.3077,max_iter=3540.0000,tol=0.0001
  hostname: King-George-The-V
  iterations_since_restore: 1
  loss: 1.704023960264476
  model: "LogisticRegression(C=221.3077217918963, max_iter=3540.0, random_state=0,\n\
    \                   tol=6.8467783184126e-05)"
  node_ip: 172.21.93.86
  params:
    C: 221.3077217918963
    max_iter: 3540.0
    tol: 6.8467783184126e-05
  pid: 25712
  precision: 0.9304029304029304
  recall: 0.9219600725952813
  score: 2.7787346604251786
  time_since_restore: 4.228104114532471
  time_this_iter_s: 4.228104114532471
  time_total_s: 4.228104114532471
  timestamp: 1667337086
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: b9e48de8
  warmup_time: 0.003507852554321289
  
Result for fresh_start_bcaf0896:
  accuracy: 0.9468237911530286
  date: 2022-11-01_17-11-28
  done: fa



Result for fresh_start_bdd4f2a8:
  accuracy: 0.9454609767305016
  date: 2022-11-01_17-11-29
  done: false
  experiment_id: ad9ffb018868486b9ac13846a83b75f0
  hostname: King-George-The-V
  iterations_since_restore: 1
  loss: 1.2672518239941235
  model: "LogisticRegression(C=0.23313398718833941, max_iter=6750.0, random_state=0,\n\
    \                   tol=0.013697326625803039)"
  node_ip: 172.21.93.86
  params:
    C: 0.23313398718833941
    max_iter: 6750.0
    tol: 0.013697326625803039
  pid: 25748
  precision: 0.9488117001828154
  recall: 0.941923774954628
  score: 2.836196451867945
  time_since_restore: 0.755042314529419
  time_this_iter_s: 0.755042314529419
  time_total_s: 0.755042314529419
  timestamp: 1667337089
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: bdd4f2a8
  warmup_time: 0.003267049789428711
  
Result for fresh_start_bcaf0896:
  accuracy: 0.9468237911530286
  date: 2022-11-01_17-11-28
  done: true
  experiment_id: 6426518752b044fbb91c2a97bba15922
  



Result for fresh_start_c083f26a:
  accuracy: 0.9272807513413268
  date: 2022-11-01_17-11-35
  done: false
  experiment_id: 27bb168a4db74503a0ecfc96a86d8cc5
  hostname: King-George-The-V
  iterations_since_restore: 1
  loss: 1.6840502951076872
  model: "LogisticRegression(C=29.543062769662203, max_iter=8490.0, random_state=0,\n\
    \                   tol=0.00030063475326946263)"
  node_ip: 172.21.93.86
  params:
    C: 29.543062769662203
    max_iter: 8490.0
    tol: 0.00030063475326946263
  pid: 25804
  precision: 0.9313186813186813
  recall: 0.9228675136116152
  score: 2.7814669462716237
  time_since_restore: 2.5757622718811035
  time_this_iter_s: 2.5757622718811035
  time_total_s: 2.5757622718811035
  timestamp: 1667337095
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: c083f26a
  warmup_time: 0.003150463104248047
  
Result for fresh_start_c16acd84:
  accuracy: 0.951372566520881
  date: 2022-11-01_17-11-35
  done: true
  experiment_id: 057953aa9a4a4e69a3053dea9fc9f



Result for fresh_start_c696e676:
  accuracy: 0.9722767678570838
  date: 2022-11-01_17-11-40
  done: true
  experiment_id: 27bb168a4db74503a0ecfc96a86d8cc5
  experiment_tag: 8_C=0.0031,max_iter=5100.0000,tol=0.0000
  hostname: King-George-The-V
  iterations_since_restore: 1
  loss: 0.6521584597145641
  model: "LogisticRegression(C=0.0031039009824053426, max_iter=5100.0, random_state=0,\n\
    \                   tol=2.920856784232474e-05)"
  node_ip: 172.21.93.86
  params:
    C: 0.0031039009824053426
    max_iter: 5100.0
    tol: 2.920856784232474e-05
  pid: 25804
  precision: 0.9744758432087511
  recall: 0.97005444646098
  score: 2.9168070575268152
  time_since_restore: 0.5252981185913086
  time_this_iter_s: 0.5252981185913086
  time_total_s: 0.5252981185913086
  timestamp: 1667337100
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: c696e676
  warmup_time: 0.003150463104248047
  
Result for fresh_start_c49f36fc:
  accuracy: 0.9286435657638538
  date: 2022-11-01_17-11-4



Result for fresh_start_c3ef62d6:
  accuracy: 0.9359163170787341
  date: 2022-11-01_17-11-37
  done: false
  experiment_id: ad9ffb018868486b9ac13846a83b75f0
  hostname: King-George-The-V
  iterations_since_restore: 1
  loss: 1.4856294709171407
  model: "LogisticRegression(C=0.8503058036376933, max_iter=7430.0, random_state=0,\n\
    \                   tol=5.7750271411559474e-05)"
  node_ip: 172.21.93.86
  params:
    C: 0.8503058036376933
    max_iter: 7430.0
    tol: 5.7750271411559474e-05
  pid: 25748
  precision: 0.939615736505032
  recall: 0.9319419237749547
  score: 2.807473977358721
  time_since_restore: 1.5985937118530273
  time_this_iter_s: 1.5985937118530273
  time_total_s: 1.5985937118530273
  timestamp: 1667337097
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: c3ef62d6
  warmup_time: 0.003267049789428711
  
Result for fresh_start_c7281754:
  accuracy: 0.9591006912419545
  date: 2022-11-01_17-11-44
  done: false
  experiment_id: 8cf205d7366f45a6961b1b2b28f66

2022-11-01 17:11:48,960	INFO tune.py:758 -- Total run time: 32.28 seconds (31.10 seconds for the tuning loop).


In [29]:
best_metrics

{'score': 2.9168070575268152,
 'loss': 0.6521584597145641,
 'model': LogisticRegression(C=0.0031039009824053426, max_iter=5100.0, random_state=0,
                    tol=2.920856784232474e-05),
 'params': {'max_iter': 5100.0,
  'tol': 2.920856784232474e-05,
  'C': 0.0031039009824053426},
 'accuracy': 0.9722767678570838,
 'precision': 0.9744758432087511,
 'recall': 0.97005444646098,
 'time_this_iter_s': 0.5252981185913086,
 'done': True,
 'timesteps_total': None,
 'episodes_total': None,
 'training_iteration': 1,
 'trial_id': 'c696e676',
 'experiment_id': '27bb168a4db74503a0ecfc96a86d8cc5',
 'date': '2022-11-01_17-11-40',
 'timestamp': 1667337100,
 'time_total_s': 0.5252981185913086,
 'pid': 25804,
 'hostname': 'King-George-The-V',
 'node_ip': '172.21.93.86',
 'config': {'max_iter': 5100.0,
  'tol': 2.920856784232474e-05,
  'C': 0.0031039009824053426},
 'time_since_restore': 0.5252981185913086,
 'timesteps_since_restore': 0,
 'iterations_since_restore': 1,
 'warmup_time': 0.003150463104