In [8]:
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
import hyperopt

sns.set_theme()

In [2]:
%cd ../..

C:\Users\devsn\Desktop\courses\MLOps Zoomcamp


In [3]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("iris-classification")

2023/07/16 01:44:13 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/07/16 01:44:13 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2023/07/16 01:44:13 INFO mlflow.tracking.fluent: Experiment with name 'iris-classification' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///C:/Users/devsn/Desktop/courses/MLOps Zoomcamp/mlruns/3', creation_time=1689452053378, experiment_id='3', last_update_time=1689452053378, lifecycle_stage='active', name='iris-classification', tags={}>

In [4]:
X, y = load_iris(return_X_y=True)

X.shape, y.shape

((150, 4), (150,))

In [5]:
type(X)

numpy.ndarray

In [7]:
X[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [16]:
np.unique(y)

array([0, 1, 2])

In [11]:
X_train, X_val, y_train, y_val = train_test_split(X, y, 
                                                    test_size=0.25, 
                                                    random_state=42)

In [12]:
dtrain = xgb.DMatrix(X_train, y_train)
dval = xgb.DMatrix(X_val, y_val)

In [19]:
from hyperopt import fmin, hp, tpe, Trials, STATUS_OK
from hyperopt.pyll import scope
from sklearn.metrics import mean_squared_error

In [29]:
exp(-1), exp(0)

(0.36787944117144233, 1.0)

In [30]:
space = {
    "max_depth": scope.int(hp.quniform('max_depth', 5.5, 20.5, 1)),
    "eta": hp.loguniform('eta', -3, 0), 
    "subsample": hp.uniform('subsample', 0.5, 1),
    "lambda": hp.loguniform('lambda', -5, 0),
    "alpha": hp.loguniform('alpha', -5, 0),
    "num_class": 3,
    "objective": "multi:softmax",
    "seed": 42
}


watchlist = [(dval, "eval"), (dtrain, "train")]
# eval_metric = "mlogloss"
num_round = 100 # number of boosting rounds = number of estimators
early_stopping_rounds = 50

def objective(params):
    with mlflow.start_run():
        
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        
        booster = xgb.train(
            params=params,
            dtrain=dtrain,
            num_boost_round=num_round,
            evals=watchlist,
            early_stopping_rounds=early_stopping_rounds
        )
        
        y_pred = booster.predict(dval)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        
        mlflow.log_metric("RMSE", rmse)
    
    return {"loss": rmse, "status": STATUS_OK}


trials = Trials()

In [31]:
best = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    trials=trials,
    max_evals=10
)

[0]	eval-mlogloss:0.97744	train-mlogloss:0.98287                                                                               
[1]	eval-mlogloss:0.87523	train-mlogloss:0.88348                                                                               
[2]	eval-mlogloss:0.78838	train-mlogloss:0.79995                                                                               
[3]	eval-mlogloss:0.71195	train-mlogloss:0.72689                                                                               
[4]	eval-mlogloss:0.64491	train-mlogloss:0.66179                                                                               
[5]	eval-mlogloss:0.58144	train-mlogloss:0.60337                                                                               
[6]	eval-mlogloss:0.52672	train-mlogloss:0.55029                                                                               
[7]	eval-mlogloss:0.47821	train-mlogloss:0.50474                                                        

[64]	eval-mlogloss:0.02246	train-mlogloss:0.03805                                                                              
[65]	eval-mlogloss:0.02189	train-mlogloss:0.03763                                                                              
[66]	eval-mlogloss:0.02185	train-mlogloss:0.03739                                                                              
[67]	eval-mlogloss:0.02164	train-mlogloss:0.03703                                                                              
[68]	eval-mlogloss:0.02168	train-mlogloss:0.03675                                                                              
[69]	eval-mlogloss:0.02120	train-mlogloss:0.03636                                                                              
[70]	eval-mlogloss:0.02120	train-mlogloss:0.03582                                                                              
[71]	eval-mlogloss:0.02129	train-mlogloss:0.03557                                                       

[28]	eval-mlogloss:0.01818	train-mlogloss:0.03176                                                                              
[29]	eval-mlogloss:0.01790	train-mlogloss:0.03144                                                                              
[30]	eval-mlogloss:0.01741	train-mlogloss:0.03094                                                                              
[31]	eval-mlogloss:0.01742	train-mlogloss:0.03059                                                                              
[32]	eval-mlogloss:0.01735	train-mlogloss:0.03003                                                                              
[33]	eval-mlogloss:0.01676	train-mlogloss:0.02951                                                                              
[34]	eval-mlogloss:0.01640	train-mlogloss:0.02920                                                                              
[35]	eval-mlogloss:0.01623	train-mlogloss:0.02871                                                       

[92]	eval-mlogloss:0.01161	train-mlogloss:0.02187                                                                              
[93]	eval-mlogloss:0.01154	train-mlogloss:0.02182                                                                              
[94]	eval-mlogloss:0.01153	train-mlogloss:0.02178                                                                              
[95]	eval-mlogloss:0.01151	train-mlogloss:0.02173                                                                              
[96]	eval-mlogloss:0.01149	train-mlogloss:0.02170                                                                              
[97]	eval-mlogloss:0.01135	train-mlogloss:0.02162                                                                              
[98]	eval-mlogloss:0.01124	train-mlogloss:0.02158                                                                              
[99]	eval-mlogloss:0.01123	train-mlogloss:0.02156                                                       

[56]	eval-mlogloss:0.01310	train-mlogloss:0.04387                                                                              
[57]	eval-mlogloss:0.01275	train-mlogloss:0.04316                                                                              
[58]	eval-mlogloss:0.01356	train-mlogloss:0.04328                                                                              
[59]	eval-mlogloss:0.01383	train-mlogloss:0.04340                                                                              
[60]	eval-mlogloss:0.01326	train-mlogloss:0.04304                                                                              
[61]	eval-mlogloss:0.01361	train-mlogloss:0.04254                                                                              
[62]	eval-mlogloss:0.01292	train-mlogloss:0.04240                                                                              
[63]	eval-mlogloss:0.01339	train-mlogloss:0.04257                                                       

[20]	eval-mlogloss:0.02355	train-mlogloss:0.03717                                                                              
[21]	eval-mlogloss:0.02387	train-mlogloss:0.03746                                                                              
[22]	eval-mlogloss:0.02404	train-mlogloss:0.03771                                                                              
[23]	eval-mlogloss:0.02667	train-mlogloss:0.03802                                                                              
[24]	eval-mlogloss:0.02621	train-mlogloss:0.03642                                                                              
[25]	eval-mlogloss:0.01984	train-mlogloss:0.03451                                                                              
[26]	eval-mlogloss:0.02058	train-mlogloss:0.03554                                                                              
[27]	eval-mlogloss:0.01972	train-mlogloss:0.03539                                                       

[84]	eval-mlogloss:0.01184	train-mlogloss:0.02611                                                                              
[85]	eval-mlogloss:0.01298	train-mlogloss:0.02619                                                                              
[86]	eval-mlogloss:0.01612	train-mlogloss:0.02682                                                                              
[87]	eval-mlogloss:0.01526	train-mlogloss:0.02593                                                                              
[88]	eval-mlogloss:0.01465	train-mlogloss:0.02570                                                                              
[89]	eval-mlogloss:0.01458	train-mlogloss:0.02571                                                                              
[90]	eval-mlogloss:0.01457	train-mlogloss:0.02613                                                                              
[91]	eval-mlogloss:0.01306	train-mlogloss:0.02594                                                       

[48]	eval-mlogloss:0.01522	train-mlogloss:0.03803                                                                              
[49]	eval-mlogloss:0.01524	train-mlogloss:0.03782                                                                              
[50]	eval-mlogloss:0.01502	train-mlogloss:0.03779                                                                              
[51]	eval-mlogloss:0.01594	train-mlogloss:0.03727                                                                              
[52]	eval-mlogloss:0.01597	train-mlogloss:0.03702                                                                              
[53]	eval-mlogloss:0.01435	train-mlogloss:0.03662                                                                              
[54]	eval-mlogloss:0.01385	train-mlogloss:0.03638                                                                              
[55]	eval-mlogloss:0.01310	train-mlogloss:0.03615                                                       

[12]	eval-mlogloss:0.26848	train-mlogloss:0.30616                                                                              
[13]	eval-mlogloss:0.24367	train-mlogloss:0.28162                                                                              
[14]	eval-mlogloss:0.22370	train-mlogloss:0.25988                                                                              
[15]	eval-mlogloss:0.20511	train-mlogloss:0.24017                                                                              
[16]	eval-mlogloss:0.18842	train-mlogloss:0.22240                                                                              
[17]	eval-mlogloss:0.17385	train-mlogloss:0.20635                                                                              
[18]	eval-mlogloss:0.15953	train-mlogloss:0.19212                                                                              
[19]	eval-mlogloss:0.14632	train-mlogloss:0.17814                                                       

[76]	eval-mlogloss:0.01644	train-mlogloss:0.03119                                                                              
[77]	eval-mlogloss:0.01629	train-mlogloss:0.03106                                                                              
[78]	eval-mlogloss:0.01606	train-mlogloss:0.03076                                                                              
[79]	eval-mlogloss:0.01604	train-mlogloss:0.03060                                                                              
[80]	eval-mlogloss:0.01593	train-mlogloss:0.03046                                                                              
[81]	eval-mlogloss:0.01583	train-mlogloss:0.03028                                                                              
[82]	eval-mlogloss:0.01572	train-mlogloss:0.03001                                                                              
[83]	eval-mlogloss:0.01553	train-mlogloss:0.02983                                                       

[40]	eval-mlogloss:0.01389	train-mlogloss:0.02826                                                                              
[41]	eval-mlogloss:0.01341	train-mlogloss:0.02838                                                                              
[42]	eval-mlogloss:0.01209	train-mlogloss:0.02839                                                                              
[43]	eval-mlogloss:0.01270	train-mlogloss:0.02860                                                                              
[44]	eval-mlogloss:0.01241	train-mlogloss:0.02811                                                                              
[45]	eval-mlogloss:0.01263	train-mlogloss:0.02826                                                                              
[46]	eval-mlogloss:0.01264	train-mlogloss:0.02720                                                                              
[47]	eval-mlogloss:0.01257	train-mlogloss:0.02697                                                       

[4]	eval-mlogloss:0.77406	train-mlogloss:0.78617                                                                               
[5]	eval-mlogloss:0.72247	train-mlogloss:0.73801                                                                               
[6]	eval-mlogloss:0.67608	train-mlogloss:0.69337                                                                               
[7]	eval-mlogloss:0.63284	train-mlogloss:0.65360                                                                               
[8]	eval-mlogloss:0.59321	train-mlogloss:0.61550                                                                               
[9]	eval-mlogloss:0.55622	train-mlogloss:0.58085                                                                               
[10]	eval-mlogloss:0.52192	train-mlogloss:0.54904                                                                              
[11]	eval-mlogloss:0.49044	train-mlogloss:0.51875                                                       

[68]	eval-mlogloss:0.03778	train-mlogloss:0.06515                                                                              
[69]	eval-mlogloss:0.03649	train-mlogloss:0.06389                                                                              
[70]	eval-mlogloss:0.03597	train-mlogloss:0.06277                                                                              
[71]	eval-mlogloss:0.03522	train-mlogloss:0.06175                                                                              
[72]	eval-mlogloss:0.03419	train-mlogloss:0.06070                                                                              
[73]	eval-mlogloss:0.03336	train-mlogloss:0.06019                                                                              
[74]	eval-mlogloss:0.03243	train-mlogloss:0.05905                                                                              
[75]	eval-mlogloss:0.03139	train-mlogloss:0.05803                                                       

[32]	eval-mlogloss:0.01512	train-mlogloss:0.02682                                                                              
[33]	eval-mlogloss:0.01547	train-mlogloss:0.02647                                                                              
[34]	eval-mlogloss:0.01549	train-mlogloss:0.02637                                                                              
[35]	eval-mlogloss:0.01516	train-mlogloss:0.02626                                                                              
[36]	eval-mlogloss:0.01470	train-mlogloss:0.02597                                                                              
[37]	eval-mlogloss:0.01445	train-mlogloss:0.02553                                                                              
[38]	eval-mlogloss:0.01374	train-mlogloss:0.02528                                                                              
[39]	eval-mlogloss:0.01286	train-mlogloss:0.02524                                                       

[96]	eval-mlogloss:0.01272	train-mlogloss:0.02140                                                                              
[97]	eval-mlogloss:0.01269	train-mlogloss:0.02143                                                                              
[98]	eval-mlogloss:0.01272	train-mlogloss:0.02141                                                                              
[99]	eval-mlogloss:0.01245	train-mlogloss:0.02132                                                                              
[0]	eval-mlogloss:0.45316	train-mlogloss:0.48473                                                                               
[1]	eval-mlogloss:0.24261	train-mlogloss:0.28965                                                                               
[2]	eval-mlogloss:0.14512	train-mlogloss:0.19921                                                                               
[3]	eval-mlogloss:0.09906	train-mlogloss:0.15139                                                        

[60]	eval-mlogloss:0.02137	train-mlogloss:0.05092                                                                              
[61]	eval-mlogloss:0.02163	train-mlogloss:0.05067                                                                              
[62]	eval-mlogloss:0.02154	train-mlogloss:0.05067                                                                              
[63]	eval-mlogloss:0.02154	train-mlogloss:0.05067                                                                              
[64]	eval-mlogloss:0.02210	train-mlogloss:0.05039                                                                              
[65]	eval-mlogloss:0.02182	train-mlogloss:0.05008                                                                              
[66]	eval-mlogloss:0.02219	train-mlogloss:0.04966                                                                              
[67]	eval-mlogloss:0.02204	train-mlogloss:0.04948                                                       

In [32]:
trials.results

[{'loss': 0.0, 'status': 'ok'},
 {'loss': 0.0, 'status': 'ok'},
 {'loss': 0.0, 'status': 'ok'},
 {'loss': 0.0, 'status': 'ok'},
 {'loss': 0.0, 'status': 'ok'},
 {'loss': 0.0, 'status': 'ok'},
 {'loss': 0.0, 'status': 'ok'},
 {'loss': 0.0, 'status': 'ok'},
 {'loss': 0.0, 'status': 'ok'},
 {'loss': 0.0, 'status': 'ok'}]

In [33]:
trials.best_trial

{'state': 2,
 'tid': 0,
 'spec': None,
 'result': {'loss': 0.0, 'status': 'ok'},
 'misc': {'tid': 0,
  'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'workdir': None,
  'idxs': {'alpha': [0],
   'eta': [0],
   'lambda': [0],
   'max_depth': [0],
   'subsample': [0]},
  'vals': {'alpha': [0.007820051655776247],
   'eta': [0.08707059692675438],
   'lambda': [0.10721982631307697],
   'max_depth': [10.0],
   'subsample': [0.7419833312630884]}},
 'exp_key': None,
 'owner': None,
 'version': 0,
 'book_time': datetime.datetime(2023, 7, 15, 21, 52, 43, 366000),
 'refresh_time': datetime.datetime(2023, 7, 15, 21, 52, 43, 803000)}

In [39]:
best['max_depth'] = int(best['max_depth'])

best

{'alpha': 0.007820051655776247,
 'eta': 0.08707059692675438,
 'lambda': 0.10721982631307697,
 'max_depth': 10,
 'subsample': 0.7419833312630884}

In [40]:
booster = xgb.train(
    params=best,
    dtrain=dtrain,
    num_boost_round=num_round,
    evals=watchlist,
    early_stopping_rounds=early_stopping_rounds
)

y_pred = booster.predict(dval)
print(y_pred[:5])

rmse = mean_squared_error(y_val, y_pred, squared=False)
print(f"RMSE: {rmse}")

[0]	eval-rmse:0.85645	train-rmse:0.88062
[1]	eval-rmse:0.78224	train-rmse:0.80386
[2]	eval-rmse:0.71447	train-rmse:0.73762
[3]	eval-rmse:0.65254	train-rmse:0.67639
[4]	eval-rmse:0.59524	train-rmse:0.62256
[5]	eval-rmse:0.54363	train-rmse:0.56922
[6]	eval-rmse:0.49646	train-rmse:0.52099
[7]	eval-rmse:0.45300	train-rmse:0.47846
[8]	eval-rmse:0.42258	train-rmse:0.44221
[9]	eval-rmse:0.38644	train-rmse:0.40625
[10]	eval-rmse:0.35349	train-rmse:0.37336
[11]	eval-rmse:0.32345	train-rmse:0.34443
[12]	eval-rmse:0.29589	train-rmse:0.31478
[13]	eval-rmse:0.27093	train-rmse:0.28960
[14]	eval-rmse:0.24803	train-rmse:0.26613
[15]	eval-rmse:0.22723	train-rmse:0.24522
[16]	eval-rmse:0.20798	train-rmse:0.22479
[17]	eval-rmse:0.18992	train-rmse:0.20822
[18]	eval-rmse:0.17718	train-rmse:0.19197
[19]	eval-rmse:0.16468	train-rmse:0.17718
[20]	eval-rmse:0.15167	train-rmse:0.16415
[21]	eval-rmse:0.13977	train-rmse:0.15014
[22]	eval-rmse:0.12914	train-rmse:0.13748
[23]	eval-rmse:0.11770	train-rmse:0.12691
[2

In [41]:
y_pred_round = np.round(y_pred)
y_pred_round[:5]

array([1., 0., 2., 1., 1.], dtype=float32)

In [42]:
y_val[:5]

array([1, 0, 2, 1, 1])