In [2]:
import pandas as pd

In [3]:
import xgboost as xgb

In [4]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [11]:
from sklearn.feature_extraction import DictVectorizer

In [135]:
from sklearn.metrics import mean_squared_error

In [45]:
import mlflow

mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("xgboost-autlog-experiment-2")

2023/05/25 08:27:47 INFO mlflow.tracking.fluent: Experiment with name 'xgboost-autlog-experiment-2' does not exist. Creating a new experiment.


<Experiment: artifact_location='./mlruns/3', creation_time=1684983467538, experiment_id='3', last_update_time=1684983467538, lifecycle_stage='active', name='xgboost-autlog-experiment-2', tags={}>

In [37]:
def objective(e):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=30,
            evals=[(valid, "validation")],
            early_stopping_rounds=15
        )
        y_pred = booster.predict(valid)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        
        mlflow.log_metric("rmse", rmse)
        
    return {'loss': rmse, 'status': STATUS_OK}

In [9]:
def read_dataframe(filename):
    if filename.endswith('.csv'):
        df = pd.read_csv(filename)

        df.tpep_dropoff_datetime = pd.to_datetime(df.tpep_dropoff_datetime)
        df.tpep_pickup_datetime = pd.to_datetime(df.tpep_pickup_datetime)
    elif filename.endswith('.parquet'):
        df = pd.read_parquet(filename)

    df['duration'] = df.tpep_dropoff_datetime - df.tpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df

In [53]:
df_train = read_dataframe('/Users/temp/Documents/projects/ml_ops_zoomcamp/data/yellow_tripdata_2022-01.parquet')
df_val = read_dataframe('/Users/temp/Documents/projects/ml_ops_zoomcamp/data/yellow_tripdata_2022-02.parquet')

In [56]:
len(df_train), len(df_val)

(2421440, 2918187)

In [47]:
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

In [34]:
df_train_partial = df_train[:100000].copy()
df_val_partial = df_val[:100000].copy()

In [48]:
def prepare_training_and_validation_data(df_train, df_val):
    df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
    df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

    categorical = ['PU_DO'] #'PULocationID', 'DOLocationID']
    numerical = ['trip_distance']

    dv = DictVectorizer()

    train_dicts = df_train[categorical + numerical].to_dict(orient='records')
    X_train = dv.fit_transform(train_dicts)

    val_dicts = df_val[categorical + numerical].to_dict(orient='records')
    X_val = dv.transform(val_dicts)

    target = 'duration'
    y_train = df_train[target].values
    y_val = df_val[target].values

    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val
    }

In [36]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [15]:
search_scope = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': 'reg:linear',
    'seed': 42
}

In [38]:
best_result = fmin(
    fn=objective,
    space=search_scope,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

[0]	validation-rmse:15.01831                                                                                                                                                         
[1]	validation-rmse:13.24024                                                                                                                                                         
[2]	validation-rmse:11.85519                                                                                                                                                         
[3]	validation-rmse:10.78265                                                                                                                                                         
[4]	validation-rmse:9.95407                                                                                                                                                          
[5]	validation-rmse:9.31455                                                               

[25]	validation-rmse:7.28388                                                                                                                                                         
[26]	validation-rmse:7.26788                                                                                                                                                         
[27]	validation-rmse:7.25318                                                                                                                                                         
[28]	validation-rmse:7.24123                                                                                                                                                         
[29]	validation-rmse:7.23079                                                                                                                                                         
[0]	validation-rmse:16.66105                                                              

[20]	validation-rmse:7.20877                                                                                                                                                         
[21]	validation-rmse:7.19010                                                                                                                                                         
[22]	validation-rmse:7.17406                                                                                                                                                         
[23]	validation-rmse:7.16136                                                                                                                                                         
[24]	validation-rmse:7.15170                                                                                                                                                         
[25]	validation-rmse:7.14252                                                              

[15]	validation-rmse:8.53813                                                                                                                                                         
[16]	validation-rmse:8.37483                                                                                                                                                         
[17]	validation-rmse:8.23022                                                                                                                                                         
[18]	validation-rmse:8.10315                                                                                                                                                         
[19]	validation-rmse:7.99082                                                                                                                                                         
[20]	validation-rmse:7.89091                                                              

[10]	validation-rmse:7.18896                                                                                                                                                         
[11]	validation-rmse:7.18993                                                                                                                                                         
[12]	validation-rmse:7.19212                                                                                                                                                         
[13]	validation-rmse:7.19285                                                                                                                                                         
[14]	validation-rmse:7.19468                                                                                                                                                         
[15]	validation-rmse:7.19673                                                              

[11]	validation-rmse:7.60510                                                                                                                                                         
[12]	validation-rmse:7.50568                                                                                                                                                         
[13]	validation-rmse:7.42996                                                                                                                                                         
[14]	validation-rmse:7.37151                                                                                                                                                         
[15]	validation-rmse:7.32796                                                                                                                                                         
[16]	validation-rmse:7.29327                                                              

[6]	validation-rmse:7.08538                                                                                                                                                          
[7]	validation-rmse:7.08396                                                                                                                                                          
[8]	validation-rmse:7.08043                                                                                                                                                          
[9]	validation-rmse:7.07849                                                                                                                                                          
[10]	validation-rmse:7.07828                                                                                                                                                         
[11]	validation-rmse:7.07688                                                              

[2]	validation-rmse:9.49681                                                                                                                                                          
[3]	validation-rmse:8.58356                                                                                                                                                          
[4]	validation-rmse:8.02111                                                                                                                                                          
[5]	validation-rmse:7.67555                                                                                                                                                          
[6]	validation-rmse:7.45451                                                                                                                                                          
[7]	validation-rmse:7.31483                                                               

[28]	validation-rmse:7.04850                                                                                                                                                         
[29]	validation-rmse:7.04770                                                                                                                                                         
[0]	validation-rmse:10.14406                                                                                                                                                         
[1]	validation-rmse:8.03118                                                                                                                                                          
[2]	validation-rmse:7.39302                                                                                                                                                          
[3]	validation-rmse:7.18804                                                               

[25]	validation-rmse:7.09726                                                                                                                                                         
[26]	validation-rmse:7.09689                                                                                                                                                         
[27]	validation-rmse:7.09705                                                                                                                                                         
[28]	validation-rmse:7.09667                                                                                                                                                         
[29]	validation-rmse:7.09608                                                                                                                                                         
[0]	validation-rmse:10.90340                                                              

[20]	validation-rmse:7.06672                                                                                                                                                         
[21]	validation-rmse:7.06669                                                                                                                                                         
[22]	validation-rmse:7.06582                                                                                                                                                         
[23]	validation-rmse:7.06556                                                                                                                                                         
[24]	validation-rmse:7.06553                                                                                                                                                         
[25]	validation-rmse:7.06667                                                              

[20]	validation-rmse:7.07388                                                                                                                                                         
[21]	validation-rmse:7.06840                                                                                                                                                         
[22]	validation-rmse:7.06619                                                                                                                                                         
[23]	validation-rmse:7.06420                                                                                                                                                         
[24]	validation-rmse:7.06237                                                                                                                                                         
[25]	validation-rmse:7.06063                                                              

[15]	validation-rmse:7.07119                                                                                                                                                         
[16]	validation-rmse:7.06493                                                                                                                                                         
[17]	validation-rmse:7.06009                                                                                                                                                         
[18]	validation-rmse:7.05811                                                                                                                                                         
[19]	validation-rmse:7.05605                                                                                                                                                         
[20]	validation-rmse:7.05429                                                              

[10]	validation-rmse:10.11300                                                                                                                                                        
[11]	validation-rmse:9.79782                                                                                                                                                         
[12]	validation-rmse:9.51598                                                                                                                                                         
[13]	validation-rmse:9.26492                                                                                                                                                         
[14]	validation-rmse:9.04087                                                                                                                                                         
[15]	validation-rmse:8.84068                                                              

[5]	validation-rmse:9.65706                                                                                                                                                          
[6]	validation-rmse:9.13227                                                                                                                                                          
[7]	validation-rmse:8.72029                                                                                                                                                          
[8]	validation-rmse:8.39317                                                                                                                                                          
[9]	validation-rmse:8.13459                                                                                                                                                          
[10]	validation-rmse:7.93002                                                              

[10]	validation-rmse:7.10270                                                                                                                                                         
[11]	validation-rmse:7.09258                                                                                                                                                         
[12]	validation-rmse:7.08562                                                                                                                                                         
[13]	validation-rmse:7.08321                                                                                                                                                         
[14]	validation-rmse:7.08082                                                                                                                                                         
[15]	validation-rmse:7.07895                                                              

[9]	validation-rmse:7.24234                                                                                                                                                          
[10]	validation-rmse:7.18998                                                                                                                                                         
[11]	validation-rmse:7.15323                                                                                                                                                         
[12]	validation-rmse:7.12887                                                                                                                                                         
[13]	validation-rmse:7.10830                                                                                                                                                         
[14]	validation-rmse:7.09731                                                              

[4]	validation-rmse:13.64651                                                                                                                                                         
[5]	validation-rmse:13.08853                                                                                                                                                         
[6]	validation-rmse:12.57775                                                                                                                                                         
[7]	validation-rmse:12.10977                                                                                                                                                         
[8]	validation-rmse:11.68174                                                                                                                                                         
[9]	validation-rmse:11.29036                                                              

In [46]:
params = {
    'learning_rate': 0.3795086202637246,
    'max_depth': 55,
    'min_child_weight': 3.3288327276634258,
    'objective': 'reg:linear',
    'reg_alpha': 0.2945298494685302,
    'reg_lambda': 0.05017902854814832,
    'seed': 42
}

mlflow.xgboost.autolog()

booster = xgb.train(
    params=params,
    dtrain=train,
    num_boost_round=30,
    evals=[(valid, "validation")],
    early_stopping_rounds=15
)

2023/05/25 08:28:10 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '2ad84216be84444aa495d9563dcbba74', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


[0]	validation-rmse:12.96097
[1]	validation-rmse:10.44925
[2]	validation-rmse:9.01572
[3]	validation-rmse:8.20427
[4]	validation-rmse:7.73788
[5]	validation-rmse:7.46677
[6]	validation-rmse:7.30463
[7]	validation-rmse:7.20706
[8]	validation-rmse:7.14647
[9]	validation-rmse:7.11023
[10]	validation-rmse:7.08879
[11]	validation-rmse:7.07359
[12]	validation-rmse:7.06572
[13]	validation-rmse:7.05686
[14]	validation-rmse:7.05288
[15]	validation-rmse:7.05060
[16]	validation-rmse:7.04962
[17]	validation-rmse:7.04906
[18]	validation-rmse:7.04825
[19]	validation-rmse:7.04773
[20]	validation-rmse:7.04593
[21]	validation-rmse:7.04492
[22]	validation-rmse:7.04370
[23]	validation-rmse:7.04387
[24]	validation-rmse:7.04307
[25]	validation-rmse:7.04336
[26]	validation-rmse:7.04375
[27]	validation-rmse:7.04330
[28]	validation-rmse:7.04437
[29]	validation-rmse:7.04536




In [58]:
len(df_train), len(df_val)

(2421440, 2918187)

In [54]:
train_val_dict = prepare_training_and_validation_data(df_train, df_val)

In [63]:
train_val_dict['y_train'].shape

(2421440,)

In [51]:
from sklearn.linear_model import LinearRegression

In [66]:
lr = LinearRegression()
lr.fit(train_val_dict['X_train'], train_val_dict['y_train'])

y_pred = lr.predict(train_val_dict['X_val'])

mean_squared_error(train_val_dict['y_val'], y_pred, squared=False)

5.414854617936158

In [68]:
import pickle

In [69]:
with open('models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [70]:
mlflow.set_experiment("lin-reg-model-save-experiment")

2023/05/26 09:35:12 INFO mlflow.tracking.fluent: Experiment with name 'lin-reg-model-save-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='./mlruns/4', creation_time=1685073912380, experiment_id='4', last_update_time=1685073912380, lifecycle_stage='active', name='lin-reg-model-save-experiment', tags={}>

In [72]:
with mlflow.start_run():
    mlflow.set_tag("developer", "amogh")

    mlflow.log_param("train-data-path", "../../data/yellow_tripdata_2022-01.parquet")
    mlflow.log_param("val-data-path", "../../data/yellow_tripdata_2022-02.parquet")

    lr = LinearRegression()
    lr.fit(train_val_dict['X_train'], train_val_dict['y_train'])

    y_pred = lr.predict(train_val_dict['X_val'])

    rmse = mean_squared_error(y_pred, train_val_dict['y_val'], squared=False)
    
    mlflow.log_metric("rmse", rmse)
    
    mlflow.log_artifact(local_path="models/lin_reg.bin", artifact_path="models/pickle")

In [73]:
mlflow.xgboost.autolog(disable=True)

In [74]:
with mlflow.start_run():
    
    train = xgb.DMatrix(train_val_dict['X_train'], label=train_val_dict['y_train'])
    valid = xgb.DMatrix(train_val_dict['X_val'], label=train_val_dict['y_val'])
    
    params = {
        'learning_rate': 0.3795086202637246,
        'max_depth': 55,
        'min_child_weight': 3.3288327276634258,
        'objective': 'reg:linear',
        'reg_alpha': 0.2945298494685302,
        'reg_lambda': 0.05017902854814832,
        'seed': 42
    }
    
    mlflow.log_params(params)
    
    booster = xgb.train(
        params=params,
        dtrain=train,
        num_boost_round=50,
        evals=[(valid, "validation")],
        early_stopping_rounds=25
    )
    
    y_pred = booster.predict(valid)
    rmse = mean_squared_error(train_val_dict['y_val'], y_pred, squared=False)
    mlflow.log_metric("rmse", rmse)
    
    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

[0]	validation-rmse:11.35125
[1]	validation-rmse:8.43383
[2]	validation-rmse:6.83550
[3]	validation-rmse:5.99248
[4]	validation-rmse:5.55507
[5]	validation-rmse:5.32212
[6]	validation-rmse:5.19453
[7]	validation-rmse:5.12098
[8]	validation-rmse:5.07598
[9]	validation-rmse:5.04639
[10]	validation-rmse:5.02762
[11]	validation-rmse:5.01497
[12]	validation-rmse:5.00866
[13]	validation-rmse:5.00379
[14]	validation-rmse:5.00017
[15]	validation-rmse:4.99708
[16]	validation-rmse:4.99200
[17]	validation-rmse:4.98981
[18]	validation-rmse:4.98808
[19]	validation-rmse:4.98630
[20]	validation-rmse:4.98466
[21]	validation-rmse:4.98275
[22]	validation-rmse:4.98103
[23]	validation-rmse:4.97955
[24]	validation-rmse:4.97799
[25]	validation-rmse:4.97643
[26]	validation-rmse:4.97401
[27]	validation-rmse:4.97295
[28]	validation-rmse:4.97183
[29]	validation-rmse:4.97037
[30]	validation-rmse:4.96936
[31]	validation-rmse:4.96829
[32]	validation-rmse:4.96712
[33]	validation-rmse:4.96603
[34]	validation-rmse:4.

In [75]:
logged_model = 'runs:/0487df4008cd4f838aad1e07eb1d8d5d/models_mlflow'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)



In [76]:
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: models_mlflow
  flavor: mlflow.xgboost
  run_id: 0487df4008cd4f838aad1e07eb1d8d5d

In [77]:
xgboost_model = mlflow.xgboost.load_model(logged_model)



In [80]:
y_pred = xgboost_model.predict(valid)

In [81]:
y_pred

array([14.550037, 19.915255, 28.732475, ..., 15.989329, 10.005005,
        8.948286], dtype=float32)

In [None]:
# Training GradientBoosting Regressor Algo for model registry video

In [82]:
from sklearn.ensemble import GradientBoostingRegressor

In [91]:
mlflow.set_experiment("model-registry-experiment-1")

2023/05/27 18:26:08 INFO mlflow.tracking.fluent: Experiment with name 'model-registry-experiment-1' does not exist. Creating a new experiment.


<Experiment: artifact_location='./mlruns/5', creation_time=1685192168608, experiment_id='5', last_update_time=1685192168608, lifecycle_stage='active', name='model-registry-experiment-1', tags={}>

In [98]:
with mlflow.start_run():
    
    learning_rate = 0.3795086202637246
    n_estimators = 50
    min_samples_leaf = 4
    max_depth = 55
    
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("min_samples_leaf", min_samples_leaf)
    mlflow.log_param("max_depth", max_depth)

    # params for gradient boosting is same as Xgboost parameter. Trying to train a new model for model registry.
    # Not looking for accurate models.
    gradient_model = GradientBoostingRegressor(
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        min_samples_leaf=min_samples_leaf,
        max_depth=max_depth
    )
    
    gradient_model.fit(X_train, y_train)
    
    with open('models/gradient_boosting_model.bin', 'wb') as f_out:
        pickle.dump(gradient_model, f_out)
    
    y_pred = gradient_model.predict(X_val)
    
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    
    mlflow.log_metric("rmse", rmse)
        
    mlflow.sklearn.log_model(gradient_model, artifact_path="models/pickle")

In [90]:
rmse

7.054120161913282

In [93]:
with mlflow.start_run():
    
    train = xgb.DMatrix(train_val_dict['X_train'], label=train_val_dict['y_train'])
    valid = xgb.DMatrix(train_val_dict['X_val'], label=train_val_dict['y_val'])
    
    params = {
        'learning_rate': 0.3795086202637246,
        'max_depth': 55,
        'min_child_weight': 3.3288327276634258,
        'objective': 'reg:linear',
        'reg_alpha': 0.2945298494685302,
        'reg_lambda': 0.05017902854814832,
        'seed': 42
    }
    
    mlflow.log_params(params)
    
    booster = xgb.train(
        params=params,
        dtrain=train,
        num_boost_round=50,
        evals=[(valid, "validation")],
        early_stopping_rounds=25
    )
    
    y_pred = booster.predict(valid)
    rmse = mean_squared_error(train_val_dict['y_val'], y_pred, squared=False)
    mlflow.log_metric("rmse", rmse)
    
    mlflow.xgboost.log_model(booster, artifact_path="models_mlflow")

[0]	validation-rmse:11.35125
[1]	validation-rmse:8.43383
[2]	validation-rmse:6.83550
[3]	validation-rmse:5.99248
[4]	validation-rmse:5.55507
[5]	validation-rmse:5.32212
[6]	validation-rmse:5.19453
[7]	validation-rmse:5.12098
[8]	validation-rmse:5.07598
[9]	validation-rmse:5.04639
[10]	validation-rmse:5.02762
[11]	validation-rmse:5.01497
[12]	validation-rmse:5.00866
[13]	validation-rmse:5.00379
[14]	validation-rmse:5.00017
[15]	validation-rmse:4.99708
[16]	validation-rmse:4.99200
[17]	validation-rmse:4.98981
[18]	validation-rmse:4.98808
[19]	validation-rmse:4.98630
[20]	validation-rmse:4.98466
[21]	validation-rmse:4.98275
[22]	validation-rmse:4.98103
[23]	validation-rmse:4.97955
[24]	validation-rmse:4.97799
[25]	validation-rmse:4.97643
[26]	validation-rmse:4.97401
[27]	validation-rmse:4.97295
[28]	validation-rmse:4.97183
[29]	validation-rmse:4.97037
[30]	validation-rmse:4.96936
[31]	validation-rmse:4.96829
[32]	validation-rmse:4.96712
[33]	validation-rmse:4.96603
[34]	validation-rmse:4.

In [97]:
with mlflow.start_run():
    
    lr = LinearRegression()
    lr.fit(train_val_dict['X_train'], train_val_dict['y_train'])
    
    with open('models/linear_regression.bin', 'wb') as f_out:
        pickle.dump(gradient_model, f_out)
    

    y_pred = lr.predict(train_val_dict['X_val'])

    rmse = mean_squared_error(train_val_dict['y_val'], y_pred, squared=False)
    
    mlflow.log_metric("rmse", rmse)
    
    mlflow.sklearn.log_model(lr, artifact_path="models_mlflow")
    

In [96]:
mlflow.sklearn

<module 'mlflow.sklearn' from '/Users/temp/Documents/projects/ml_ops_zoomcamp/exp-tracking-env/lib/python3.7/site-packages/mlflow/sklearn/__init__.py'>

In [99]:
from mlflow.tracking import MlflowClient

MLFLOW_TRACKING_URI = 'sqlite:///mlflow.db'

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [100]:
client.list_experiments()

  """Entry point for launching an IPython kernel.


[<Experiment: artifact_location='./mlruns/0', creation_time=1684809513150, experiment_id='0', last_update_time=1684809513150, lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='./mlruns/1', creation_time=1684849967310, experiment_id='1', last_update_time=1684849967310, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>,
 <Experiment: artifact_location='./mlruns/2', creation_time=1684949192625, experiment_id='2', last_update_time=1684949192625, lifecycle_stage='active', name='xgboost-autlog-experiment', tags={}>,
 <Experiment: artifact_location='./mlruns/3', creation_time=1684983467538, experiment_id='3', last_update_time=1684983467538, lifecycle_stage='active', name='xgboost-autlog-experiment-2', tags={}>,
 <Experiment: artifact_location='./mlruns/4', creation_time=1685073912380, experiment_id='4', last_update_time=1685073912380, lifecycle_stage='active', name='lin-reg-model-save-experiment', tags={}>,
 <Experiment: artifact_location='./mlr

In [101]:
client.create_experiment(name="my-cool-experiment")

'6'

In [117]:
from mlflow.entities import ViewType

runs = client.search_runs(
    experiment_ids='5',
    filter_string="metrics.rmse < 9",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=10,
    order_by=["metrics.rmse ASC"]
)

for run in runs:
    print(f"run_id={run.info.run_id}, rmse={run.data.metrics['rmse']:.4f}")

In [118]:
for run in runs:
    print(f"run_id={run.info.run_id}, rmse={run.data.metrics['rmse']:.4f}")

run_id=5823f93e73b149c388e8a738e4b7a80b, rmse=4.9489
run_id=0614c1cd69914ab59ec99a136d1084b7, rmse=5.4149
run_id=44c430ceb9924bd59bb30f3db052d670, rmse=5.4149
run_id=238d460301264dd89be4831ef90e5d8d, rmse=7.0541
run_id=209d3c6107e343c3b86657161f7c45de, rmse=7.0541


In [119]:
run_id = '209d3c6107e343c3b86657161f7c45de'
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri=model_uri, name="nyc-taxi-aggressor")

Registered model 'nyc-taxi-aggressor' already exists. Creating a new version of this model...
2023/05/28 16:52:58 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-aggressor, version 3
Created version '3' of model 'nyc-taxi-aggressor'.


<ModelVersion: creation_timestamp=1685272978676, current_stage='None', description=None, last_updated_timestamp=1685272978676, name='nyc-taxi-aggressor', run_id='209d3c6107e343c3b86657161f7c45de', run_link=None, source='./mlruns/5/209d3c6107e343c3b86657161f7c45de/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [120]:
model_uri

'runs:/209d3c6107e343c3b86657161f7c45de/model'

In [121]:
model_name = "nyc-taxi-aggressor"
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print(f"version: {version.version}, stage: {version.current_stage}")

version: 1, stage: Production
version: 2, stage: Staging
version: 3, stage: None


In [122]:
client.transition_model_version_stage(
    name=model_name,
    version=3,
    stage="Staging",
    archive_existing_versions=False
)

<ModelVersion: creation_timestamp=1685272978676, current_stage='Staging', description=None, last_updated_timestamp=1685273650074, name='nyc-taxi-aggressor', run_id='209d3c6107e343c3b86657161f7c45de', run_link=None, source='./mlruns/5/209d3c6107e343c3b86657161f7c45de/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [124]:
from datetime import datetime
current_date = datetime.now().date().strftime("%Y-%m-%d")
client.update_model_version(
    version=3,
    name=model_name,
    description=f"The model version 3 was transitioned to Staging on {current_date}"
)

<ModelVersion: creation_timestamp=1685272978676, current_stage='Staging', description='The model version 3 was transitioned to Staging on 2023-05-28', last_updated_timestamp=1685273847632, name='nyc-taxi-aggressor', run_id='209d3c6107e343c3b86657161f7c45de', run_link=None, source='./mlruns/5/209d3c6107e343c3b86657161f7c45de/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

---------- Module 2.6 MLFlow in practice ----------------------

In [142]:
import os

os.environ["AWS_PROFILE"] = "default"

In [125]:
TRACKING_URI = "ec2-3-141-4-192.us-east-2.compute.amazonaws.com"

In [126]:
mlflow.set_tracking_uri(f"http://{TRACKING_URI}:5000")

In [127]:
print(f"tracking URI = {mlflow.get_tracking_uri()}")

tracking URI = http://ec2-3-141-4-192.us-east-2.compute.amazonaws.com:5000


In [131]:
mlflow.get_tracking_uri()

'http://ec2-3-141-4-192.us-east-2.compute.amazonaws.com:5000'

In [133]:
mlflow.search_experiments()

[<Experiment: artifact_location='s3://mlflow-artifacts-remote-amogh/0', creation_time=1685379456844, experiment_id='0', last_update_time=1685379456844, lifecycle_stage='active', name='Default', tags={}>]

In [137]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

In [138]:
mlflow.set_experiment("my-experiment-1")

2023/05/30 19:29:36 INFO mlflow.tracking.fluent: Experiment with name 'my-experiment-1' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://mlflow-artifacts-remote-amogh/1', creation_time=1685454765088, experiment_id='1', last_update_time=1685454765088, lifecycle_stage='active', name='my-experiment-1', tags={}>

In [147]:
with mlflow.start_run():
    
    X, y = load_iris(return_X_y=True)
    
    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)
    
    lr = LogisticRegression(**params).fit(X, y)
    
    y_pred = lr.predict(X)
    mlflow.log_metric("accuracy", accuracy_score(y, y_pred))
    
    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifact URI: '{mlflow.get_artifact_uri()}'")

ProfileNotFound: The config profile (default) could not be found