In [1]:
!python -V

Python 3.9.13


In [2]:
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error
import mlflow
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [8]:
mlflow.set_tracking_uri("sqlite:///data/mlflow.db")
mlflow.set_experiment("nyc-taxi-experiment")

<Experiment: artifact_location='file:///c:/Users/ASUS/Documents/GitHub/mlops-zoomcamp/week2/mlruns/1', creation_time=1716931336348, experiment_id='1', last_update_time=1716931336348, lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>

In [7]:
df_train = pd.read_parquet('green_tripdata_2021-01.parquet')
df_val = pd.read_parquet('green_tripdata_2021-02.parquet')

In [9]:
len(df_train), len(df_val)

(76518, 64572)

In [10]:
df_train.lpep_dropoff_datetime = pd.to_datetime(df_train.lpep_dropoff_datetime)
df_train.lpep_pickup_datetime = pd.to_datetime(df_train.lpep_pickup_datetime)

df_train['duration'] = df_train.lpep_dropoff_datetime - df_train.lpep_pickup_datetime
df_train.duration = df_train.duration.apply(lambda td: td.total_seconds() / 60)

df_train = df_train[(df_train.duration >= 1) & (df_train.duration <= 60)]

categorical = ['PULocationID', 'DOLocationID']
df_train[categorical] = df_train[categorical].astype(str)

In [11]:
df_val.lpep_dropoff_datetime = pd.to_datetime(df_val.lpep_dropoff_datetime)
df_val.lpep_pickup_datetime = pd.to_datetime(df_val.lpep_pickup_datetime)

df_val['duration'] = df_val.lpep_dropoff_datetime - df_val.lpep_pickup_datetime
df_val.duration = df_val.duration.apply(lambda td: td.total_seconds() / 60)

df_val = df_val[(df_val.duration >= 1) & (df_val.duration <= 60)]

categorical = ['PULocationID', 'DOLocationID']
df_val[categorical] = df_val[categorical].astype(str)

In [12]:
df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID']
df_val['PU_DO'] = df_val['PULocationID'] + '_' + df_val['DOLocationID']

In [13]:
categorical = ['PU_DO'] #'PULocationID', 'DOLocationID']
numerical = ['trip_distance']

dv = DictVectorizer()

train_dicts = df_train[categorical + numerical].to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

val_dicts = df_val[categorical + numerical].to_dict(orient='records')
X_val = dv.transform(val_dicts)

In [14]:
target = 'duration'
y_train = df_train[target].values
y_val = df_val[target].values

In [15]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_val)

mean_squared_error(y_val, y_pred, squared=False)



7.758715203341164

In [20]:
with open('mlruns/models/lin_reg.bin', 'wb') as f_out:
    pickle.dump((dv, lr), f_out)

In [21]:
with mlflow.start_run():

    mlflow.set_tag("developer", "cristian")

    mlflow.log_param("train-data-path", "./data/green_tripdata_2021-01.csv")
    mlflow.log_param("valid-data-path", "./data/green_tripdata_2021-02.csv")

    alpha = 0.1
    mlflow.log_param("alpha", alpha)
    lr = Lasso(alpha)
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    rmse = mean_squared_error(y_val, y_pred, squared=False)
    mlflow.log_metric("rmse", rmse)

    mlflow.log_artifact(local_path="mlruns/models/lin_reg.bin", artifact_path="models_pickle")



In [24]:
train = xgb.DMatrix(X_train, label=y_train)
valid = xgb.DMatrix(X_val, label=y_val)

In [25]:
def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "xgboost")
        mlflow.log_params(params)
        booster = xgb.train(
            params=params,
            dtrain=train,
            num_boost_round=1000,
            evals=[(valid, 'validation')],
            early_stopping_rounds=50
        )
        y_pred = booster.predict(valid)
        rmse = mean_squared_error(y_val, y_pred, squared=False)
        mlflow.log_metric("rmse", rmse)

    return {'loss': rmse, 'status': STATUS_OK}

In [26]:
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'objective': 'reg:linear',
    'seed': 42
}

In [27]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




[0]	validation-rmse:11.47445                          
[1]	validation-rmse:10.82465                          
[2]	validation-rmse:10.25534                          
[3]	validation-rmse:9.75806                           
[4]	validation-rmse:9.32445                           
[5]	validation-rmse:8.94854                           
[6]	validation-rmse:8.62318                           
[7]	validation-rmse:8.34218                           
[8]	validation-rmse:8.10125                           
[9]	validation-rmse:7.89460                           
[10]	validation-rmse:7.71781                          
[11]	validation-rmse:7.56630                          
[12]	validation-rmse:7.43657                          
[13]	validation-rmse:7.32636                          
[14]	validation-rmse:7.23153                          
[15]	validation-rmse:7.15027                          
[16]	validation-rmse:7.08158                          
[17]	validation-rmse:7.02252                          
[18]	valid





[0]	validation-rmse:7.99548                                                    
[1]	validation-rmse:6.87645                                                    
[2]	validation-rmse:6.59170                                                    
[3]	validation-rmse:6.50301                                                    
[4]	validation-rmse:6.46987                                                    
[5]	validation-rmse:6.45646                                                    
[6]	validation-rmse:6.44910                                                    
[7]	validation-rmse:6.44275                                                    
[8]	validation-rmse:6.43746                                                    
[9]	validation-rmse:6.42809                                                    
[10]	validation-rmse:6.42257                                                   
[11]	validation-rmse:6.41461                                                   
[12]	validation-rmse:6.40995            





[0]	validation-rmse:7.69741                                                    
[1]	validation-rmse:6.84911                                                    
[2]	validation-rmse:6.66349                                                    
[3]	validation-rmse:6.60956                                                    
[4]	validation-rmse:6.57636                                                    
[5]	validation-rmse:6.56711                                                    
[6]	validation-rmse:6.56101                                                    
[7]	validation-rmse:6.55394                                                    
[8]	validation-rmse:6.53866                                                    
[9]	validation-rmse:6.53660                                                    
[10]	validation-rmse:6.53100                                                   
[11]	validation-rmse:6.52861                                                   
[12]	validation-rmse:6.52590            





[0]	validation-rmse:11.00231                                                   
[1]	validation-rmse:10.03631                                                   
[2]	validation-rmse:9.27307                                                    
[3]	validation-rmse:8.67721                                                    
[4]	validation-rmse:8.21639                                                    
[5]	validation-rmse:7.86131                                                    
[6]	validation-rmse:7.58923                                                    
[7]	validation-rmse:7.38201                                                    
[8]	validation-rmse:7.22454                                                    
[9]	validation-rmse:7.10409                                                    
[10]	validation-rmse:7.01292                                                   
[11]	validation-rmse:6.94294                                                   
[12]	validation-rmse:6.88716            





[0]	validation-rmse:10.12554                                                     
[1]	validation-rmse:8.76061                                                      
[2]	validation-rmse:7.89649                                                      
[3]	validation-rmse:7.36355                                                      
[4]	validation-rmse:7.03877                                                      
[5]	validation-rmse:6.84023                                                      
[6]	validation-rmse:6.71747                                                      
[7]	validation-rmse:6.63623                                                      
[8]	validation-rmse:6.58204                                                      
[9]	validation-rmse:6.54603                                                      
[10]	validation-rmse:6.52015                                                     
[11]	validation-rmse:6.50233                                                     
[12]	validation-





[0]	validation-rmse:6.67366                                                      
[1]	validation-rmse:6.57507                                                      
[2]	validation-rmse:6.56017                                                      
[3]	validation-rmse:6.54680                                                      
[4]	validation-rmse:6.51987                                                      
[5]	validation-rmse:6.51278                                                      
[6]	validation-rmse:6.50324                                                      
[7]	validation-rmse:6.49481                                                      
[8]	validation-rmse:6.48730                                                      
[9]	validation-rmse:6.48353                                                      
[10]	validation-rmse:6.47643                                                     
[11]	validation-rmse:6.47310                                                     
[12]	validation-





[0]	validation-rmse:6.77953                                                    
[1]	validation-rmse:6.68358                                                    
[2]	validation-rmse:6.66638                                                    
[3]	validation-rmse:6.65507                                                    
[4]	validation-rmse:6.64446                                                    
[5]	validation-rmse:6.63472                                                    
[6]	validation-rmse:6.62853                                                    
[7]	validation-rmse:6.62347                                                    
[8]	validation-rmse:6.61327                                                    
[9]	validation-rmse:6.60287                                                    
[10]	validation-rmse:6.59762                                                   
[11]	validation-rmse:6.59244                                                   
[12]	validation-rmse:6.58649            





[0]	validation-rmse:9.31819                                                    
[1]	validation-rmse:7.86606                                                    
[2]	validation-rmse:7.18227                                                    
[3]	validation-rmse:6.86367                                                    
[4]	validation-rmse:6.71208                                                    
[5]	validation-rmse:6.63425                                                    
[6]	validation-rmse:6.58938                                                    
[7]	validation-rmse:6.55612                                                    
[8]	validation-rmse:6.53707                                                    
[9]	validation-rmse:6.52471                                                    
[10]	validation-rmse:6.51002                                                   
[11]	validation-rmse:6.50414                                                   
[12]	validation-rmse:6.49869            





[0]	validation-rmse:7.51785                                                    
[1]	validation-rmse:6.83322                                                    
[2]	validation-rmse:6.71683                                                    
[3]	validation-rmse:6.68585                                                    
[4]	validation-rmse:6.67408                                                    
[5]	validation-rmse:6.66861                                                    
[6]	validation-rmse:6.66263                                                    
[7]	validation-rmse:6.65634                                                    
[8]	validation-rmse:6.65383                                                    
[9]	validation-rmse:6.65096                                                    
[10]	validation-rmse:6.64598                                                   
[11]	validation-rmse:6.64328                                                   
[12]	validation-rmse:6.64154            





[0]	validation-rmse:9.79155                                                    
[1]	validation-rmse:8.35414                                                    
[2]	validation-rmse:7.53538                                                    
[3]	validation-rmse:7.08375                                                    
[4]	validation-rmse:6.83321                                                    
[5]	validation-rmse:6.68992                                                    
[6]	validation-rmse:6.61054                                                    
[7]	validation-rmse:6.55903                                                    
[8]	validation-rmse:6.52195                                                    
[9]	validation-rmse:6.50014                                                    
[10]	validation-rmse:6.48304                                                   
[11]	validation-rmse:6.47091                                                   
[12]	validation-rmse:6.46285            





[0]	validation-rmse:8.36453                                                     
[1]	validation-rmse:7.15552                                                     
[2]	validation-rmse:6.78800                                                     
[3]	validation-rmse:6.65834                                                     
[4]	validation-rmse:6.58465                                                     
[5]	validation-rmse:6.55909                                                     
[6]	validation-rmse:6.54859                                                     
[7]	validation-rmse:6.53551                                                     
[8]	validation-rmse:6.53088                                                     
[9]	validation-rmse:6.52328                                                     
[10]	validation-rmse:6.51867                                                    
[11]	validation-rmse:6.51255                                                    
[12]	validation-rmse:6.51054





[0]	validation-rmse:9.32033                                                     
[1]	validation-rmse:7.83954                                                     
[2]	validation-rmse:7.13379                                                     
[3]	validation-rmse:6.80159                                                     
[4]	validation-rmse:6.64145                                                     
[5]	validation-rmse:6.55221                                                     
[6]	validation-rmse:6.50376                                                     
[7]	validation-rmse:6.47746                                                     
[8]	validation-rmse:6.46154                                                     
[9]	validation-rmse:6.44750                                                     
[10]	validation-rmse:6.43708                                                    
[11]	validation-rmse:6.42622                                                    
[12]	validation-rmse:6.41638





[0]	validation-rmse:10.39914                                                    
[1]	validation-rmse:9.14830                                                     
[2]	validation-rmse:8.31133                                                     
[3]	validation-rmse:7.75965                                                     
[4]	validation-rmse:7.40208                                                     
[5]	validation-rmse:7.16960                                                     
[6]	validation-rmse:7.01879                                                     
[7]	validation-rmse:6.91695                                                     
[8]	validation-rmse:6.85090                                                     
[9]	validation-rmse:6.80944                                                     
[10]	validation-rmse:6.77960                                                    
[11]	validation-rmse:6.75504                                                    
[12]	validation-rmse:6.74051





[0]	validation-rmse:11.65699                                                    
[1]	validation-rmse:11.15058                                                    
[2]	validation-rmse:10.69028                                                    
[3]	validation-rmse:10.27249                                                    
[4]	validation-rmse:9.89411                                                     
[5]	validation-rmse:9.55201                                                     
[6]	validation-rmse:9.24299                                                     
[7]	validation-rmse:8.96460                                                     
[8]	validation-rmse:8.71440                                                     
[9]	validation-rmse:8.49036                                                     
[10]	validation-rmse:8.28937                                                    
[11]	validation-rmse:8.10904                                                    
[12]	validation-rmse:7.94780

KeyboardInterrupt: 

#### scenario-1

In [23]:
print(f"tracking URI: '{mlflow.get_tracking_uri()}'")

tracking URI: 'sqlite:///data/mlflow.db'


In [24]:
client = mlflow.tracking.MlflowClient(tracking_uri = 'sqlite:///data/mlflow.db')
experiment = client.get_experiment_by_name('nyc-taxi-experiment')

runs = client.search_runs(1)
params = runs[0].data.params

In [25]:
params

{'C': '0.1', 'random_state': '42'}

In [26]:
from mlflow.exceptions import MlflowException

try:
    client.list_registered_models()
except MlflowException:
    print("It's not possible to access the model registry :(")

AttributeError: 'MlflowClient' object has no attribute 'list_registered_models'

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

In [15]:
mlflow.set_experiment("nyc-taxi-experiment")

with mlflow.start_run():

    X, y = load_iris(return_X_y=True)

    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    lr = LogisticRegression(**params).fit(X, y)
    y_pred = lr.predict(X)
    mlflow.log_metric("accuracy", accuracy_score(y, y_pred))

    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

default artifacts URI: 'file:///c:/Users/ASUS/Documents/GitHub/mlops-zoomcamp/week2/mlruns/1/7fc318707bca4a8b9c539439eac76579/artifacts'


In [18]:
from mlflow.tracking import MlflowClient


client = MlflowClient()

In [21]:
from mlflow.tracking import MlflowClient

# Initialize the MlflowClient
client = MlflowClient()

# Print available methods to diagnose the issue
methods = dir(client)
print("Available methods in MlflowClient:")
for method in methods:
    print(method)

# Check if list_registered_models is available
if 'list_registered_models' in methods:
    print("list_registered_models method is available.")
else:
    print("The list_registered_models method is not available in your MlflowClient.")


Available methods in MlflowClient:
__class__
__delattr__
__dict__
__dir__
__doc__
__eq__
__format__
__ge__
__getattribute__
__gt__
__hash__
__init__
__init_subclass__
__le__
__lt__
__module__
__ne__
__new__
__reduce__
__reduce_ex__
__repr__
__setattr__
__sizeof__
__str__
__subclasshook__
__weakref__
_check_artifact_file_string
_create_model_version
_get_registry_client
_log_artifact_async_helper
_log_artifact_helper
_read_from_file
_record_logged_model
_registry_uri
_start_tracked_trace
_tracking_client
_upload_ended_trace_info
_upload_trace_data
_upload_trace_spans_as_tag
copy_model_version
create_experiment
create_model_version
create_registered_model
create_run
delete_experiment
delete_model_version
delete_model_version_tag
delete_registered_model
delete_registered_model_alias
delete_registered_model_tag
delete_run
delete_tag
delete_trace_tag
delete_traces
download_artifacts
end_span
end_trace
get_experiment
get_experiment_by_name
get_latest_versions
get_metric_history
get_model_ver

In [13]:
import mlflow
print(mlflow.__version__)

2.13.0


In [22]:
import requests
import json

# Set the tracking URI (ensure this is your tracking server URI)
tracking_uri = "http://127.0.0.1:5000"

# Endpoint for listing registered models
endpoint = f"{tracking_uri}/api/2.0/mlflow/registered-models/list"

# Make the request to the MLflow REST API
response = requests.get(endpoint)
if response.status_code == 200:
    registered_models = response.json().get('registered_models', [])
    for model in registered_models:
        print(json.dumps(model, indent=2))
else:
    print(f"Failed to list registered models: {response.content}")


Failed to list registered models: b'<!doctype html>\n<html lang=en>\n<title>404 Not Found</title>\n<h1>Not Found</h1>\n<p>The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.</p>\n'


#### scenario-2

In [27]:
import mlflow


mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [28]:
print(f"tracking URI: '{mlflow.get_tracking_uri()}'")

tracking URI: 'http://127.0.0.1:5000'


In [31]:
client = mlflow.tracking.MlflowClient()
experiments = client.list_experiments()

AttributeError: 'MlflowClient' object has no attribute 'list_experiments'

In [32]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

mlflow.set_experiment("my-experiment-1")

with mlflow.start_run():

    X, y = load_iris(return_X_y=True)

    params = {"C": 0.1, "random_state": 42}
    mlflow.log_params(params)

    lr = LogisticRegression(**params).fit(X, y)
    y_pred = lr.predict(X)
    mlflow.log_metric("accuracy", accuracy_score(y, y_pred))

    mlflow.sklearn.log_model(lr, artifact_path="models")
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

2024/05/30 01:16:24 INFO mlflow.tracking.fluent: Experiment with name 'my-experiment-1' does not exist. Creating a new experiment.


default artifacts URI: 'mlflow-artifacts:/2/b6e2bf31dca049888f8af4f4175ad429/artifacts'


In [33]:
from mlflow.tracking import MlflowClient


client = MlflowClient("http://127.0.0.1:5000")

In [34]:
client.list_registered_models()

AttributeError: 'MlflowClient' object has no attribute 'list_registered_models'

In [35]:
run_id = client.list_run_infos(experiment_id='1')[0].run_id
mlflow.register_model(
    model_uri=f"runs:/{run_id}/models",
    name='iris-classifier'
)

AttributeError: 'MlflowClient' object has no attribute 'list_run_infos'

In [41]:
import mlflow
from mlflow.tracking import MlflowClient

mlflow.set_tracking_uri("sqlite:///c:/Users/ASUS/Documents/GitHub/mlops-zoomcamp/week2/data/mlflow.db")

client = MlflowClient()
experiments = client.list_experiments()

print("List of experiments:")
for experiment in experiments:
    print(f"ID: {experiment.experiment_id}, Name: {experiment.name}")

INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.


AttributeError: 'MlflowClient' object has no attribute 'list_experiments'

In [43]:
experiment_id = '2'  # Replace with your actual experiment ID

runs = client.search_runs(experiment_ids=[experiment_id])
if not runs:
    print(f"No runs found for experiment ID {experiment_id}")
else:
    for run in runs:
        print(f"Run ID: {run.info.run_id}, Status: {run.info.status}")

Run ID: b6e2bf31dca049888f8af4f4175ad429, Status: FINISHED


In [44]:
import mlflow
from mlflow import log_metric, log_param, log_artifact

mlflow.set_tracking_uri("sqlite:///c:/Users/ASUS/Documents/GitHub/mlops-zoomcamp/week2/mlruns/mlflow.db")
mlflow.set_experiment("nyc-taxi")

with mlflow.start_run() as run:
    run_id = run.info.run_id
    print(f"Logging a new run with ID: {run_id}")

    # Log parameters, metrics, and artifacts
    log_param("param1", 5)
    log_metric("metric1", 0.85)
    # Save an example artifact
    with open("output.txt", "w") as f:
        f.write("Hello, MLflow!")
    log_artifact("output.txt")

    print(f"Run {run_id} completed.")


2024/05/30 01:24:22 INFO mlflow.tracking.fluent: Experiment with name 'nyc-taxi' does not exist. Creating a new experiment.


Logging a new run with ID: 67adc94bff1e4d9aa633200f0f327ba9
Run 67adc94bff1e4d9aa633200f0f327ba9 completed.


In [45]:
from mlflow.tracking import MlflowClient

client = MlflowClient()

# Get the latest run ID for the experiment
experiment_id = '1'
runs = client.search_runs(experiment_ids=[experiment_id])
if not runs:
    print(f"No runs found for experiment ID {experiment_id}")
else:
    # Get the run ID of the latest run
    run_id = runs[0].info.run_id

    # Register the model
    model_uri = f"runs:/{run_id}/models"
    model_name = 'iris-classifier'
    registered_model = mlflow.register_model(model_uri=model_uri, name=model_name)

    print(f"Model registered with name {registered_model.name} and version {registered_model.version}")


Model registered with name iris-classifier and version 1


Successfully registered model 'iris-classifier'.
Created version '1' of model 'iris-classifier'.
