In [13]:
# import libraries
import os 

import mlflow
from pprint import pprint
from minio import Minio

from helpers import read_data, read_data_from_minio, test_model_by_registry_version, test_model_by_run_id, test_model_by_registry_all_versions

In [14]:
# specify path to the local data directory 
data_path = f"{os.path.abspath(os.path.join(os.getcwd(), os.pardir))}/data/"
data_path

'/Users/abdessamadbaahmed/Desktop/livrable_mp_data/data/'

In [15]:
# initialize minioClient with an endpoint and access/secret keys.
minio_client = Minio('20.224.70.229:9000',
                    access_key='abdessamadbaahmed',
                    secret_key='baahmedabdessamad', secure=False)

# list all buckets
buckets = minio_client.list_buckets()
buckets

[Bucket('nba-investment-data')]

In [16]:
# read the data from minio and display the head, if it fails, read the data from the local directory
try:
    test = read_data_from_minio(minio_client, "nba-investment-data", "nba_logreg_processed_test.csv")
    display(test.head())
except Exception:
    test = read_data(f"{data_path}nba_logreg_processed_test.csv")
    test.head()

Unnamed: 0,GP,MIN,PTS,FGM,FGA,FG%,3P Made,3PA,3P%,FTM,FTA,FT%,OREB,DREB,REB,AST,STL,BLK,TOV,TARGET_5Yrs
0,82.0,33.0,17.1,7.0,14.3,49.0,0.0,0.1,20.0,3.0,4.4,68.7,3.7,4.2,8.0,2.4,1.2,0.5,3.0,1
1,74.0,26.4,7.8,3.1,7.4,41.6,0.1,0.7,21.2,1.5,2.3,65.9,0.3,1.6,1.9,4.5,0.7,0.1,2.2,1
2,67.0,16.9,5.1,2.0,3.4,59.6,0.0,0.0,0.0,1.1,1.8,57.7,1.6,2.9,4.5,0.6,0.5,0.3,0.8,0
3,79.0,26.3,10.8,4.2,8.5,49.9,0.0,0.1,25.0,2.3,3.2,73.1,2.2,3.5,5.7,2.2,0.7,0.7,1.5,1
4,82.0,20.8,8.5,3.1,6.3,49.1,0.1,0.4,36.7,2.1,2.7,77.2,1.4,2.6,4.0,1.3,0.8,0.8,1.3,1


In [17]:
#  define test data
X_test = test.drop("TARGET_5Yrs", axis=1)
y_test = test["TARGET_5Yrs"]

In [18]:
# set tracking uri
mlflow.set_tracking_uri("http://20.224.70.229:5000/")

# set experiment name
mlflow.set_experiment("nba-investment-experiment")

# initialize mlflow client
client = mlflow.tracking.MlflowClient()

# Search for experiments
experiments = client.search_experiments()

# Print the experiment ID and name for each experiment
for experiment in experiments:
    print(f"Experiment ID: {experiment.experiment_id}")
    print(f"Experiment name: {experiment.name}")

Experiment ID: 1
Experiment name: nba-investment-experiment


In [19]:
for mv in client.search_model_versions("name='nba-investment-prediction-model'"):
    pprint(dict(mv), indent=4)

{   'creation_timestamp': 1673525898445,
    'current_stage': 'Production',
    'description': '',
    'last_updated_timestamp': 1673883694751,
    'name': 'nba-investment-prediction-model',
    'run_id': '346cb05abd8d4f06b9a5574aa46644ab',
    'run_link': '',
    'source': 'mlflow-artifacts:/1/346cb05abd8d4f06b9a5574aa46644ab/artifacts/model',
    'status': 'READY',
    'status_message': '',
    'tags': {},
    'user_id': '',
    'version': '2'}
{   'creation_timestamp': 1673525948208,
    'current_stage': 'None',
    'description': '',
    'last_updated_timestamp': 1673881790243,
    'name': 'nba-investment-prediction-model',
    'run_id': '92ba1717068040aba1137975e81e4b81',
    'run_link': '',
    'source': 'mlflow-artifacts:/1/92ba1717068040aba1137975e81e4b81/artifacts/model',
    'status': 'READY',
    'status_message': '',
    'tags': {},
    'user_id': '',
    'version': '3'}
{   'creation_timestamp': 1673525667721,
    'current_stage': 'None',
    'description': '',
    'last_u

In [20]:
test_model_by_registry_version("nba-investment-model-f1", 1, X_test, y_test)

{'model_name': 'nba-investment-model-f1',
 'model_version': 1,
 'accuracy': 0.7126865671641791,
 'precision': 0.779874213836478,
 'recall': 0.7469879518072289,
 'f1': 0.763076923076923}

In [21]:
test_model_by_registry_all_versions("nba-investment-model-f1", X_test, y_test, sort_by="f1")

Unnamed: 0,model_name,model_version,accuracy,precision,recall,f1
2,nba-investment-model-f1,3,0.746269,0.781609,0.819277,0.8
3,nba-investment-model-f1,4,0.738806,0.785714,0.795181,0.790419
7,nba-investment-model-f1,8,0.731343,0.770115,0.807229,0.788235
8,nba-investment-model-f1,9,0.731343,0.770115,0.807229,0.788235
1,nba-investment-model-f1,2,0.712687,0.754286,0.795181,0.774194
5,nba-investment-model-f1,6,0.712687,0.760234,0.783133,0.771513
6,nba-investment-model-f1,7,0.712687,0.760234,0.783133,0.771513
4,nba-investment-model-f1,5,0.712687,0.763314,0.777108,0.770149
0,nba-investment-model-f1,1,0.712687,0.779874,0.746988,0.763077


In [22]:
def transition_best_model_version_to_prod(X_test, y_test, model_name, metric, mlfow_client):
    """
    The function transitions the best model version on the model registry to production by testing it on the test data 
    :param X_test: test features
    :param y_test: test target
    :param model_name: name of the model on the registry
    :param metric: metric to use for model selection
    :param mlfow_client: mlflow client
    
    :return: None
    """
    
    def test_model_by_registry_all_versions(X_test, y_test, model_name):
        """
        the function tests all the versions of the model and returns a DataFrame with the evaluation metrics
        
        :param model_name: name of the model on the registry
        :param X_test: test features
        :param y_test: test target
        
        :return: dictionary with the evaluation metrics
        """
        i = 0
        res = []
        while True:
            try:
                res.append(
                    test_model_by_registry_version(model_name, i + 1, X_test, y_test)
                )
            except Exception:
                break
            i += 1
        return res

    testing_metrics = test_model_by_registry_all_versions(X_test, y_test, "nba-investment-prediction-model")
        
    max_version = None
    max_metric = float('-inf')
    for data in testing_metrics:
        if data[metric] > max_metric:
            max_metric = data[metric]
            max_version = data["model_version"]

    mlfow_client.transition_model_version_stage(
    name=model_name,
    version=max_version,
    stage="Production"
    )
    return max_version


In [23]:
transition_best_model_version_to_prod(X_test, y_test, "nba-investment-model-f1", "f1", client)

 - psutil (current: 5.9.4, required: psutil==5.9.0)
 - typing-extensions (current: 4.4.0, required: typing-extensions==4.2.0)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
 - psutil (current: 5.9.4, required: psutil==5.9.0)
 - typing-extensions (current: 4.4.0, required: typing-extensions==4.2.0)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
 - psutil (current: 5.9.4, required: psutil==5.9.0)
 - typing-extensions (current: 4.4.0, required: typing-extensions==4.2.0)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


1