In [1]:
import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Create a model

# Load the Iris dataset
X, y = datasets.load_iris(return_X_y=True)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Define the model hyperparameters
params = {
    "solver": "lbfgs",
    "max_iter": 1000,
    "multi_class": "auto",
    "random_state": 8888,
}

# Train the model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# Predict on the test set
y_pred = lr.predict(X_test)

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)



In [6]:
# Log and save the model and metrics

# Set our tracking server uri for logging
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Create a new MLflow Experiment
mlflow.set_experiment("MLflow Quickstart")

# Start an MLflow run
with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the loss metric
    mlflow.log_metrics({"accuracy": accuracy})

    # Set a tag that (key, value) we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data, 30% test split")

    # Infer the model signature (input and output schema) from the training data
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log and save the model as a scikit-learn model
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model", # Save the model in a directory called "iris_model"
        signature=signature,
        input_example=X_train,
        # registered_model_name="tracking-quickstart", # Optional: Create a registered model for the model (can also be done through the Mlflow UI)
    )

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 12.02it/s]
2024/08/24 13:05:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run welcoming-fawn-937 at: http://127.0.0.1:8080/#/experiments/389453723675841198/runs/5de61c44db1e4c649afc1832511f7fc7.
2024/08/24 13:05:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/389453723675841198.


In [None]:
# Alternative : If not registered during the run, officially register the model artifact with a specific name

# result = mlflow.register_model(
#     model_info.model_uri, "sk-learn-random-forest-reg"
# )

In [None]:
# Alternative : Save the model as a generic Python function, outside of the MLflow framework without additional metadata

# mlflow.pyfunc.save_model(
#     path="iris_model_pyfunc",
#     python_model=lr,
#     artifacts=model_info.artifacts,
#     conda_env=None,
#     code_path=None,
# )

In [6]:
# Load the model for predictions

# Load the model back for predictions as a generic Python Function model
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)
# loaded_model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{model_version}")
# champion_version = mlflow.pyfunc.load_model(f"models:/{model_name}@{alias}")



predictions = loaded_model.predict(X_test)

iris_feature_names = datasets.load_iris().feature_names

result = pd.DataFrame(X_test, columns=iris_feature_names)
result["actual_class"] = y_test
result["predicted_class"] = predictions

result[:4]

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00,  9.36it/s]


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
0,6.1,2.8,4.7,1.2,1,1
1,5.7,3.8,1.7,0.3,0,0
2,7.7,2.6,6.9,2.3,2,2
3,6.0,2.9,4.5,1.5,1,1


In [None]:
# Serving

from mlflow.models import infer_signature
from mlflow.server import start_server

In [4]:
model_uri = "mlflow-artifacts:/389453723675841198/9299d1be20d44ac59efbcee9e781ab8e/artifacts/iris_model"

model = mlflow.pyfunc.load_model(model_uri)

# Infer the model signature
signature = infer_signature(X_test, y_test)

# Start the server
start_server("models:/<model_name>", port=5000, signature=signature)

MlflowException: The configured tracking uri scheme: 'file' is invalid for use with the proxy mlflow-artifact scheme. The allowed tracking schemes are: {'http', 'https'}

mlflow (check):
- model artifacts = saved model associated with a specific MLflow run (first saving of the model, not official)
- model registry =  version-controlled model in the MLflow Model Registry for collaboration and lifecycle management (after model artifacts, official)
- model serving = process of deploying machine learning models as a real-time web service 

) OK - github student  
) heroku => Invalide avec ma CB  
) OK - finaliser setup git  
1) notebooks d'exploration : regarder les résultats et explorer un peu plus
2) notebook de features engineering : recup tout sauf la partie modélisation, éventuellement créer d'autres features
3) créer les modèles

Pour kedro :  
2 pipelines :
- modélisation
- inférence
Dans chaque pipelines, plusieurs tasks : feature engineering > modélisation par exemple
=> check comment mettre des nodes dans mes pipelines

Demander chez AXA si je peux avoir accès à des projets kedro pour utiliser comme modèle