In [1]:
# !python preprocess.ipynb

In [2]:
import os
import configparser
import pickle
import mlflow
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [3]:
# Read the project settings from config.ini (resolved against the current
# working directory).
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; an empty list therefore means config.ini was not
# found. Fail here with a clear message instead of a later NoSectionError.
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini not found in the current working directory")
RANDOM_STATE = config.getint('Default', 'RANDOM_STATE')
PATH_PROCESSED_DATA = config.get('Paths', 'PATH_PROCESSED_DATA')
PATH_PROCESSED_SCHEMA = config.get('Paths', 'PATH_PROCESSED_SCHEMA')
DIR_MLRUNS = config.get('Paths', 'DIR_MLRUNS')
# Name of the MLflow experiment under which the runs of this notebook are grouped.
experiment_name = "TPT GridSearch CV=10"

In [4]:
# Load the preprocessed data: the pickle holds a 4-item sequence unpacked as
# (X_train, X_test, y_train, y_test).
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by a trusted step of this project.
with open(PATH_PROCESSED_DATA, mode='rb') as processed_file:
    (X_train, X_test, y_train, y_test) = pickle.load(processed_file)

In [5]:
# MLflow setup: track into the local file store located at DIR_MLRUNS and
# switch scikit-learn autologging off (model_run logs explicitly).
mlruns_uri = "file:{}".format(os.path.abspath(DIR_MLRUNS))
mlflow.set_tracking_uri(mlruns_uri)
mlflow.sklearn.autolog(disable=True)

In [6]:
# Resolve the experiment id: reuse the experiment when it already exists,
# create it otherwise. get_experiment_by_name() returns None for an unknown
# name, so no exception has to be caught and genuine tracking-store errors are
# no longer swallowed by a bare `except`.
existing_experiment = mlflow.get_experiment_by_name(experiment_name)
if existing_experiment is not None:
    experiment_id = existing_experiment.experiment_id
else:
    experiment_id = mlflow.create_experiment(experiment_name)

In [7]:
def model_run(model,run_name,param_grid,step,X_train,X_test, y_train, y_test, cv=10, scoring="accuracy"):
    """Grid-search ``model``, evaluate the best estimator on the test split and log to MLflow.

    Logging goes through the MLflow fluent API, so it lands in the currently
    active run (the training loop of this notebook opens one with
    ``mlflow.start_run`` before calling this function).

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn estimator handed to ``GridSearchCV``.
    run_name : str
        Short model identifier; used as the suffix of the registered model
        name ``vehicule_classifier_<run_name>``.
    param_grid : dict
        Hyper-parameter grid explored by ``GridSearchCV``.
    step : int
        Step value attached to the logged metrics.
    X_train, y_train
        Training features and labels used for the cross-validated search.
    X_test, y_test
        Held-out features and labels used for the final metrics.
    cv : int, default 10
        Cross-validation setting forwarded to ``GridSearchCV``.
    scoring : str, default "accuracy"
        Scoring criterion forwarded to ``GridSearchCV``.

    Returns
    -------
    dict
        The metrics that were logged: ``best_score`` (best cross-validation
        score of the search) and the test accuracy / F1 / precision / recall.
    """

    # Model training: exhaustive search over param_grid with cross-validation.
    grid = GridSearchCV(model,param_grid,cv=cv,verbose=2,scoring=scoring)
    grid.fit(X_train, y_train)

    # Prediction on the test data with the best estimator found by the search.
    best_model = grid.best_estimator_
    y_pred = best_model.predict(X_test)

    # Metric computation; precision, recall and F1 are macro-averaged.
    average = "macro"
    metrics = {
        "best_score": grid.best_score_,
        "test_accuracy_score": accuracy_score(y_test, y_pred),
        "test_f1_score": f1_score(y_test, y_pred, average=average),
        "test_precision_score": precision_score(y_test, y_pred, average=average),
        "test_recall_score": recall_score(y_test, y_pred, average=average)
    }

    # Logs: the model (also registered under a per-run name), its full
    # parameter set, the metrics, and the processed-data schema stored next to
    # the model artifacts.
    model_path = "model"
    mlflow.sklearn.log_model(best_model,model_path,registered_model_name=f"vehicule_classifier_{run_name}")
    mlflow.log_params(best_model.get_params())
    mlflow.log_metrics(metrics,step=step)
    mlflow.log_artifact(PATH_PROCESSED_SCHEMA, model_path)

    # Hand the metrics back so they can be inspected directly in the notebook.
    return metrics

In [8]:
# Catalogue of candidate models. Each entry holds the unfitted estimator
# ("model"), the MLflow run name ("run_name") and the hyper-parameter grid
# explored by GridSearchCV ("param_grid").
#
# The random seed is fixed on the estimators from RANDOM_STATE (config.ini)
# instead of being part of the grids: a seed is not a hyper-parameter, and
# searching over it doubles the number of fits while selecting on noise.
# KNeighborsClassifier takes no random_state, so it is left unseeded.
runs_config = [
    {
        "model": LogisticRegression(random_state=RANDOM_STATE),
        "run_name":"logr",
        "param_grid": {
            "solver": ["liblinear","newton-cholesky"],
            "C":[0.2,0.5]
        }
    },
    {
        "model": DecisionTreeClassifier(random_state=RANDOM_STATE),
        "run_name":"decision_tree",
        "param_grid":  {
            "criterion": ["gini","entropy"],
            "max_depth": [4,8]
        }
    },
    {
        "model": RandomForestClassifier(random_state=RANDOM_STATE),
        "run_name":"random_forest",
        "param_grid":   {
            "n_estimators": [50,100],
            "max_depth": [4,8]
        }
    },{
        "model": KNeighborsClassifier(),
        "run_name":"knn",
        "param_grid":   {
            "n_neighbors": [4,5],
            "algorithm": ['auto','ball_tree','kd_tree']
        }
    }
]

In [11]:
# Restrict this session to the KNN configuration. Selecting by run name rather
# than by list position keeps the selection correct if runs_config is
# reordered or extended. Use `runs = runs_config` to train every model.
runs = [entry for entry in runs_config if entry["run_name"] == "knn"]

In [12]:
# Train, evaluate and log every selected configuration, each in its own MLflow
# run inside the experiment resolved earlier.
# The loop variable is deliberately not named `config`: that name is bound to
# the ConfigParser instance, and rebinding it to a dict would make any later
# `config.get(section, key)` silently return the key string instead of the
# configured value.
for step, run_config in enumerate(runs):
    run_name = run_config["run_name"]
    print("Train :", run_name)
    with mlflow.start_run(run_name=run_name, experiment_id=experiment_id):
        model_run(run_config["model"], run_name, run_config["param_grid"], step, X_train, X_test, y_train, y_test)

Train : knn
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.4s
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.3s
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.2s
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.2s
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.3s
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.6s
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.6s
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.6s
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.6s
[CV] END ......................algorithm=auto, n_neighbors=4; total time=   1.5s
[CV] END ......................algorithm=auto, n_neighbors=5; total time=   1.6s
[CV] END ......................algor

Registered model 'vehicule_classifier_knn' already exists. Creating a new version of this model...
2023/03/10 23:11:19 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: vehicule_classifier_knn, version 7
Created version '7' of model 'vehicule_classifier_knn'.
