In [12]:
import mlflow

In [13]:
# Point MLflow at the local tracking server and select this notebook's experiment.
MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"
MLFLOW_EXPERIMENT = "model_design2"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT)

# Turn on scikit-learn autologging, with dataset logging switched off.
mlflow.sklearn.autolog(log_datasets=False)

In [14]:
import pandas  as pd
import numpy   as np


In [15]:
# Read the training set into `df`; later cells use its "review" and "polarity" columns.
df = pd.read_csv(filepath_or_buffer="../data/train.csv")

In [16]:
# Remove the "Unnamed: 0" column. errors="ignore" keeps this cell re-runnable:
# because `df` is reassigned here, a second execution would otherwise raise
# KeyError once the column is already gone.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

# Keep the missing-value count as the last expression so the notebook actually
# displays it (as a non-final statement its result was silently discarded).
df.isna().sum()

In [17]:
# imports for loading the pickled tf-idf vectorizer (../models/tfidf.pkl), training and evaluating the model
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score , recall_score , accuracy_score


In [20]:
# get last id of the git commit 
import subprocess
import re
# check_output returns bytes (the hash followed by a newline). Decode and strip
# them instead of regex-deleting characters from the bytes repr: the old
# pattern "[b']" also removed every hex digit "b" from the hash and left a
# literal "\n" (backslash + n) at the end of the tag value.
git_id = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode("utf-8").strip()

In [30]:
# Train a logistic-regression classifier on TF-IDF features, evaluate it on the
# test set, and record metrics, tags and the registered model in one MLflow run.
with mlflow.start_run(description= "this is a model to be registred in mlflow only by code ") :
    # Load the pickled TF-IDF vectorizer. The context manager closes the file
    # handle, which a bare open() passed to pickle.load would leave open.
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open("../models/tfidf.pkl", "rb") as tfidf_file:
        tfidf = pickle.load(tfidf_file)

    # Vectorize the training reviews and fit the classifier.
    X = tfidf.transform(df['review'])
    y = df["polarity"]
    model_log = LogisticRegression(max_iter = 2000)
    model_log.fit(X, y)

    # Transform the test reviews with the same vectorizer so the test matrix
    # lives in the same feature space as the training matrix.
    df_test = pd.read_csv("../data/test.csv")
    X_test = tfidf.transform(df_test["review"])
    y_test = df_test["polarity"]
    y_pred = model_log.predict(X_test)

    # Evaluate on the test set and log each metric to the active run.
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("accuracy", accuracy)

    # Identify the source code, branch and commit as run tags.
    mlflow.set_tag("mlflow.source.git.branch" , "main")
    mlflow.set_tag("mlflow.source.git.commit" , git_id)
    mlflow.set_tag("mlflow.source.name" , "model_design2.ipynb")
    mlflow.set_tag("preprocessing" , "tfidf")
    mlflow.set_tag("dataset" , "AlloCiné")

    # Log the fitted model as a run artifact and register it in the model
    # registry under the given name in the same call.
    mlflow.sklearn.log_model(model_log , "model_log" , registered_model_name = "added by code 2 ")
    



Successfully registered model 'added by code 2 '.
2023/11/14 16:19:36 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: added by code 2 , version 1
Created version '1' of model 'added by code 2 '.
