### Imports

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from spacy.lang.fr import stop_words
from sklearn.linear_model import LogisticRegression
from mlflow.models import infer_signature
# Rebind `stop_words` from the spaCy module to a plain list of French stop words.
# STOP_WORDS is a set, and iterating a set of strings yields a hash-dependent
# order, so sort it: the list then has the same order in every kernel session
# (and so does any repr of it recorded as a vectorizer parameter).
stop_words = sorted(stop_words.STOP_WORDS)
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
import mlflow
import subprocess
from mlflow import MlflowClient

### Connect to server and enable MLflow sklearn autolog

In [2]:
# Resolve the commit currently checked out so the run can be tagged with it.
# `check_output` returns raw bytes ending in a newline by default; request text
# and strip it so the value is the bare hash string.
commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()

In [3]:
# Configure the MLflow client: tracking server, sklearn autologging, experiment.
remote_server_uri = "http://localhost:5000/"
mlflow.set_tracking_uri(remote_server_uri)
mlflow.sklearn.autolog(log_datasets=False)
mlflow.set_experiment("/1st-experiment")

# Open the run that the following cells log to and keep its handle in `run`.
run = mlflow.start_run()

# Source metadata attached to the run.
# NOTE(review): the branch tag is the literal "HEAD", not a resolved branch
# name — confirm this is intended.
run_tags = {
    "mlflow.note.content": "This is MLOPS project",
    "mlflow.source.name": "model_design_2.ipynb",
    "mlflow.source.git.commit": commit_hash,
    "mlflow.source.git.branch": "HEAD",
}
mlflow.set_tags(run_tags)



### Loading data

In [4]:
# Read the train, test and validation splits, in that order, into DataFrames.
df_train, df_test, df_valid = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../data/train.csv",
        "../../data/test.csv",
        "../../data/valid.csv",
    )
)

### Manual logging for dataset

### Pipeline

In [5]:
# Two-step model: TF-IDF features from the review text, then a logistic
# regression fitted with the liblinear solver.
tfidf_step = ("tfidf", TfidfVectorizer(stop_words=stop_words))
logreg_step = ("logreg", LogisticRegression(solver="liblinear"))
pipe = Pipeline(steps=[tfidf_step, logreg_step])

# Fit on the training split, then predict the test split's reviews.
pipe.fit(df_train["review"], df_train["polarity"])
y_pred = pipe.predict(df_test["review"])



### Add test accuracy score

In [6]:
# Score the test split and record the result on the active MLflow run.
# `accuracy_score` takes (y_true, y_pred) in that order. The predictions for
# df_test["review"] already exist as `y_pred` from the pipeline cell, so they
# are reused instead of predicting the same data a second time.
Y_test = df_test["polarity"].to_numpy()
test_accuracy_score = accuracy_score(Y_test, y_pred)
mlflow.log_metric("test_accuracy_score", test_accuracy_score)

### MLflow Registry

In [None]:
# Log the fitted pipeline to the active run and register it under two names.
model_name = "model_v1"
signature = infer_signature(df_test["review"], y_pred)
mlflow.sklearn.log_model(
    pipe,
    name=model_name,
    signature=signature,
    registered_model_name="test_regressor",
)

client = MlflowClient()
src_name = "LogisticRegression-dev"
# The version source must refer to the model logged above, so build the URI
# from the same name that was passed to `log_model`.
src_uri = f"runs:/{run.info.run_id}/{model_name}"
try:
    client.create_registered_model(src_name)
except mlflow.exceptions.MlflowException as exc:
    # On a re-run the registered model already exists; that is fine, a new
    # version is simply added below. Any other failure is a real error.
    if exc.error_code != "RESOURCE_ALREADY_EXISTS":
        raise
tags = {
    "champion": "model_v1"
}
desc = "Polarity estimation from text model"
# Pass the run id as well so the model version is linked to the run that produced it.
mv_src = client.create_model_version(
    src_name,
    src_uri,
    run_id=run.info.run_id,
    tags=tags,
    description=desc,
)

Successfully registered model 'test_regressor'.
2025/11/18 16:23:51 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: test_regressor, version 1
Created version '1' of model 'test_regressor'.
2025/11/18 16:23:51 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegression-dev, version 1


### Close MLflow run

In [8]:
# Close the run opened in the setup cell, marking it as finished.
mlflow.end_run(status="FINISHED")

🏃 View run delicate-dove-708 at: http://localhost:5000/#/experiments/529200409768567991/runs/be60c3918e194bc984f0bc2b38cac82d
🧪 View experiment at: http://localhost:5000/#/experiments/529200409768567991
