### Imports

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from spacy.lang.fr import stop_words
from sklearn.linear_model import LogisticRegression

# Rebind `stop_words` (until now the module imported from spacy.lang.fr) to a
# plain list of that module's STOP_WORDS entries; the list is what gets passed
# to TfidfVectorizer later in the notebook.
stop_words = list(stop_words.STOP_WORDS)
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
import mlflow
import subprocess

### Connect to the tracking server and enable MLflow scikit-learn autologging

In [9]:
# Capture the hash of the currently checked-out commit so it can be attached
# to the MLflow run as a tag.
# check_output returns raw bytes ending with a newline by default; text=True
# decodes the output to str and strip() drops the trailing newline so the tag
# value is the bare hash rather than a bytes repr.
commit_hash = subprocess.check_output(
    ["git", "rev-parse", "HEAD"], text=True
).strip()

In [None]:
# Point MLflow at the local tracking server, turn on scikit-learn autologging
# (with dataset logging disabled) and select the experiment to log into.
remote_server_uri = "http://localhost:5000/"
mlflow.set_tracking_uri(remote_server_uri)
mlflow.sklearn.autolog(log_datasets=False)
mlflow.set_experiment("/1st-experiment")

# Open the run that the following cells log to; it stays active until
# mlflow.end_run() is called.
mlflow.start_run()

# Descriptive / provenance tags attached to the run.
run_tags = {
    "mlflow.note.content": "This is MLOPS project",
    "mlflow.source.name": "model_design_2.ipynb",
    "mlflow.source.git.commit": commit_hash,
    "mlflow.source.git.branch": "HEAD",
}
mlflow.set_tags(run_tags)



### Loading data

In [3]:
# Read the train / test / validation splits (in that order) into DataFrames.
df_train, df_test, df_valid = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../data/train.csv",
        "../../data/test.csv",
        "../../data/valid.csv",
    )
)

### Manual logging for dataset

### Pipeline

In [4]:
# Two-step text classifier: TF-IDF features (using the stop-word list built in
# the import cell) followed by a logistic regression with the liblinear solver.
pipe = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer(stop_words=stop_words)),
        ("logreg", LogisticRegression(solver="liblinear")),
    ]
)

# Fit on the training reviews and their polarity labels, then predict the
# polarity of the test reviews.
pipe.fit(df_train["review"], df_train["polarity"])
y_pred = pipe.predict(df_test["review"])



### Add test accuracy score

In [None]:
# Ground-truth polarity labels of the test split as a NumPy array.
Y_test = df_test["polarity"].to_numpy()

# accuracy_score expects (y_true, y_pred): pass the true labels first and the
# pipeline's predictions second. The score itself is unchanged by the order,
# but following the documented signature avoids mistakes if this call is ever
# copied for an asymmetric metric (precision, recall, ...).
test_accuracy_score = accuracy_score(Y_test, pipe.predict(df_test["review"]))

# Record the test accuracy explicitly on the active MLflow run.
mlflow.log_metric("test_accuracy_score", test_accuracy_score)

### Close mlflow run

In [6]:
# End the currently active MLflow run (the one opened with mlflow.start_run()
# earlier in the notebook).
mlflow.end_run()

🏃 View run puzzled-hare-223 at: http://localhost:5000/#/experiments/529200409768567991/runs/1d7562486d204607915c8124878a3f56
🧪 View experiment at: http://localhost:5000/#/experiments/529200409768567991
