In [None]:
import os
import mlflow
import logging
import pandas as pd
from dotenv import load_dotenv

from data import data_preparation
from model import logistic_regression
from mlops import model_workflow

In [2]:
# Configure the root logger for the whole notebook: INFO and above, with the
# timestamp, level and logger name in front of every message.
# force=True removes any handlers already attached to the root logger first.
# Without it basicConfig() silently does nothing when a handler already exists
# (e.g. this cell is re-run with an edited format, or an imported module
# configured logging earlier), so the format below would not take effect.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s][%(levelname)s][%(name)s]: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    force=True,
)
# Notebook-level logger, named after the running module.
logger = logging.getLogger(__name__)

In [3]:
# Load variables from a local .env file into the process environment before
# any project code below runs.
load_dotenv()

# One seed shared by the data preparation step and the model workflow.
RANDOM_STATE = 2026

# Name passed to the workflow as `registered_model_name`.
# NOTE(review): the saved output of the workflow cell logs the model as
# 'TF-IDF Logistic Regression'; that output may predate this value —
# re-run and confirm which name the registry actually uses.
REGISTERED_MODEL_NAME = "TFIDF_Logistic_Regression"

# Hyper-parameter search space: 2 x 2 x 2 x 2 = 16 combinations.
# Keys follow the `<step>__<parameter>` form; presumably they address the
# `tfidf` and `clf` steps of a pipeline built in `model` — TODO confirm.
param_grid = dict(
    tfidf__ngram_range=[(1, 1), (1, 2)],
    tfidf__max_df=[0.8, 0.9],
    tfidf__min_df=[5, 10],
    clf__max_iter=[100, 500],
)

# Fetch the dataset file and obtain the train/test features and labels.
X_train, X_test, y_train, y_test = data_preparation(
    random_state=RANDOM_STATE,
    dataset="oliviervha/crypto-news",
    file_name="cryptonews.csv",
)

[2026-01-31 17:56:12][INFO][data]: File 'cryptonews.csv' has been loaded with shape (31037, 7)
[2026-01-31 17:56:12][INFO][data]: Class distribution (count):
class
positive    13964
neutral     10555
negative     6518
[2026-01-31 17:56:12][INFO][data]: Class distribution (ratio):
class
positive    0.449915
neutral     0.340078
negative    0.210007
[2026-01-31 17:56:12][INFO][data]: Data preparation is complete


In [4]:
# Run the training workflow for the TF-IDF + logistic regression candidate.
# Judging by the saved log output, this fits the model, registers the best
# one as a new version, and compares it with the current production version.
model_workflow(
    # Experiment tracking / registry identifiers
    experiment_name="Sentiment_Logistic_Regression",
    run_name_prefix="logreg_gridsearch",
    registered_model_name=REGISTERED_MODEL_NAME,
    # Estimator and its search space
    Classifier=logistic_regression,
    param_grid=param_grid,
    random_state=RANDOM_STATE,
    # Train / test split produced by data_preparation()
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

[2026-01-31 17:56:15][INFO][mlops]: Start fitting the model
[2026-01-31 17:59:05][INFO][mlops]: Fitting complete
Registered model 'TF-IDF Logistic Regression' already exists. Creating a new version of this model...
2026/01/31 17:59:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: TF-IDF Logistic Regression, version 4
Created version '4' of model 'TF-IDF Logistic Regression'.
[2026-01-31 17:59:31][INFO][mlops]: The best model (ver.4) is registered
[2026-01-31 17:59:31][INFO][mlops]: Prod model (ver.3) is better, candidate model (ver.4) is deprecated


🏃 View run logreg_gridsearch 2026-01-31 17:56:15 at: http://localhost:5000/#/experiments/1/runs/f67fc18b908a4e29a6c009305ec9c2c1
🧪 View experiment at: http://localhost:5000/#/experiments/1
