# Sentiment Analysis with MLflow Experiment Tracking

This notebook integrates MLflow into the Sentiment Analysis project.

In [1]:
import os
import mlflow

# Point notebook to the SAME MLflow server
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# Create / set experiment explicitly
mlflow.set_experiment("Sentiment-Analysis-Experiment")

print("Tracking URI:", mlflow.get_tracking_uri())
print("Working directory:", os.getcwd())

import pandas as pd
import joblib
import mlflow
import mlflow.sklearn

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


2026/02/08 23:10:00 INFO mlflow.tracking.fluent: Experiment with name 'Sentiment-Analysis-Experiment' does not exist. Creating a new experiment.


Tracking URI: http://127.0.0.1:5000
Working directory: c:\Users\newtp\OneDrive\Desktop\Sentiment_Analysis_Deployment


In [2]:

mlflow.set_experiment("Sentiment-Analysis-Experiment")


<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1770572400421, experiment_id='1', last_update_time=1770572400421, lifecycle_stage='active', name='Sentiment-Analysis-Experiment', tags={}>

## Load Dataset

In [3]:
df = pd.read_csv("data.csv")

# Remove rows with missing review text
df = df.dropna(subset=["Review text"])

# Remove neutral reviews
df = df[df["Ratings"] != 3]

# Features and labels
X = df["Review text"]
y = df["Ratings"].apply(lambda x: 1 if x >= 4 else 0)

print("Dataset shape:", df.shape)
print("Class distribution:")
print(y.value_counts())




X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

Dataset shape: (7895, 8)
Class distribution:
Ratings
1    6823
0    1072
Name: count, dtype: int64
(6316,)
(1579,)
(6316,)
(1579,)


## Vectorization

In [4]:

max_features = 5000
vectorizer = TfidfVectorizer(max_features=max_features)

X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


## Model Training with MLflow

In [5]:

with mlflow.start_run(run_name="LogisticRegression_TFIDF"):

    C = 1.0
    max_iter = 1000

    model = LogisticRegression(C=C, max_iter=max_iter)
    model.fit(X_train_vec, y_train)

    y_pred = model.predict(X_test_vec)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    mlflow.log_param("model_type", "LogisticRegression")
    mlflow.log_param("C", C)
    mlflow.log_param("max_iter", max_iter)
    mlflow.log_param("max_features", max_features)

    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1_score", f1)

    joblib.dump(model, "sentiment_model.pkl")
    joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

    mlflow.log_artifact("sentiment_model.pkl")
    mlflow.log_artifact("tfidf_vectorizer.pkl")

    mlflow.sklearn.log_model(model, artifact_path="model")

    print("Run logged in MLflow")


  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


Run logged in MLflow
üèÉ View run LogisticRegression_TFIDF at: http://127.0.0.1:5000/#/experiments/1/runs/7fe38966983544019115947a1eea72be
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1


## Metrics Summary

In [6]:

print(f"Accuracy  : {accuracy:.4f}")
print(f"Precision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1 Score  : {f1:.4f}")


Accuracy  : 0.9398
Precision : 0.9428
Recall    : 0.9905
F1 Score  : 0.9661


## Model Registration
Replace <RUN_ID> with your MLflow run ID

In [7]:

 mlflow.register_model(
     "http://127.0.0.1:5000/model",
     "SentimentAnalysisModel"
 )


Successfully registered model 'SentimentAnalysisModel'.
2026/02/08 23:10:13 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: SentimentAnalysisModel, version 1
Created version '1' of model 'SentimentAnalysisModel'.


<ModelVersion: aliases=[], creation_timestamp=1770572413757, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1770572413757, metrics=None, model_id=None, name='SentimentAnalysisModel', params=None, run_id='', run_link='', source='http://127.0.0.1:5000/model', status='READY', status_message=None, tags={}, user_id='', version='1'>