In [1]:
pip install mlflow scikit-learn pandas numpy nltk prefect


Collecting mlflow
  Downloading mlflow-3.9.0-py3-none-any.whl.metadata (31 kB)
Collecting prefect
  Downloading prefect-3.6.16-py3-none-any.whl.metadata (13 kB)
Collecting mlflow-skinny==3.9.0 (from mlflow)
  Downloading mlflow_skinny-3.9.0-py3-none-any.whl.metadata (32 kB)
Collecting mlflow-tracing==3.9.0 (from mlflow)
  Downloading mlflow_tracing-3.9.0-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.2-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting huey<3,>=2.5.4 (from mlflow)
  Downloading huey-2.6.0-py3-none-any.whl.metadata (4.3 kB)
Collecting skops<1 (from mlflow)
  Downloading skops-0.13.0-py3-none-any.whl.metadata (5.6 kB)


In [1]:
import pandas as pd

data = {
    "review": [
        "Amazing product, very happy",
        "Worst experience ever",
        "Average quality",
        "Excellent value for money",
        "Terrible product, waste of money",
        "Good quality and fast delivery"
    ],
    "sentiment": ["positive", "negative", "neutral", "positive", "negative", "positive"]
}

df = pd.DataFrame(data)


In [2]:
import mlflow
import mlflow.sklearn

mlflow.set_experiment("Flipkart_Sentiment_Analysis")


2026/02/09 02:55:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.schemas
2026/02/09 02:55:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.tables
2026/02/09 02:55:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.types
2026/02/09 02:55:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.constraints
2026/02/09 02:55:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.defaults
2026/02/09 02:55:04 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.comments
2026/02/09 02:55:05 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/02/09 02:55:05 INFO mlflow.store.db.utils: Updating database tables
2026/02/09 02:55:05 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/09 02:55:05 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/02/09 02:55:05 INFO alembic.runtime.migration: Running upgrade  -> 451aebb31d03, add metric step
2026/02/09 02:5

<Experiment: artifact_location='/content/mlruns/1', creation_time=1770605707504, experiment_id='1', last_update_time=1770605707504, lifecycle_stage='active', name='Flipkart_Sentiment_Analysis', tags={}>

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, f1_score

X_train, X_test, y_train, y_test = train_test_split(
    df["review"], df["sentiment"], test_size=0.3, random_state=42
)

vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


In [9]:
with mlflow.start_run(run_name="Logistic_Regression_Model"):
    model = LogisticRegression(max_iter=200)
    model.fit(X_train_vec, y_train)

    preds = model.predict(X_test_vec)

    acc = accuracy_score(y_test, preds)
    f1 = f1_score(y_test, preds, average="weighted")

    mlflow.log_param("model", "LogisticRegression")
    mlflow.log_param("max_iter", 200)

    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("f1_score", f1)

    mlflow.sklearn.log_model(model, "model")

    mlflow.end_run()


  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


In [10]:
with mlflow.start_run(run_name="Naive_Bayes_Model"):
    model = MultinomialNB()
    model.fit(X_train_vec, y_train)

    preds = model.predict(X_test_vec)

    acc = accuracy_score(y_test, preds)
    f1 = f1_score(y_test, preds, average="weighted")

    mlflow.log_param("model", "MultinomialNB")

    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("f1_score", f1)

    mlflow.sklearn.log_model(model, "model")


  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


In [12]:
import subprocess

# Start MLflow UI as a background process
process = subprocess.Popen(['mlflow', 'ui'])

# Display the URL for the MLflow UI
print('MLflow UI is running. Access it at: http://localhost:5000')

MLflow UI is running. Access it at: http://localhost:5000


In [14]:
import mlflow

# Dynamically retrieve the run_id for the 'Naive_Bayes_Model'
# You can also get this from the MLflow UI (http://localhost:5000)
runs = mlflow.search_runs(
    experiment_names=["Flipkart_Sentiment_Analysis"],
    filter_string="tags.mlflow.runName = 'Naive_Bayes_Model'",
    order_by=["start_time DESC"],
    max_results=1
)

if not runs.empty:
    naive_bayes_run_id = runs.iloc[0].run_id
    print(f"Found Naive_Bayes_Model run ID: {naive_bayes_run_id}")
    mlflow.register_model(
        f"runs:/{naive_bayes_run_id}/model",
        "Flipkart_Sentiment_Model"
    )
else:
    print("Could not find 'Naive_Bayes_Model' run. Please ensure it was executed and check the experiment name/run name.")
    print("Alternatively, manually replace <RUN_ID> with the correct ID from the MLflow UI.")

Registered model 'Flipkart_Sentiment_Model' already exists. Creating a new version of this model...


Found Naive_Bayes_Model run ID: 6922af68ceb24978a90e0f418eec371c


Created version '1' of model 'Flipkart_Sentiment_Model'.


In [15]:
client = mlflow.tracking.MlflowClient()

client.set_model_version_tag(
    name="Flipkart_Sentiment_Model",
    version=1,
    key="stage",
    value="production"
)

client.set_model_version_tag(
    name="Flipkart_Sentiment_Model",
    version=1,
    key="use_case",
    value="Flipkart Reviews"
)


In [16]:
from prefect import flow, task

@task
def load_data():
    return df

@task
def train_model(data):
    # training logic here
    return "Model trained"

@flow(name="Flipkart Sentiment Training Flow")
def sentiment_pipeline():
    data = load_data()
    train_model(data)

sentiment_pipeline()


INFO:prefect:Starting temporary server on http://127.0.0.1:8202
See https://docs.prefect.io/v3/concepts/server#how-to-guides for more information on running a dedicated Prefect server.
INFO:prefect.flow_runs:Beginning flow run 'discerning-guppy' for flow 'Flipkart Sentiment Training Flow'
INFO:prefect.task_runs:Finished in state Completed()
INFO:prefect.task_runs:Finished in state Completed()
INFO:prefect.flow_runs:Finished in state Completed()


In [22]:
!prefect server start
# The Prefect UI is usually accessible at: http://127.0.0.1:4200
# However, the temporary server from the previous flow run was at http://127.0.0.1:8202


Prefect collects anonymous usage data to improve the product.
To opt out: set PREFECT_SERVER_ANALYTICS_ENABLED=false on the server, or DO_NOT_TRACK=1 in the client.
Learn more: https://docs.prefect.io/concepts/telemetry


 ___ ___ ___ ___ ___ ___ _____
| _ \ _ \ __| __| __/ __|_   _|
|  _/   / _|| _|| _| (__  | |
|_| |_|_\___|_| |___\___| |_|

Configure Prefect to communicate with the server with:

    prefect config set PREFECT_API_URL=http://127.0.0.1:4200/api

View the API reference documentation at http://127.0.0.1:4200/docs

Check out the dashboard at http://127.0.0.1:4200



[31mERROR[0m:    Traceback (most recent call last):
  File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
    return runner.run(main)
           ^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "uvloop/loop.pyx", line 1512, in uvloop.loop.Loop.run_until_complete