In [0]:
%pip install transformers torch mlflow

Collecting transformers
  Downloading transformers-4.57.6-py3-none-any.whl.metadata (43 kB)
Collecting torch
  Downloading torch-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl.metadata (31 kB)
Collecting mlflow
  Downloading mlflow-3.8.1-py3-none-any.whl.metadata (31 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2026.1.15-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl.metadata (40 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (7.3 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (4.1 kB)
Collecting tqdm>=4.27 (from transformers)
  Downloading tqdm-4.67.1-py3-none-any.whl.metada

In [0]:
%pip install transformers torch mlflow

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
from transformers import pipeline
import mlflow

  from torch.utils._pytree import _broadcast_to_and_flatten, tree_flatten, tree_unflatten


In [0]:
from transformers import pipeline
import mlflow

# Build the sentiment-analysis pipeline from an explicitly named
# pre-trained checkpoint.
classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)


# Sample product reviews to classify.
reviews = ["This product is amazing!", "Terrible quality, waste of money"]

# Classify every review in a single pipeline call; each entry of the result
# is later read for its "label" and "score" fields.
raw_results = classifier(reviews)


def _to_record(review_text, prediction):
    """Return the summary dict for one review and its raw prediction.

    The confidence score is rounded to three decimals and capped at 0.999,
    so a score that would round to 1.0 is reported as 0.999.
    """
    return {
        "review": review_text,
        "sentiment": prediction["label"],
        "confidence_score": min(round(prediction["score"], 3), 0.999),
    }


# Pair each review with its prediction (same order as the input list).
formatted_results = [
    _to_record(review_text, prediction)
    for review_text, prediction in zip(reviews, raw_results)
]

print("Formatted sentiment results:")
print(formatted_results)

# Log the run to MLflow: model identity, metrics, and the formatted results.

with mlflow.start_run(run_name="sentiment_model"):
    # Short label for the model (unchanged from the original run layout).
    mlflow.log_param("model", "distilbert-sentiment")
    # Also record the exact checkpoint the pipeline loaded, so the run can be
    # traced back to the real model rather than only to the short label.
    mlflow.log_param("model_checkpoint", classifier.model.config.name_or_path)

    # NOTE(review): 0.95 is a hard-coded placeholder, not a measured value.
    # Replace it with an accuracy computed on labelled data before relying
    # on this metric when comparing runs.
    mlflow.log_metric("accuracy", 0.95)

    # One metric per review, keyed by its 1-based position in the input list.
    for i, r in enumerate(formatted_results, 1):
        mlflow.log_metric(f"review_{i}_confidence", r["confidence_score"])

    # Plain-text artifact: the Python repr of the results list.
    mlflow.log_text(str(formatted_results), "sentiment_results.txt")
    # Machine-readable copy of the same results; the repr above uses single
    # quotes and cannot be parsed as JSON.
    mlflow.log_dict({"results": formatted_results}, "sentiment_results.json")

print("Sentiment analysis results logged to MLflow Successfully")

Device set to use cpu


Formatted sentiment results:
[{'review': 'This product is amazing!', 'sentiment': 'POSITIVE', 'confidence_score': 0.999}, {'review': 'Terrible quality, waste of money', 'sentiment': 'NEGATIVE', 'confidence_score': 0.999}]
Sentiment analysis results logged to MLflow Successfully
