In [1]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, AdamW
from transformers import Trainer, TrainingArguments
from datasets import load_dataset

# Fetch the IMDB dataset that supplies the sentiment-analysis examples
dataset = load_dataset("imdb")

# Tokenizer and classifier are both built from the same pretrained DistilBERT
# checkpoint so the vocabulary matches the model's embeddings.
checkpoint_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(checkpoint_name)
model = DistilBertForSequenceClassification.from_pretrained(
    checkpoint_name,
    num_labels=2,  # two output classes: binary sentiment classification
)

# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch of examples.

    Every sequence is truncated and padded to the tokenizer's maximum
    length, so all encoded examples share one fixed length.

    Args:
        examples: A batch from the dataset; only ``examples["text"]`` is read.

    Returns:
        Whatever the module-level ``tokenizer`` returns for that text batch.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, padding="max_length")
    return encoded


# batched=True hands the function many examples per call instead of one.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Configuration for the fine-tuning run
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version before upgrading.
training_args = TrainingArguments(
    # Where the run writes its outputs
    output_dir="./sentiment_model",
    # Optimisation schedule
    num_train_epochs=3,
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    # Evaluate and save on the same 500-step cadence
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=500,
)

# Wire the model, the run configuration and the tokenized splits into a
# Trainer, then launch fine-tuning.
train_split = tokenized_datasets["train"]
eval_split = tokenized_datasets["test"]  # IMDB's "test" split doubles as the evaluation set

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

trainer.train()

# Example usage: classify one sentence with the fine-tuned model
sentence = "This is a great product."

# Switch to evaluation mode: training leaves dropout enabled, which would make
# the prediction below vary from run to run.
model.eval()

inputs = tokenizer(sentence, return_tensors='pt', padding=True, truncation=True)
# The Trainer may have moved the model onto an accelerator; the input tensors
# must live on the same device or the forward pass raises a device-mismatch
# error.
inputs = inputs.to(model.device)

# No gradients are needed for inference
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits
# Softmax over the class dimension; [0] selects the single sentence in the batch
probabilities = torch.softmax(logits, dim=1).tolist()[0]

# Determine sentiment based on the class with the highest probability.
# The list order assumes label id 0 is negative and 1 is positive, as in the
# IMDB dataset — confirm if a different dataset is substituted.
sentiment_labels = ["Negative", "Positive"]
sentiment_index = torch.argmax(logits, dim=1).item()
sentiment = sentiment_labels[sentiment_index]

print("Predicted Sentiment:", sentiment)
print("Probability Score:", probabilities[sentiment_index])


ModuleNotFoundError: No module named 'datasets'