In [1]:
import os
import pandas as pd
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

# The tokenizer files and the fine-tuned classifier weights are read from the
# same local directory.
MODEL_DIR = './distilbert_sentiment_model'

# Prefer the GPU when CUDA is usable, otherwise run on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = DistilBertTokenizer.from_pretrained(MODEL_DIR)
model = DistilBertForSequenceClassification.from_pretrained(MODEL_DIR)

# Move the weights to the chosen device and switch to evaluation mode
model.to(device)
model.eval()

# Function to predict sentiment for a given text
def predict_sentiment(text):
    """Classify a single review text as negative, neutral or positive.

    Args:
        text: The review text. ``None`` and float NaN (how pandas represents
            a missing cell) are treated as "no review"; any other non-string
            value is converted with ``str()`` because the tokenizer only
            accepts strings.

    Returns:
        ``"negative"``, ``"neutral"`` or ``"positive"``, or ``None`` when
        there is no text to classify.

    Raises:
        KeyError: If the model predicts a class index outside 0-2, i.e. the
            label mapping below does not match the loaded model.
    """
    # Missing reviews must not abort a whole-column run. NaN is the only
    # float that compares unequal to itself, so no extra import is needed.
    if text is None or (isinstance(text, float) and text != text):
        return None
    if not isinstance(text, str):
        text = str(text)

    # Tokenize and prepare the input; reviews longer than 512 tokens are cut
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    # The input tensors have to live on the same device as the model
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Generate prediction (no gradients are needed for inference)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predicted_class = torch.argmax(logits, dim=1).item()

    # Map prediction to sentiment labels (adjust this mapping based on your model)
    label_mapping = {0: "negative", 1: "neutral", 2: "positive"}
    return label_mapping[predicted_class]

# Where the raw reviews are read from and where the annotated copy is written
input_file_path = './dataset/Datafiniti_Amazon_Consumer_Reviews_of_Amazon_Products_May19.csv'
output_file_path = './dataset/output_dataset_with_sentiment.csv'

# Report a missing input file up front; otherwise annotate and save the data
if not os.path.exists(input_file_path):
    print(f"The input file {input_file_path} does not exist. Please check the file path and name.")
else:
    df = pd.read_csv(input_file_path)

    # Label every entry of the 'reviews.text' column with the predicted sentiment
    print("Predicting sentiment for user reviews...")
    df['sentiment'] = df['reviews.text'].apply(predict_sentiment)

    # Write the annotated table to disk without the index column
    df.to_csv(output_file_path, index=False)
    print(f"Output file successfully saved at: {output_file_path}")


Predicting sentiment for user reviews...
Output file successfully saved at: ./dataset/output_dataset_with_sentiment.csv
