In [1]:
# Optional (Google Colab only): uncomment the two lines below to mount Google Drive.
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
import logging
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
from tqdm import tqdm
from transformers import pipeline

# Build the sentiment-analysis pipeline once, at module level, from the
# DistilBERT checkpoint fine-tuned on SST-2; truncation=True is passed through
# to the pipeline.
classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    truncation=True,
)

# Define the function to get sentiment (positive/negative)
def get_sentiment(review_text):
    """Classify a single review with the module-level ``classifier`` pipeline.

    Args:
        review_text: The review to classify. Any value that is not a
            non-blank ``str`` (for example ``None``, a float, or a
            whitespace-only string) is skipped without calling the model.

    Returns:
        A ``(label, score)`` tuple read from the first prediction returned by
        ``classifier``, or ``(None, None)`` when the input is skipped or the
        classification raises.
    """
    # isinstance() already rejects None, so no separate None check is needed.
    if not isinstance(review_text, str) or not review_text.strip():
        return None, None

    try:
        result = classifier(review_text)[0]
        return result['label'], result['score']
    except Exception as exc:
        # Best-effort on purpose: one bad review must not abort the whole
        # batch. The failure is logged so that a column full of missing
        # labels can be diagnosed instead of passing silently.
        logging.getLogger(__name__).warning(
            "Sentiment analysis failed for a review (%s: %s)",
            type(exc).__name__,
            exc,
        )
        return None, None

# Load the ratings CSV into a pandas DataFrame.
# Alternative root when running from a mounted Google Drive:
# root_folder = '/content/drive/MyDrive/TMDB'
root_folder = '.'
ratings_csv_path = f'{root_folder}/data/rating.csv'
df_pandas = pd.read_csv(ratings_csv_path)

# Function to apply sentiment analysis in parallel
def parallel_sentiment_analysis(reviews):
    """Run ``get_sentiment`` over ``reviews`` on a thread pool.

    Args:
        reviews: Iterable of review texts; each item is passed unchanged to
            ``get_sentiment``.

    Returns:
        A list with one ``get_sentiment`` result per review, in the same
        order as ``reviews``.
    """
    reviews = list(reviews)  # accept any iterable and pin the ordering up front
    # Pre-size the output so every result can be stored at its review's position.
    results = [None] * len(reviews)
    with ThreadPoolExecutor() as executor:
        # Key each future by the position of its review. as_completed() yields
        # futures in completion order, not submission order, so appending
        # results as they arrive would pair labels with the wrong reviews.
        future_to_index = {
            executor.submit(get_sentiment, review): index
            for index, review in enumerate(reviews)
        }
        for future in tqdm(as_completed(future_to_index), total=len(future_to_index), desc="Sentiment Analysis Progress"):
            results[future_to_index[future]] = future.result()
    return results

# Apply the sentiment analysis function to the 'review_text' column in parallel
sentiment_results = parallel_sentiment_analysis(df_pandas['review_text'].tolist())

# Add results to the DataFrame. Column names are given explicitly so an empty
# input still yields both columns, and the frame's own index is reused so the
# assignment matches rows by position (this assumes sentiment_results is in
# the same order as the rows of df_pandas).
df_pandas[['sentiment', 'confidence']] = pd.DataFrame(
    sentiment_results,
    columns=['sentiment', 'confidence'],
    index=df_pandas.index,
)

# Save the updated DataFrame to a new CSV file. The output folder is created
# first so a missing directory cannot discard the computed results.
output_dir = f'{root_folder}/output'
os.makedirs(output_dir, exist_ok=True)
df_pandas.to_csv(f'{output_dir}/rating_auto_label_sentiment_two_classes.csv', index=False)
print("The sentiment analysis has been added to the CSV file.")




Sentiment Analysis Progress: 100%|██████████| 10468/10468 [28:59<00:00,  6.02it/s] 


The sentiment analysis has been added to the CSV file.
