In [10]:
from nltk.sentiment import SentimentIntensityAnalyzer
import pandas as pd
from transformers import pipeline


# Step 2: Load the cleaned reviews dataset.
# Adjust the path below if your cleaned file lives somewhere else.
df = pd.read_csv('../data/raw/banks_review_cleaned.csv')

# Step 3: Load the pre-trained sentiment analysis pipeline from HuggingFace Transformers.
# DistilBERT fine-tuned on SST-2 gives binary sentiment (POSITIVE / NEGATIVE).
sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# Step 4: Prepare review text.
# Cast to string and keep only the first 512 characters to bound the input size.
# NOTE(review): astype(str) turns missing reviews into the literal text "nan",
# which is then scored like any other review -- confirm the cleaned file has no NaNs.
df['short_review'] = df['review'].astype(str).str[:512]

# Step 5: Run the pipeline over all reviews in one call.
# Characters are not tokens: a 512-character string can still tokenize to more
# tokens than the model accepts (e.g. long runs of punctuation or emoji), which
# would raise at inference time. truncation=True makes the tokenizer enforce the
# model's real limit instead.
review_texts = df['short_review'].tolist()
# Skip the model call entirely when there is nothing to score.
sentiment_results = sentiment_pipeline(review_texts, truncation=True) if review_texts else []

# Each result is a dict with 'label' (POSITIVE or NEGATIVE) and 'score' (confidence).
df['sentiment_result'] = sentiment_results

# Step 6: Split the result dicts into separate columns.
# Labels are lower-cased for consistency with the rest of the analysis.
df['sentiment_label'] = [result['label'].lower() for result in sentiment_results]
df['sentiment_score'] = [result['score'] for result in sentiment_results]

# Step 7: Preview the result
print(df[['review', 'sentiment_label', 'sentiment_score']].head())

# Step 8: Save the result to a new CSV for later use (e.g. thematic analysis)
df.to_csv("../data/processed/bank_reviews_with_sentiment.csv", index=False)





Device set to use cpu


                                              review sentiment_label  \
0  "Why don’t your ATMs support account-to-accoun...        negative   
1                        what is this app problem???        negative   
2       the app is proactive and a good connections.        positive   
3    I cannot send to cbebirr app. through this app.        negative   
4                                               good        positive   

   sentiment_score  
0         0.996465  
1         0.999623  
2         0.999868  
3         0.995335  
4         0.999816  
