# explore_sentiment.py: Python script extracted from notebooks/explore_sentiment.ipynb
# Purpose: Test sentiment analysis and keyword extraction for Task 2


# In [1]:
import pandas as pd
import spacy
from transformers import pipeline
from collections import Counter

# Cell output: ModuleNotFoundError: No module named 'spacy' (spaCy must be installed before this script can run)

# Load cleaned reviews

# In [None]:
# Read the cleaned review dataset, then show its dimensions and first rows.
reviews_csv = 'data/processed/reviews_clean.csv'
df = pd.read_csv(reviews_csv)
print(df.shape)
print(df.head())

# Initialize DistilBERT sentiment pipeline

# In [None]:
# Build the sentiment-analysis pipeline backed by the SST-2 fine-tuned
# DistilBERT checkpoint.
sentiment_analyzer = pipeline(
    task='sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)


# Sample 50 reviews

# In [None]:
# Draw a reproducible sample of up to 50 reviews. Capping n at len(df) keeps
# the script working on datasets with fewer than 50 rows, where
# df.sample(50) would raise ValueError.
sample_df = df.sample(n=min(50, len(df)), random_state=42)

# Apply sentiment analysis

# In [None]:
def get_sentiment(text):
    """Classify one review with the module-level sentiment pipeline.

    Args:
        text: Review text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the pipeline's first result.
    """
    # Let the tokenizer truncate to the model's 512-token limit. Slicing the
    # string (text[:512]) cuts characters, not tokens: it throws away most of a
    # long review and can still exceed 512 tokens once special tokens are added.
    result = sentiment_analyzer(text, truncation=True, max_length=512)[0]
    return result['label'], result['score']

# Run the model once per review and split the (label, score) pairs afterwards;
# calling get_sentiment separately for each column doubles the inference cost.
sentiment_results = [get_sentiment(review) for review in sample_df['review']]
sample_df['sentiment'] = [label for label, _ in sentiment_results]
sample_df['sentiment_score'] = [score for _, score in sentiment_results]
print(sample_df[['review', 'sentiment', 'sentiment_score']].head())

# Aggregate by bank and rating

# In [None]:
# sentiment_score is the classifier's confidence in whichever label it chose,
# so averaging it directly makes confident NEGATIVE and confident POSITIVE
# reviews look the same. Sign the score by label first (positive stays
# positive, everything else becomes negative), then average per bank/rating.
signed_score = sample_df['sentiment_score'].where(
    sample_df['sentiment'] == 'POSITIVE', -sample_df['sentiment_score']
)
sentiment_by_bank = (
    signed_score.groupby([sample_df['bank'], sample_df['rating']]).mean().unstack()
)
print(sentiment_by_bank)


# Load spaCy model

# In [None]:
# Load the small English spaCy pipeline used for keyword extraction.
nlp = spacy.load(name='en_core_web_sm')


# Function to extract keywords

# In [None]:
def extract_keywords(text):
    """Return the noun-like tokens of ``text`` as lowercase strings.

    A token is kept when its part-of-speech tag is NOUN or PROPN, or when
    its dependency label is ``compound``.

    Args:
        text: Review text to analyse.

    Returns:
        List of lowercased token strings, in document order.
    """
    # Parse the original casing: lowercasing before tagging hides the
    # capitalisation cues the tagger relies on (notably for PROPN). Lowercase
    # only the output so the keywords are still case-normalised.
    doc = nlp(text)
    return [
        token.text.lower()
        for token in doc
        if token.pos_ in ('NOUN', 'PROPN') or token.dep_ == 'compound'
    ]

# Apply to sample

# In [None]:
# Attach the extracted keyword list to every sampled review and preview it.
sample_df['keywords'] = sample_df['review'].map(extract_keywords)
print(sample_df.loc[:, ['review', 'keywords']].head())

# Group keywords by bank

# In [None]:
# Build one Counter per bank explicitly. Returning a Counter from
# SeriesGroupBy.apply does not work: pandas expands dict-like results into a
# (bank, keyword) MultiIndex Series of counts, so the loop below would receive
# plain numbers and fail on .most_common().
keywords_by_bank = {
    bank: Counter(kw for kw_list in bank_keywords for kw in kw_list)
    for bank, bank_keywords in sample_df.groupby('bank')['keywords']
}
for bank, counter in keywords_by_bank.items():
    print(f"{bank}: {counter.most_common(10)}")


# Example manual theme clustering

# In [None]:
# Hand-picked example themes: each theme name maps to a list of keywords.
themes = {}
themes['Account Access Issues'] = ['login', 'password', 'authentication']
themes['Transaction Performance'] = ['transfer', 'payment', 'slow']
themes['User Interface'] = ['ui', 'design', 'navigation']
print('Sample Themes:', themes)