In [1]:
import json
import os
import sys

import pandas as pd

# Make the project root importable so the local `src` package resolves.
sys.path.append('..')
from src.sentiment import perform_sentiment_analysis, aggregate_by_bank_and_rating
from src.themes import analyze_themes_by_bank

In [2]:
# Load the cleaned review dataset (path is relative to the kernel's working
# directory) and report the row count and the distinct values of the 'bank' column.
print("Loading cleaned reviews...")
df = pd.read_csv('../data/processed/cleaned_reviews.csv')
print(f"Loaded {len(df)} reviews")
print(f"Banks: {df['bank'].unique().tolist()}")

Loading cleaned reviews...
Loaded 1200 reviews
Banks: ['Commercial Bank Of Ethiopia', 'Bank Of Abyssinia', 'Dashen Bank']


In [3]:
# Section banner: a blank line, a 50-character rule, the title, and a closing rule.
# The empty first argument plus sep="\n" yields the leading blank line.
print("", "=" * 50, "PART 1: SENTIMENT ANALYSIS", "=" * 50, sep="\n")


PART 1: SENTIMENT ANALYSIS


In [4]:
# Run the project's sentiment step. `df` is rebound to the returned frame; later
# cells read its 'sentiment_label' and 'sentiment_score' columns.
# NOTE(review): re-running this cell passes the already-processed frame back in —
# presumably harmless, but confirm against src.sentiment.
df = perform_sentiment_analysis(df)
# Summary frame used below via its 'bank', 'rating' and 'avg_sentiment_score' columns.
aggregated = aggregate_by_bank_and_rating(df)


Analyzing sentiment for each review...
  Processed 100 reviews...
  Processed 200 reviews...
  Processed 300 reviews...
  Processed 400 reviews...
  Processed 500 reviews...
  Processed 600 reviews...
  Processed 700 reviews...
  Processed 800 reviews...
  Processed 900 reviews...
  Processed 1000 reviews...
  Processed 1100 reviews...
  Processed 1200 reviews...

Sentiment analysis complete!
Sentiment distribution:
sentiment_label
positive    632
neutral     459
negative    109
Name: count, dtype: int64

Aggregating sentiment by bank and rating...
Aggregation complete!


In [5]:
# Summarise the sentiment labels: total row count and the count per label.
# The heading uses "\n" for a leading blank line; the previous "\ " was an invalid
# escape sequence that raised a SyntaxWarning and printed a literal backslash.
print("\nSENTIMENT RESULTS:")
print(f"Total reviews analyzed: {len(df)}")
# Plain string: there is nothing to interpolate, so no f-prefix is needed.
print("\nSentiment distribution:")
print(df['sentiment_label'].value_counts())

\ SENTIMENT RESULTS:
Total reviews analyzed: 1200

Sentiment distribution:
sentiment_label
positive    632
neutral     459
negative    109
Name: count, dtype: int64


  print("\ SENTIMENT RESULTS:")


In [6]:
# Mean sentiment score per bank, computed in a single grouped pass instead of
# re-filtering the whole frame with a boolean mask once per bank.
# sort=False keeps banks in first-appearance order, matching df['bank'].unique().
# Note: rows whose 'bank' is missing are skipped by groupby (previously such a
# value would have been listed with a NaN average).
print("\nAverage sentiment by bank:")
bank_means = df.groupby('bank', sort=False)['sentiment_score'].mean()
for bank, bank_avg in bank_means.items():
    print(f"  {bank}: {bank_avg:.3f}")


Average sentiment by bank:
  Commercial Bank Of Ethiopia: 0.320
  Bank Of Abyssinia: 0.213
  Dashen Bank: 0.316


In [7]:
# List the aggregated average sentiment score of 1-star reviews for each bank.
print("\nSentiment for 1-star reviews:")
one_star = aggregated[aggregated['rating'] == 1]
# Walk the two needed columns in lockstep rather than materialising each row.
for bank_name, avg_score in zip(one_star['bank'], one_star['avg_sentiment_score']):
    print(f"  {bank_name}: avg score = {avg_score}")


Sentiment for 1-star reviews:
  Commercial Bank Of Ethiopia: avg score = -0.049
  Bank Of Abyssinia: avg score = -0.105
  Dashen Bank: avg score = -0.136


In [8]:
# Section banner: a blank line, a 50-character rule, the title, and a closing rule.
# The empty first argument plus sep="\n" yields the leading blank line.
print("", "=" * 50, "PART 2: THEMATIC ANALYSIS", "=" * 50, sep="\n")


PART 2: THEMATIC ANALYSIS


In [9]:
# Extract themes per bank, then print each theme with up to five keywords and,
# when one is available, the first example review for that theme.
bank_themes = analyze_themes_by_bank(df)
print("\nTHEMES FOUND:")

for bank, data in bank_themes.items():
    print(f"\n{bank}:")

    for theme, keywords in data['themes'].items():
        print(f"\n {theme}")
        print(f"Keywords: {', '.join(keywords[:5])}")

        # .get() covers both "theme has no entry" and, via the truthiness
        # check below, "entry exists but is empty".
        theme_examples = data['examples'].get(theme)
        if theme_examples:
            print(f"Example: '{theme_examples[0]}'")


THEMATIC ANALYSIS BY BANK

Analyzing Commercial Bank Of Ethiopia...
Extracting important keywords...
Found 20 keywords

Grouping keywords into themes...
Created 5 themes
  Login & Account Access: app, application, bank, banking, best
  Transaction & Transfer: best app, branch, cbe, good, good app
  App Performance: great, like, nice, time, update
  User Interface: use, ነው
  Customer Support: service
Commercial Bank Of Ethiopia: Found 5 themes

Analyzing Bank Of Abyssinia...
Extracting important keywords...


Found 20 keywords

Grouping keywords into themes...
Created 4 themes
  Login & Account Access: app, bank, banking, best, boa
  Transaction & Transfer: developer, doesn, fix, good, like
  App Performance: mobile, mobile banking, nice, time, update
  User Interface: use, worst
Bank Of Abyssinia: Found 4 themes

Analyzing Dashen Bank...
Extracting important keywords...
Found 20 keywords

Grouping keywords into themes...
Created 4 themes
  Login & Account Access: amazing, app, bank, banking, best
  Transaction & Transfer: dashen, dashen bank, good, great, like
  App Performance: fast, mobile, nice, super, super app
  User Interface: easy, easy use, use, wow
Dashen Bank: Found 4 themes

THEMES FOUND:

Commercial Bank Of Ethiopia:

 Login & Account Access
Keywords: app, application, bank, banking, best
Example: 'good app'

 Transaction & Transfer
Keywords: best app, branch, cbe, good, good app
Example: 'CBE ይለያል።'

 App Performance
Keywords: great, like, nice, time, update
Example: 'It is ni

In [10]:
# Persist the review-level frame (with sentiment columns) and the bank/rating
# summary as CSV files; index=False leaves the DataFrame index out of the output.
df.to_csv('../data/processed/reviews_with_sentiment.csv', index=False)
aggregated.to_csv('../data/processed/sentiment_by_bank_rating.csv', index=False)

In [11]:
# Save the per-bank theme results as JSON (`json` is imported in the first cell).
# The reviews include Amharic text, so write UTF-8 explicitly and turn off ASCII
# escaping; otherwise keywords and example reviews are stored as \uXXXX sequences
# and the file encoding depends on the platform default.
with open('../data/processed/bank_themes.json', 'w', encoding='utf-8') as f:
    json.dump(bank_themes, f, indent=2, ensure_ascii=False)

In [12]:
# Report how many reviews actually carry a sentiment score instead of printing a
# fixed "100%".
# NOTE(review): assumes an unscored review has NaN in 'sentiment_score' — confirm
# against perform_sentiment_analysis.
scored_count = int(df['sentiment_score'].notna().sum())
# Guard the empty-frame case so the summary never divides by zero.
coverage = scored_count / len(df) if len(df) else 0.0
print(f"Sentiment scores for {scored_count} reviews ({coverage:.1%})")

Sentiment scores for 1200 reviews (100%)
