## 1. Data Loading and Overview

In [14]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Set visualization style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 100)

print("âœ“ Libraries imported successfully")

âœ“ Libraries imported successfully


In [15]:
# Load analyzed reviews data
df = pd.read_csv('../data/processed/analyzed_reviews.csv')

print(f"Dataset loaded: {len(df)} reviews")
print(f"Columns: {df.columns.tolist()}")
print(f"\nData types:\n{df.dtypes}")

Dataset loaded: 827 reviews
Columns: ['review', 'rating', 'date', 'bank', 'source', 'sentiment_score', 'sentiment_label', 'pos_score', 'neu_score', 'neg_score', 'rating_adjusted', 'preprocessed_text', 'themes', 'themes_str']

Data types:
review                object
rating                 int64
date                  object
bank                  object
source                object
sentiment_score      float64
sentiment_label       object
pos_score            float64
neu_score            float64
neg_score            float64
rating_adjusted         bool
preprocessed_text     object
themes                object
themes_str            object
dtype: object


In [16]:
# Data completeness check
print("=" * 70)
print("DATA COMPLETENESS CHECK")
print("=" * 70)

print(f"\nTotal reviews: {len(df)}")
print(f"\nReviews per bank:")
print(df['bank'].value_counts().sort_index())

print(f"\nMissing values:")
missing = df.isnull().sum()
missing_pct = (missing / len(df) * 100).round(2)
missing_df = pd.DataFrame({
    'Missing Count': missing,
    'Percentage': missing_pct
})
print(missing_df[missing_df['Missing Count'] > 0])

print(f"\nâœ“ Data verification complete")

DATA COMPLETENESS CHECK

Total reviews: 827

Reviews per bank:
bank
Bank of Abyssinia              290
Commercial Bank of Ethiopia    227
Dashen Bank                    310
Name: count, dtype: int64

Missing values:
                   Missing Count  Percentage
preprocessed_text              2        0.24
themes_str                   126       15.24

âœ“ Data verification complete


In [17]:
# Statistical summary
print("=" * 70)
print("STATISTICAL SUMMARY")
print("=" * 70)

print(f"\n1. Rating Statistics:")
print(df['rating'].describe())

print(f"\n2. Rating Distribution:")
print(df['rating'].value_counts().sort_index())

print(f"\n3. Sentiment Distribution:")
print(df['sentiment_label'].value_counts())

print(f"\n4. Date Range:")
print(f"   Earliest: {df['date'].min()}")
print(f"   Latest: {df['date'].max()}")

print(f"\n5. Average Rating by Bank:")
bank_ratings = df.groupby('bank')['rating'].agg(['mean', 'count']).round(2)
print(bank_ratings)

print(f"\n6. Average Sentiment Score by Bank:")
bank_sentiment = df.groupby('bank')['sentiment_score'].agg(['mean', 'std']).round(4)
print(bank_sentiment)

STATISTICAL SUMMARY

1. Rating Statistics:
count    827.000000
mean       3.432890
std        1.788118
min        1.000000
25%        1.000000
50%        5.000000
75%        5.000000
max        5.000000
Name: rating, dtype: float64

2. Rating Distribution:
rating
1    251
2     43
3     53
4     57
5    423
Name: count, dtype: int64

3. Sentiment Distribution:
sentiment_label
Positive    510
Negative    304
Neutral      13
Name: count, dtype: int64

4. Date Range:
   Earliest: 2024-08-01
   Latest: 2025-11-26

5. Average Rating by Bank:
                             mean  count
bank                                    
Bank of Abyssinia            2.64    290
Commercial Bank of Ethiopia  3.77    227
Dashen Bank                  3.93    310

6. Average Sentiment Score by Bank:
                               mean     std
bank                                       
Bank of Abyssinia           -0.0047  0.4758
Commercial Bank of Ethiopia  0.2597  0.4310
Dashen Bank                  0.3698  0.

## 2. Satisfaction Drivers Analysis

Identifying what customers love about each bank's mobile app by analyzing positive reviews (4-5 stars with positive sentiment).

In [18]:
# Filter positive reviews (high ratings + positive sentiment)
positive_reviews = df[(df['rating'] >= 4) & (df['sentiment_label'] == 'Positive')].copy()

print("=" * 70)
print("POSITIVE REVIEWS ANALYSIS")
print("=" * 70)

print(f"\nTotal positive reviews: {len(positive_reviews)} ({len(positive_reviews)/len(df)*100:.1f}%)")
print(f"\nPositive reviews per bank:")
print(positive_reviews['bank'].value_counts())

print(f"\nAverage rating of positive reviews:")
print(positive_reviews.groupby('bank')['rating'].mean().round(2))

POSITIVE REVIEWS ANALYSIS

Total positive reviews: 467 (56.5%)

Positive reviews per bank:
bank
Dashen Bank                    216
Commercial Bank of Ethiopia    147
Bank of Abyssinia              104
Name: count, dtype: int64

Average rating of positive reviews:
bank
Bank of Abyssinia              4.88
Commercial Bank of Ethiopia    4.80
Dashen Bank                    4.94
Name: rating, dtype: float64


In [19]:
# Extract themes from positive reviews
def extract_themes_from_reviews(reviews_df, bank_name=None):
    """Extract and count themes from reviews."""
    if bank_name:
        reviews_df = reviews_df[reviews_df['bank'] == bank_name]
    
    # Get all themes from the reviews
    all_themes = []
    for themes_str in reviews_df['themes_str'].dropna():
        if themes_str and themes_str != '':
            themes = [t.strip() for t in themes_str.split(',')]
            all_themes.extend(themes)
    
    # Count theme occurrences
    theme_counts = Counter(all_themes)
    return theme_counts

# Analyze positive themes for each bank
print("=" * 70)
print("SATISFACTION DRIVERS (TOP THEMES IN POSITIVE REVIEWS)")
print("=" * 70)

banks = df['bank'].unique()
for bank in sorted(banks):
    print(f"\n{bank}:")
    positive_bank = positive_reviews[positive_reviews['bank'] == bank]
    themes = extract_themes_from_reviews(positive_bank)
    
    print(f"  Total positive reviews: {len(positive_bank)}")
    print(f"  Top themes:")
    for theme, count in themes.most_common(5):
        pct = count / len(positive_bank) * 100
        print(f"    - {theme}: {count} ({pct:.1f}%)")

SATISFACTION DRIVERS (TOP THEMES IN POSITIVE REVIEWS)

Bank of Abyssinia:
  Total positive reviews: 104
  Top themes:
    - User Experience: 64 (61.5%)
    - Customer Support: 20 (19.2%)
    - Features & Functionality: 12 (11.5%)
    - Performance: 11 (10.6%)
    - Updates & Improvements: 9 (8.7%)

Commercial Bank of Ethiopia:
  Total positive reviews: 147
  Top themes:
    - User Experience: 87 (59.2%)
    - Customer Support: 23 (15.6%)
    - Features & Functionality: 21 (14.3%)
    - Updates & Improvements: 21 (14.3%)
    - Negative Experience: 13 (8.8%)

Dashen Bank:
  Total positive reviews: 216
  Top themes:
    - User Experience: 161 (74.5%)
    - Features & Functionality: 56 (25.9%)
    - Performance: 53 (24.5%)
    - Updates & Improvements: 41 (19.0%)
    - Authentication & Security: 38 (17.6%)


In [20]:
# Extract key positive keywords from preprocessed text
def get_top_keywords(reviews_df, bank_name=None, top_n=15):
    """Extract top keywords from preprocessed text."""
    if bank_name:
        reviews_df = reviews_df[reviews_df['bank'] == bank_name]
    
    # Get all tokens from preprocessed text
    all_tokens = []
    for text in reviews_df['preprocessed_text'].dropna():
        if text and text.strip():
            tokens = text.split()
            all_tokens.extend(tokens)
    
    # Count token occurrences
    token_counts = Counter(all_tokens)
    return token_counts.most_common(top_n)

print("=" * 70)
print("TOP KEYWORDS IN POSITIVE REVIEWS")
print("=" * 70)

for bank in sorted(banks):
    print(f"\n{bank}:")
    positive_bank = positive_reviews[positive_reviews['bank'] == bank]
    keywords = get_top_keywords(positive_bank, top_n=15)
    
    print(f"  Top 15 keywords:")
    for word, count in keywords:
        print(f"    {word}: {count}")

TOP KEYWORDS IN POSITIVE REVIEWS

Bank of Abyssinia:
  Top 15 keywords:
    best: 18
    good: 16
    boa: 16
    great: 13
    please: 9
    ethiopia: 7
    need: 6
    like: 5
    easy: 5
    service: 5
    update: 5
    nice: 5
    money: 5
    thank: 5
    fast: 4

Commercial Bank of Ethiopia:
  Top 15 keywords:
    good: 39
    cbe: 18
    best: 16
    nice: 13
    service: 10
    ethiopia: 10
    update: 9
    great: 8
    easy: 8
    time: 7
    love: 7
    like: 7
    apps: 7
    money: 7
    make: 6

Dashen Bank:
  Top 15 keywords:
    dashen: 57
    super: 47
    best: 37
    easy: 29
    fast: 28
    feature: 23
    good: 19
    digital: 18
    service: 17
    payment: 17
    experience: 17
    user: 17
    great: 16
    amazing: 16
    make: 16


In [21]:
# Show sample positive reviews for each bank
print("=" * 70)
print("SAMPLE POSITIVE REVIEWS")
print("=" * 70)

for bank in sorted(banks):
    print(f"\n{bank} - Sample Positive Reviews:")
    positive_bank = positive_reviews[positive_reviews['bank'] == bank]
    
    # Get top 3 highest rated reviews
    samples = positive_bank.nlargest(3, 'sentiment_score')[['review', 'rating', 'sentiment_score']]
    
    for idx, row in samples.iterrows():
        print(f"\n  Rating: {row['rating']}â˜… | Sentiment: {row['sentiment_score']:.3f}")
        print(f"  Review: {row['review'][:150]}...")

SAMPLE POSITIVE REVIEWS

Bank of Abyssinia - Sample Positive Reviews:

  Rating: 5â˜… | Sentiment: 0.985
  Review: ðŸ¥°ðŸ¥°ðŸ¥°ðŸ¥°ðŸ¥° app is good but i was live in abroad and when i enter my otp code it didnt make me to write my code it the app want it self write thats not fair...

  Rating: 4â˜… | Sentiment: 0.925
  Review: This is the best app; many features are awesome, but it should work without the need to turn off the developer options. I'm tired of having to constan...

  Rating: 5â˜… | Sentiment: 0.872
  Review: Nice to meet you my proud bank in Ethiopia.. I'm a member of this bank, i need to solve my problem of international receiving money for me from my onl...

Commercial Bank of Ethiopia - Sample Positive Reviews:

  Rating: 5â˜… | Sentiment: 0.983
  Review: Truly, super competitive when compared to a well known app nowadays functioning broadly by simplest features and means namely known as Telebirr super ...

  Rating: 5â˜… | Sentiment: 0.964
  Review: I use the Commerci

## 3. Pain Points Analysis

Identifying customer complaints and issues by analyzing negative reviews (1-2 stars with negative sentiment).

In [22]:
# Filter negative reviews (low ratings + negative sentiment)
negative_reviews = df[(df['rating'] <= 2) & (df['sentiment_label'] == 'Negative')].copy()

print("=" * 70)
print("NEGATIVE REVIEWS ANALYSIS")
print("=" * 70)

print(f"\nTotal negative reviews: {len(negative_reviews)} ({len(negative_reviews)/len(df)*100:.1f}%)")
print(f"\nNegative reviews per bank:")
print(negative_reviews['bank'].value_counts())

print(f"\nAverage rating of negative reviews:")
print(negative_reviews.groupby('bank')['rating'].mean().round(2))

print(f"\nAverage sentiment score of negative reviews:")
print(negative_reviews.groupby('bank')['sentiment_score'].mean().round(3))

NEGATIVE REVIEWS ANALYSIS

Total negative reviews: 282 (34.1%)

Negative reviews per bank:
bank
Bank of Abyssinia              157
Dashen Bank                     71
Commercial Bank of Ethiopia     54
Name: count, dtype: int64

Average rating of negative reviews:
bank
Bank of Abyssinia              1.07
Commercial Bank of Ethiopia    1.26
Dashen Bank                    1.20
Name: rating, dtype: float64

Average sentiment score of negative reviews:
bank
Bank of Abyssinia             -0.335
Commercial Bank of Ethiopia   -0.277
Dashen Bank                   -0.355
Name: sentiment_score, dtype: float64


In [23]:
# Analyze negative themes for each bank
print("=" * 70)
print("PAIN POINTS (TOP THEMES IN NEGATIVE REVIEWS)")
print("=" * 70)

for bank in sorted(banks):
    print(f"\n{bank}:")
    negative_bank = negative_reviews[negative_reviews['bank'] == bank]
    themes = extract_themes_from_reviews(negative_bank)
    
    print(f"  Total negative reviews: {len(negative_bank)}")
    print(f"  Top pain points:")
    for theme, count in themes.most_common(5):
        pct = count / len(negative_bank) * 100
        print(f"    - {theme}: {count} ({pct:.1f}%)")

PAIN POINTS (TOP THEMES IN NEGATIVE REVIEWS)

Bank of Abyssinia:
  Total negative reviews: 157
  Top pain points:
    - Negative Experience: 75 (47.8%)
    - Technical Issues: 72 (45.9%)
    - Performance: 51 (32.5%)
    - Customer Support: 36 (22.9%)
    - Updates & Improvements: 34 (21.7%)

Commercial Bank of Ethiopia:
  Total negative reviews: 54
  Top pain points:
    - Features & Functionality: 25 (46.3%)
    - Technical Issues: 20 (37.0%)
    - Updates & Improvements: 18 (33.3%)
    - Negative Experience: 15 (27.8%)
    - Customer Support: 12 (22.2%)

Dashen Bank:
  Total negative reviews: 71
  Top pain points:
    - Technical Issues: 29 (40.8%)
    - Performance: 28 (39.4%)
    - Features & Functionality: 24 (33.8%)
    - User Experience: 22 (31.0%)
    - Negative Experience: 22 (31.0%)


In [24]:
# Extract key negative keywords
print("=" * 70)
print("TOP KEYWORDS IN NEGATIVE REVIEWS")
print("=" * 70)

for bank in sorted(banks):
    print(f"\n{bank}:")
    negative_bank = negative_reviews[negative_reviews['bank'] == bank]
    keywords = get_top_keywords(negative_bank, top_n=15)
    
    print(f"  Top 15 complaint keywords:")
    for word, count in keywords:
        print(f"    {word}: {count}")

TOP KEYWORDS IN NEGATIVE REVIEWS

Bank of Abyssinia:
  Top 15 complaint keywords:
    work: 33
    time: 30
    worst: 25
    ever: 21
    please: 21
    doesnt: 17
    developer: 16
    fix: 16
    boa: 16
    update: 15
    dont: 15
    option: 14
    experience: 14
    working: 13
    problem: 13

Commercial Bank of Ethiopia:
  Top 15 complaint keywords:
    work: 12
    cant: 10
    update: 9
    say: 8
    branch: 7
    working: 7
    doesnt: 7
    transaction: 7
    cbe: 6
    account: 6
    time: 6
    new: 6
    fix: 6
    telebirr: 5
    transfer: 5

Dashen Bank:
  Top 15 complaint keywords:
    slow: 12
    worst: 12
    working: 10
    account: 10
    cant: 10
    ever: 9
    time: 9
    super: 8
    please: 7
    like: 7
    money: 7
    transaction: 7
    need: 7
    open: 7
    amole: 6


In [25]:
# Show sample negative reviews for each bank
print("=" * 70)
print("SAMPLE NEGATIVE REVIEWS")
print("=" * 70)

for bank in sorted(banks):
    print(f"\n{bank} - Sample Negative Reviews:")
    negative_bank = negative_reviews[negative_reviews['bank'] == bank]
    
    # Get 3 most negative reviews
    samples = negative_bank.nsmallest(3, 'sentiment_score')[['review', 'rating', 'sentiment_score']]
    
    for idx, row in samples.iterrows():
        print(f"\n  Rating: {row['rating']}â˜… | Sentiment: {row['sentiment_score']:.3f}")
        print(f"  Review: {row['review'][:150]}...")

SAMPLE NEGATIVE REVIEWS

Bank of Abyssinia - Sample Negative Reviews:

  Rating: 1â˜… | Sentiment: -0.931
  Review: Shockingly bad! Even when it decides to work, it's painfully slow and frustrating. Such a shame that it has become a stain on an extraordinary bank...

  Rating: 1â˜… | Sentiment: -0.931
  Review: I will give only one star, because it faced with multiple of problems. 1. The app is not as fast as the other banks App, for e.g like CBE 2. The App a...

  Rating: 1â˜… | Sentiment: -0.920
  Review: I don't know what is wrong with BOA as a bank in general. It's been going backwards since last year or so. The app is a disaster to use in every possi...

Commercial Bank of Ethiopia - Sample Negative Reviews:

  Rating: 1â˜… | Sentiment: -0.929
  Review: This app dash board is disturbing me. it is not showing me traxation and my balance while displaying others option on dashboard. I think it is a risk ...

  Rating: 1â˜… | Sentiment: -0.898
  Review: it suddenly asked me to enter t

In [26]:
# Identify specific problem patterns in negative reviews
print("=" * 70)
print("SPECIFIC PROBLEM PATTERNS")
print("=" * 70)

# Define problem patterns to search for
problem_patterns = {
    'Crashes/Errors': ['crash', 'error', 'bug', 'broken', 'fail'],
    'Performance Issues': ['slow', 'loading', 'lag', 'freeze', 'stuck'],
    'Login/Access': ['login', 'password', 'cant', 'access', 'locked'],
    'Updates': ['update', 'version', 'new'],
    'Developer Options': ['developer', 'option', 'disable']
}

for bank in sorted(banks):
    print(f"\n{bank}:")
    negative_bank = negative_reviews[negative_reviews['bank'] == bank]
    
    for problem, keywords in problem_patterns.items():
        # Count reviews mentioning any of the keywords
        count = 0
        for text in negative_bank['preprocessed_text'].dropna():
            if any(keyword in text.lower() for keyword in keywords):
                count += 1
        
        if count > 0:
            pct = count / len(negative_bank) * 100
            print(f"  {problem}: {count} reviews ({pct:.1f}%)")

SPECIFIC PROBLEM PATTERNS

Bank of Abyssinia:
  Crashes/Errors: 20 reviews (12.7%)
  Performance Issues: 22 reviews (14.0%)
  Login/Access: 21 reviews (13.4%)
  Updates: 22 reviews (14.0%)
  Developer Options: 16 reviews (10.2%)

Commercial Bank of Ethiopia:
  Crashes/Errors: 4 reviews (7.4%)
  Performance Issues: 1 reviews (1.9%)
  Login/Access: 13 reviews (24.1%)
  Updates: 13 reviews (24.1%)
  Developer Options: 4 reviews (7.4%)

Dashen Bank:
  Crashes/Errors: 11 reviews (15.5%)
  Performance Issues: 18 reviews (25.4%)
  Login/Access: 8 reviews (11.3%)
  Updates: 5 reviews (7.0%)
  Developer Options: 5 reviews (7.0%)


## Summary: Key Satisfaction Drivers and Pain Points

### Bank of Abyssinia (BOA)
**Satisfaction Drivers (104 positive reviews, 35.9% of total):**
- **User Experience (61.5%)**: Best, great, nice app - customers appreciate the overall experience
- **Customer Support (19.2%)**: Thank you, please, good service - appreciation for support
- **Easy to Use (11.5%)**: Simple, easy features mentioned positively
- **Fast Performance**: Quick transactions and loading times when working properly
- Top keywords: "best", "good", "great", "easy", "service", "fast"

**Pain Points (157 negative reviews, 54.1% of total):**
- **Technical Issues (45.9%)**: "Not working", "worst", "broken" - highest complaint category
- **Performance Problems (32.5%)**: Slow, lag, loading issues
- **Developer Options Issue (10.2%)**: Unique problem - app requires disabling developer options
- **Updates Breaking App (14.0%)**: New updates causing problems
- **Poor Experience (47.8%)**: "Worst ever", general dissatisfaction
- Top complaints: "doesn't work", "worst", "developer options", "fix", "problem"

### Commercial Bank of Ethiopia (CBE)
**Satisfaction Drivers (147 positive reviews, 64.8% of total):**
- **User Experience (59.2%)**: Good, nice, great - overall positive experience
- **Service Quality (15.6%)**: Excellent customer service, easy to use
- **Features (14.3%)**: Balance check, transfers, transaction features appreciated
- **Updates (14.3%)**: App improvements and new features welcomed
- Top keywords: "good", "best", "nice", "service", "easy", "love", "great"

**Pain Points (54 negative reviews, 23.8% of total):**
- **Missing Features (46.3%)**: Pay to beneficiary disabled on Android, feature gaps
- **Login/Access Issues (24.1%)**: Branch verification required, can't login
- **Technical Issues (37.0%)**: App not working, errors, crashes
- **Update Problems (24.1%)**: New versions causing issues
- **Platform Inequality**: iOS users get more features than Android users
- Top complaints: "can't", "doesn't work", "branch", "transaction", "fix"

### Dashen Bank
**Satisfaction Drivers (216 positive reviews, 69.7% of total):**
- **Outstanding User Experience (74.5%)**: "Super app", "best", "amazing" - highest satisfaction
- **Fast & Easy (24.5%)**: Speed and ease of use highly praised
- **Rich Features (25.9%)**: Digital payments, QR codes, 3-click payment
- **Modern Digital Banking (19.0%)**: Digital experience, user-friendly interface
- **Secure (17.6%)**: Security and authentication features appreciated
- Top keywords: "dashen super", "best", "easy", "fast", "feature", "amazing", "digital"

**Pain Points (71 negative reviews, 22.9% of total):**
- **Technical Issues (40.8%)**: "Temporarily unavailable", app stops working
- **Performance Problems (39.4%)**: Slow, worst, lag - 25.4% mention performance
- **Account Issues (33.8%)**: Can't open accounts, money problems
- **Crashes (15.5%)**: App freezing and crashing
- **Transaction Failures**: Failed transfers and payments
- Top complaints: "slow", "worst", "can't", "not working", "transaction", "account"

---

## Key Comparative Insights

**Best Overall Satisfaction:** Dashen Bank (69.7% positive) > CBE (64.8%) > BOA (35.9%)

**Most Common Strengths Across All Banks:**
1. User Experience and Interface
2. Ease of Use
3. Fast Performance (when working)
4. Good Customer Service

**Most Common Pain Points Across All Banks:**
1. Technical Issues (crashes, errors, bugs)
2. Performance Problems (slow, loading, lag)
3. Login/Access Difficulties
4. Updates Breaking Functionality

**Bank-Specific Issues:**
- **BOA**: Developer options problem is unique and widespread
- **CBE**: Platform inequality (Android vs iOS features)
- **Dashen**: "Temporarily unavailable" error is common