In [2]:
# --- Imports and setup ---
import os
import sys
import pandas as pd

# Ensure project root is in Python path
project_root = r"E:\10 academy\weak2\fintech-review-analytics"  # adjust if different
os.chdir(project_root)
sys.path.append(project_root)

# Now we can safely import config
from config.config import RAW_CSV_PATH

# --- Load raw reviews ---
df = pd.read_csv(RAW_CSV_PATH)
print(f"Total reviews scraped: {len(df)}\n")

# --- Number of reviews per bank ---
review_counts = df['bank'].value_counts()
print("Reviews per bank:")
print(review_counts, "\n")

# --- Total duplicates removed ---
total_before = len(df)
df_clean = df.drop_duplicates(subset=['review', 'bank'])
total_after = len(df_clean)
duplicates_removed = total_before - total_after
print(f"Total duplicates removed: {duplicates_removed}\n")

# --- Duplicates per bank ---
duplicates_per_bank = df.groupby('bank')['review'].apply(lambda x: x.duplicated().sum())
print("Duplicates removed per bank:")
print(duplicates_per_bank, "\n")

# --- Sample raw reviews (2‚Äì3 per bank) ---
for bank in df['bank'].unique():
    print(f"Sample raw reviews for {bank}:")
    samples = df[df['bank'] == bank]['review'].head(3).to_list()
    for i, review in enumerate(samples, 1):
        print(f"{i}. {review}")
    print("\n")

# --- Optional quick observations ---
missing_reviews = df['review'].isna().sum()
print(f"Missing review texts: {missing_reviews}")


Total reviews scraped: 1266

Reviews per bank:
bank
DASHEN    432
BOA       430
CBE       404
Name: count, dtype: int64 

Total duplicates removed: 0

Duplicates removed per bank:
bank
BOA       0
CBE       0
DASHEN    0
Name: review, dtype: int64 

Sample raw reviews for CBE:
1. CBE ·ã≠·àà·ã´·àç·ç¢
2. it's special for me
3. Make it user friendly.


Sample raw reviews for BOA:
1. üôèüëç
2. Very Good
3. goof


Sample raw reviews for DASHEN:
1. very smart App easy to use and friendly
2. Very exemplery App to other Bank Aps !
3. good


Missing review texts: 0


In [5]:
import pandas as pd

# Load the processed review CSV
df = pd.read_csv("data/processed/reviews_analysis.csv")

for bank in df['bank'].unique():
    bank_df = df[df['bank'] == bank]
    
    # Count positive and negative reviews
    pos_count = len(bank_df[bank_df['sentiment'] == 'POSITIVE'])
    neg_count = len(bank_df[bank_df['sentiment'] == 'NEGATIVE'])
    
    # Example reviews
    pos_example = bank_df[bank_df['sentiment'] == 'POSITIVE']['cleaned'].iloc[0]
    neg_example = bank_df[bank_df['sentiment'] == 'NEGATIVE']['cleaned'].iloc[0]
    
    print(f"Bank: {bank}")
    print(f"Positive reviews: {pos_count}, Negative reviews: {neg_count}")
    print(f"Example Positive: {pos_example}")
    print(f"Example Negative: {neg_example}\n")


Bank: CBE
Positive reviews: 261, Negative reviews: 143
Example Positive: CBE
Example Negative: maaliif daddafee install gaafata

Bank: BOA
Positive reviews: 218, Negative reviews: 212
Example Positive: nan
Example Negative: goof

Bank: DASHEN
Positive reviews: 297, Negative reviews: 135
Example Positive: smart App easy use friendly
Example Negative: exemplery App Bank Aps

