# Imports + Setup

In [2]:
import os
import sys
import pandas as pd
from tqdm import tqdm
from datetime import datetime

# Locate the project root: the closest directory, starting from the current
# working directory and walking upwards, that contains a 'src' folder.
# Unlike a plain dirname(os.getcwd()), this gives the same answer whether the
# kernel is running in 'notebook/', in 'src/', or already in the project root,
# so this cell can be re-run safely after the working directory changes.
project_root = os.path.abspath(os.getcwd())
while not os.path.isdir(os.path.join(project_root, "src")):
    parent_dir = os.path.dirname(project_root)
    if parent_dir == project_root:  # reached the filesystem root
        raise FileNotFoundError(
            "Could not find a 'src' folder in the current directory or any parent"
        )
    project_root = parent_dir

# Make both import styles used in this notebook resolvable:
#   - src_path     -> `from scraper import ...`
#   - project_root -> `from src.config import ...`
src_path = os.path.join(project_root, "src")
for import_dir in (project_root, src_path):
    if import_dir not in sys.path:  # avoid duplicate entries when re-run
        sys.path.append(import_dir)

print("Added to path:", src_path)

# Import scraper class
from scraper import PlayStoreScraper


Added to path: d:\10acadamyWeek2 project\Customer-Experience-Analytics-for-Fintech-Apps\src


# Verify Working Directory

In [3]:
print("Current working directory:", os.getcwd())

# Move to the project root if needed, so that relative paths such as
# "data/raw/..." resolve against the project rather than the notebook folder.
# Checking for "src" among the cwd components never matches when the kernel
# starts in 'notebook/', which silently left the working directory unchanged.
# The 'src' existence check guards against chdir-ing to a mis-detected root.
root_has_src = os.path.isdir(os.path.join(project_root, "src"))
current_dir = os.path.normcase(os.path.abspath(os.getcwd()))
target_dir = os.path.normcase(os.path.abspath(project_root))
if root_has_src and current_dir != target_dir:
    os.chdir(project_root)
    print("Changed working directory to:", os.getcwd())


Current working directory: d:\10acadamyWeek2 project\Customer-Experience-Analytics-for-Fintech-Apps\notebook


# Load Config

In [4]:
from src.config import APP_IDS, BANK_NAMES, SCRAPING_CONFIG, DATA_PATHS

# Echo the settings that drive the scrape so the run is self-describing.
reviews_per_bank = SCRAPING_CONFIG["reviews_per_bank"]
raw_reviews_path = DATA_PATHS["raw_reviews"]

print(f"APP IDS: {APP_IDS}")
print(f"BANKS: {BANK_NAMES}")
print(f"Reviews per bank: {reviews_per_bank}")
print(f"Saving raw data to: {raw_reviews_path}")


APP IDS: {'CBE': 'com.combanketh.mobilebanking', 'BOA': 'com.boa.boaMobileBanking', 'Dashen': 'com.dashen.dashensuperapp'}
BANKS: {'CBE': 'Commercial Bank of Ethiopia', 'BOA': 'Bank of Abyssinia', 'Dashen': 'Dashen Bank'}
Reviews per bank: 500
Saving raw data to: data/raw/reviews_raw.csv


# Run Scraper

In [5]:
print("üöÄ Starting Google Play Review Scraping...")

scraper = PlayStoreScraper()
raw_df = scraper.scrape_all_banks()

print("\nScraping Complete!")
raw_df.head()


🚀 Starting Google Play Review Scraping...
Starting Google Play Review Scraper


Banks:   0%|          | 0/3 [00:00<?, ?it/s]


Scraping reviews for com.combanketh.mobilebanking...
✓ Scraped 500 reviews


Banks:  33%|███▎      | 1/3 [00:06<00:12,  6.19s/it]


Scraping reviews for com.boa.boaMobileBanking...
✓ Scraped 500 reviews


Banks:  67%|██████▋   | 2/3 [00:12<00:06,  6.09s/it]


Scraping reviews for com.dashen.dashensuperapp...
✓ Scraped 500 reviews


Banks: 100%|██████████| 3/3 [00:18<00:00,  6.06s/it]


✓ Scraping complete!
Total reviews collected: 1500
Saved to: data/raw/reviews_raw.csv

App information saved to data/raw/app_info.csv

Scraping Complete!





Unnamed: 0,review_id,review_text,rating,review_date,user_name,thumbs_up,bank_code,bank_name,source
0,361ac925-5bd6-4455-8efe-95394a4ba679,good,5,2025-11-30 16:24:48,Mahamed Abdireshid,0,CBE,Commercial Bank of Ethiopia,Google Play
1,cb37b096-e071-4f0f-a8fd-067b7d71706d,CBE ይለያል።,5,2025-11-29 17:22:32,Kamil Tesfaye,0,CBE,Commercial Bank of Ethiopia,Google Play
2,70f504ff-daed-40d9-9c89-cc49a95ef659,it's special for me,5,2025-11-29 15:54:14,Abde Semed,0,CBE,Commercial Bank of Ethiopia,Google Play
3,28f229b5-0026-41b9-a1eb-b76e74736f63,Make it user friendly.,2,2025-11-29 08:17:45,TOMIZ Creativity,0,CBE,Commercial Bank of Ethiopia,Google Play
4,68d8daea-db47-4e23-a692-755173dea983,maaliif daddafee install gaafata,3,2025-11-28 13:36:32,Tesfaye Abdi,0,CBE,Commercial Bank of Ethiopia,Google Play


# Show Basic Info

In [6]:
# Row count first, then the column-level schema (dtypes and non-null counts).
n_reviews = raw_df.shape[0]
print("Number of reviews collected:", n_reviews)

raw_df.info()


Number of reviews collected: 1500
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   review_id    1500 non-null   object        
 1   review_text  1500 non-null   object        
 2   rating       1500 non-null   int64         
 3   review_date  1500 non-null   datetime64[ns]
 4   user_name    1500 non-null   object        
 5   thumbs_up    1500 non-null   int64         
 6   bank_code    1500 non-null   object        
 7   bank_name    1500 non-null   object        
 8   source       1500 non-null   object        
dtypes: datetime64[ns](1), int64(2), object(6)
memory usage: 105.6+ KB


# Save Raw Data (Safety Save)

In [7]:
# Safety save: persist the scraped reviews so the data survives a kernel
# restart. The destination comes from DATA_PATHS (loaded in the config cell)
# rather than a second hard-coded copy of the path, so the notebook and the
# project configuration cannot drift apart.
save_path = DATA_PATHS["raw_reviews"]

# Create the parent folder only when the path actually has one;
# os.makedirs("") would raise for a bare file name.
save_dir = os.path.dirname(save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

raw_df.to_csv(save_path, index=False)

print(f"Raw review data saved to {save_path}")


Raw review data saved to data/raw/reviews_raw.csv


# Show Sample Reviews Per Bank

In [8]:
# Preview the first three reviews of every bank, in order of first appearance.
for bank_code in raw_df["bank_code"].unique():
    # One print call emits the blank line, the rule, the title and the rule.
    print(
        "\n============================",
        f"Sample reviews for: {bank_code}",
        "============================",
        sep="\n",
    )

    bank_reviews = raw_df.loc[raw_df["bank_code"] == bank_code]
    display(bank_reviews.head(3))



Sample reviews for: CBE


Unnamed: 0,review_id,review_text,rating,review_date,user_name,thumbs_up,bank_code,bank_name,source
0,361ac925-5bd6-4455-8efe-95394a4ba679,good,5,2025-11-30 16:24:48,Mahamed Abdireshid,0,CBE,Commercial Bank of Ethiopia,Google Play
1,cb37b096-e071-4f0f-a8fd-067b7d71706d,CBE ይለያል።,5,2025-11-29 17:22:32,Kamil Tesfaye,0,CBE,Commercial Bank of Ethiopia,Google Play
2,70f504ff-daed-40d9-9c89-cc49a95ef659,it's special for me,5,2025-11-29 15:54:14,Abde Semed,0,CBE,Commercial Bank of Ethiopia,Google Play



Sample reviews for: BOA


Unnamed: 0,review_id,review_text,rating,review_date,user_name,thumbs_up,bank_code,bank_name,source
500,60e84fba-4730-499d-8f5b-88e52dda8db7,app is note active working crash,1,2025-11-30 21:57:28,BEKI Movie senter,0,BOA,Bank of Abyssinia,Google Play
501,3463230e-f9f7-4be3-a632-fdd8d017ce84,🙏👍,5,2025-11-29 03:47:54,Yasin Alemu,0,BOA,Bank of Abyssinia,Google Play
502,a6cbfa34-f2b1-4a16-96b6-c94f58cea76f,Very Good,5,2025-11-28 20:18:20,Wariyo Dida,0,BOA,Bank of Abyssinia,Google Play



Sample reviews for: Dashen


Unnamed: 0,review_id,review_text,rating,review_date,user_name,thumbs_up,bank_code,bank_name,source
1000,8a0203ea-da03-42dd-bde0-80e4188ee4d7,"this app is not good it's, access are limited ...",1,2025-11-30 22:06:29,natnael alemneh,0,Dashen,Dashen Bank,Google Play
1001,944aa261-9166-4efd-bf85-a65f109da117,great app,5,2025-11-30 19:27:00,Bereket demeke Bereket,0,Dashen,Dashen Bank,Google Play
1002,3827cdde-0a32-46f1-b4d2-996d80947e26,good app,5,2025-11-30 19:02:40,Mesay Mebratu,0,Dashen,Dashen Bank,Google Play


In [9]:
# Fetch the Play Store metadata for the CBE app.
# PlayStoreScraper is already imported in the setup cell; importing it again
# here as `src.scraper` would load a second copy of the module under a
# different name, so the existing class is reused instead.
scraper = PlayStoreScraper()

# Take the app id from the loaded configuration rather than repeating the
# literal package name.
info = scraper.get_app_info(APP_IDS["CBE"])
info


{'app_id': 'com.combanketh.mobilebanking',
 'title': 'Commercial Bank of Ethiopia',
 'score': 4.318146,
 'ratings': 38881,
 'reviews': 8340,
 'installs': '5,000,000+'}