In [4]:
import os
import pandas as pd
from google_play_scraper import app, Sort, reviews
from typing import Dict, Optional

# Short bank label -> Google Play package id of the app to scrape.
# The label is what ends up in output filenames and log lines.
BANK_APPS = dict(
    CBE='com.combanketh.mobilebanking',
    BOA='com.boa.boaMobileBanking',
    # or use 'com.cr2.amolelight' for the Dashen Mobile version
    Dashen='com.dashen.dashensuperapp',
)


class ScrapData:
    """Scrape Google Play metadata and reviews for one app and save them as CSV.

    Attributes:
        package_name: Package id handed to the google_play_scraper calls.
        app_name: Short label used in output filenames, log lines and the
            ``app`` column of the reviews CSV.
    """

    # Column order of the reviews CSV. Passing it explicitly to the DataFrame
    # guarantees a header row even when zero reviews were fetched.
    _REVIEW_COLUMNS = ('app', 'review', 'rating', 'date')

    # Largest number of reviews requested in a single `reviews(...)` call.
    _BATCH_SIZE = 200

    def __init__(self, package_name: str, app_name: str):
        self.package_name = package_name
        self.app_name = app_name

    def scrape_app_metadata(self, output_dir="data/metadata"):
        """Fetch the app's store details and write them to a one-row CSV.

        Calls ``app(package_name, lang="en", country="us")`` and saves the
        returned mapping as ``<output_dir>/<app_name>_metadata.csv``.

        Args:
            output_dir: Directory for the CSV; created if it does not exist.
        """
        os.makedirs(output_dir, exist_ok=True)
        result = app(self.package_name, lang="en", country='us')
        # One dict -> one row; the dict keys become the CSV columns.
        df = pd.DataFrame([result])
        filename = os.path.join(output_dir, f"{self.app_name}_metadata.csv")
        df.to_csv(filename, index=False)
        print(f"[INFO] Saved metadata for {self.app_name} to {filename}")

    def scrape_app_reviews(self, target_count: int = 400, score_filter: Optional[int] = None,
                           output_dir="data/reviews"):
        """Fetch up to ``target_count`` newest reviews and write them to CSV.

        Reviews are requested page by page (``lang='en'``, ``country='us'``,
        ``Sort.NEWEST``) until ``target_count`` is reached, a page comes back
        empty, or no continuation token is returned. The result is saved as
        ``<output_dir>/<app_name>_reviews.csv`` with the columns
        ``app, review, rating, date``. The header row is written even when no
        reviews were fetched, so the file can always be read back.

        Args:
            target_count: Maximum number of reviews to keep.
            score_filter: Forwarded as ``filter_score_with``; ``None`` applies
                no filter.
            output_dir: Directory for the CSV; created if it does not exist.
        """
        os.makedirs(output_dir, exist_ok=True)
        all_reviews = []
        next_token = None

        while len(all_reviews) < target_count:
            # Ask only for what is still missing instead of a fixed 200.
            remaining = target_count - len(all_reviews)
            batch, next_token = reviews(
                self.package_name,
                lang='en',
                country='us',
                sort=Sort.NEWEST,
                count=min(self._BATCH_SIZE, remaining),
                filter_score_with=score_filter,
                continuation_token=next_token
            )
            if not batch:
                break
            all_reviews.extend(batch)
            if not next_token:
                break

        # The slice stays as a guard in case a page holds more items than
        # were requested.
        rows = [{
            'app': self.app_name,
            'review': r['content'],
            'rating': r['score'],
            'date': r['at'].isoformat()
        } for r in all_reviews[:target_count]]
        df = pd.DataFrame(rows, columns=list(self._REVIEW_COLUMNS))

        filename = os.path.join(output_dir, f"{self.app_name}_reviews.csv")
        df.to_csv(filename, index=False)
        print(f"[INFO] Saved {len(df)} reviews for {self.app_name} to {filename}")


In [5]:
def run_batch_scraping(apps: Dict[str, str], target_count: int = 400):
    """Scrape metadata and reviews for every app in ``apps``, one after another.

    Args:
        apps: Mapping of app label to package id. The label is passed on as
            ``app_name`` and the package id as ``package_name``.
        target_count: Forwarded to ``scrape_app_reviews`` for each app.
    """
    for label in apps:
        print(f"\n--- Scraping {label} ---")
        job = ScrapData(package_name=apps[label], app_name=label)
        # Metadata first, then reviews, both with their default output dirs.
        job.scrape_app_metadata()
        job.scrape_app_reviews(target_count=target_count)


In [6]:
# Scrape metadata and up to 400 reviews for every app in BANK_APPS.
# This calls the Google Play scraper and (re)writes the CSVs under data/.
run_batch_scraping(BANK_APPS, target_count=400)



--- Scraping CBE ---
[INFO] Saved metadata for CBE to data/metadata\CBE_metadata.csv
[INFO] Saved 400 reviews for CBE to data/reviews\CBE_reviews.csv

--- Scraping BOA ---
[INFO] Saved metadata for BOA to data/metadata\BOA_metadata.csv
[INFO] Saved 400 reviews for BOA to data/reviews\BOA_reviews.csv

--- Scraping Dashen ---
[INFO] Saved metadata for Dashen to data/metadata\Dashen_metadata.csv
[INFO] Saved 400 reviews for Dashen to data/reviews\Dashen_reviews.csv


In [None]:
import pandas as pd

# Load the CBE reviews CSV (columns: app, review, rating, date) from the
# default reviews output directory and preview the first rows.
df_cbe = pd.read_csv("data/reviews/CBE_reviews.csv")
df_cbe.head()
