# Scrape and preprocess user reviews


In [1]:
# notebooks/scrape_main.ipynb
import os
import sys

# Expose the sibling ``scripts`` directory to the import system so that
# modules stored there can be imported from this notebook.
scripts_path = os.path.abspath(os.path.join(os.pardir, 'scripts'))
if sys.path.count(scripts_path) == 0:
    # Register the directory only once, even if this cell is re-run.
    sys.path.append(scripts_path)

# Import the project-local scraper, translating a missing ``scrape_reviews``
# module into an actionable error message.
try:
    from scrape_reviews import ReviewScraper
except ModuleNotFoundError as err:
    # ``err.name`` is the module that could not be located. Only rewrite the
    # message when ``scrape_reviews`` itself is missing; if one of the modules
    # it imports is absent, re-raise untouched so the real culprit is reported
    # instead of a misleading "file not found" hint.
    if err.name != 'scrape_reviews':
        raise
    # Chain with ``from err`` so the original traceback is kept as the cause.
    raise ModuleNotFoundError(
        "Could not find 'scrape_reviews.py' in '../scripts'. "
        "Please ensure the file exists and the path is correct.",
        name=err.name,
    ) from err

# --- Scraper configuration ---------------------------------------------------
# Bank display name -> app identifier passed to the scraper.
app_ids = {
    'Dashen Bank': 'com.dashen.dashensuperapp',
    'Bank of Abyssinia': 'com.boa.boaMobileBanking',
    'Commercial Bank of Ethiopia': 'com.combanketh.mobilebanking',
}
# Language codes handed to the scraper alongside the app identifiers.
languages = ["en", "am", "om"]
# Destination path given to the scraper for the CSV export.
output_path = "../data/raw_reviews.csv"

# --- Run the scrape ----------------------------------------------------------
scraper = ReviewScraper(
    app_ids,
    languages,
    output_path,
)
df_scraped = scraper.scrape_all()
scraper.save_to_csv(df_scraped)

# Final expression of the cell: preview the first rows of the result.
df_scraped.head()


Fetching EN reviews for Dashen Bank...
Fetching AM reviews for Dashen Bank...
Fetching OM reviews for Dashen Bank...
Fetching EN reviews for Bank of Abyssinia...
Fetching AM reviews for Bank of Abyssinia...
Fetching OM reviews for Bank of Abyssinia...
Fetching EN reviews for Commercial Bank of Ethiopia...
Fetching AM reviews for Commercial Bank of Ethiopia...
Fetching OM reviews for Commercial Bank of Ethiopia...
Saved to ../data/raw_reviews.csv


Unnamed: 0,review_text,rating,date,bank_name,source,language
0,I like this mobile banking app very much. Over...,2,2025-06-07 10:40:29,Dashen Bank,Google Play,en
1,love,3,2025-06-06 00:15:44,Dashen Bank,Google Play,en
2,መቸሸጠ,5,2025-06-03 19:40:31,Dashen Bank,Google Play,en
3,wow,5,2025-06-03 17:30:11,Dashen Bank,Google Play,en
4,gadaa,5,2025-06-01 17:10:53,Dashen Bank,Google Play,en


In [1]:
from IPython.display import Markdown, display

# Markdown summary of the scraping notebook (`scrape_main-eda.ipynb`).
scrape_main_eda_report = """
### Data Preparation Report for `scrape_main-eda.ipynb`

- **Data Collection:** User reviews were scraped from Google Play Store for three banking apps: Dashen Bank, Bank of Abyssinia, and Commercial Bank of Ethiopia, using the `ReviewScraper` class.
- **Languages:** Reviews were collected in three languages: English (`en`), Amharic (`am`), and Oromo (`om`).
- **Data Storage:** The scraped reviews were saved to a CSV file at `../data/raw_reviews.csv`.
- **Initial Inspection:** The first few rows of the scraped data were displayed to verify successful data collection and structure.
"""

# Markdown summary of the preparation/EDA notebook (`task1-eda.ipynb`).
task1_eda_report = """
### Data Preparation, EDA, and Analysis Report for `task1-eda.ipynb`

- **Data Preparation:**
    - Loaded the raw reviews dataset.
    - Cleaned the data by handling missing values, removing duplicates, and standardizing text fields.
    - Translated non-English reviews to English for uniform analysis.
    - Added metadata such as review length and language detection.

- **Exploratory Data Analysis (EDA):**
    - Analyzed the distribution of reviews by app and language.
    - Visualized review ratings and identified trends over time.
    - Explored common keywords and sentiment distribution using word clouds and sentiment analysis.

- **Analysis:**
    - Compared user sentiment across different banking apps.
    - Identified common user concerns and positive feedback themes.
    - Highlighted actionable insights for app improvement based on review content and sentiment trends.
"""

# Render both reports as Markdown, in the order they were defined.
for _report_md in (scrape_main_eda_report, task1_eda_report):
    display(Markdown(_report_md))


### Data Preparation Report for `scrape_main-eda.ipynb`

- **Data Collection:** User reviews were scraped from Google Play Store for three banking apps: Dashen Bank, Bank of Abyssinia, and Commercial Bank of Ethiopia, using the `ReviewScraper` class.
- **Languages:** Reviews were collected in three languages: English (`en`), Amharic (`am`), and Oromo (`om`).
- **Data Storage:** The scraped reviews were saved to a CSV file at `../data/raw_reviews.csv`.
- **Initial Inspection:** The first few rows of the scraped data were displayed to verify successful data collection and structure.



### Data Preparation, EDA, and Analysis Report for `task1-eda.ipynb`

- **Data Preparation:**
    - Loaded the raw reviews dataset.
    - Cleaned the data by handling missing values, removing duplicates, and standardizing text fields.
    - Translated non-English reviews to English for uniform analysis.
    - Added metadata such as review length and language detection.

- **Exploratory Data Analysis (EDA):**
    - Analyzed the distribution of reviews by app and language.
    - Visualized review ratings and identified trends over time.
    - Explored common keywords and sentiment distribution using word clouds and sentiment analysis.

- **Analysis:**
    - Compared user sentiment across different banking apps.
    - Identified common user concerns and positive feedback themes.
    - Highlighted actionable insights for app improvement based on review content and sentiment trends.
