- **This notebook loads the reviews and saves them to a file using the *GooglePlayReviewScraper* utility class, then performs initial data cleaning and inspection procedures.**

In [1]:
# Put the parent of the working directory on sys.path so that the
# `scripts` package imported by later cells can be resolved.
import os
import sys
from pathlib import Path

# Assumes the notebook is launched from a directory one level below the
# project root -- TODO confirm if the notebook is ever moved.
parent_directory = Path(os.getcwd()).parent

# Guard the insert: re-running this cell must not stack duplicate entries.
if str(parent_directory) not in sys.path:
    sys.path.insert(0, str(parent_directory))
sys.path

['/home/miki/Desktop/KIFIYA/week-2/customer-experience-analytics',
 '/usr/lib/python312.zip',
 '/usr/lib/python3.12',
 '/usr/lib/python3.12/lib-dynload',
 '',
 '/home/miki/Desktop/KIFIYA/week-2/customer-experience-analytics/venv/lib/python3.12/site-packages']

In [12]:
# Bring in the scraper entry point and collect the CBE app's reviews.
from scripts.scrape_reviews import main as scrape_reviews

# Destination CSV for the CBE reviews.
cbe_csv_path = f"{parent_directory}/data/cbe_reviews.csv"

scrape_reviews(
    app_id="com.combanketh.mobilebanking",
    bank_name="CBE",
    total_reviews=400,
    lang="en",
    file_name=cbe_csv_path,
)

2025-06-06 16:58:34,144 - INFO - Starting review scrape for CBE (App ID: com.combanketh.mobilebanking)
2025-06-06 16:58:34,145 - INFO - Fetching 100 reviews... (collected so far: 0)
2025-06-06 16:58:38,180 - INFO - Fetching 100 reviews... (collected so far: 100)
2025-06-06 16:58:41,319 - INFO - Fetching 100 reviews... (collected so far: 200)
2025-06-06 16:58:44,895 - INFO - Fetching 100 reviews... (collected so far: 300)
2025-06-06 16:58:53,541 - INFO - Finished scraping. Total reviews collected: 400
2025-06-06 16:58:53,567 - INFO - Saved reviews to /home/miki/Desktop/KIFIYA/week-2/customer-experience-analytics/data/cbe_reviews.csv


In [13]:
# Repeat the scrape for the remaining banks (BOA, then Dashen).
# Each entry: (Play Store app id, bank label, destination CSV).
remaining_bank_jobs = (
    ("com.boa.boaMobileBanking", "BOA", f"{parent_directory}/data/boa_reviews.csv"),
    ("com.dashen.dashensuperapp", "Dashen", f"{parent_directory}/data/dashen_reviews.csv"),
)

for bank_app_id, bank_label, bank_csv_path in remaining_bank_jobs:
    scrape_reviews(
        app_id=bank_app_id,
        bank_name=bank_label,
        total_reviews=400,
        lang="en",
        file_name=bank_csv_path,
    )

2025-06-06 17:07:02,764 - INFO - Starting review scrape for BOA (App ID: com.boa.boaMobileBanking)
2025-06-06 17:07:02,766 - INFO - Fetching 100 reviews... (collected so far: 0)
2025-06-06 17:07:05,214 - INFO - Fetching 100 reviews... (collected so far: 100)
2025-06-06 17:07:07,566 - INFO - Fetching 100 reviews... (collected so far: 200)
2025-06-06 17:07:09,389 - INFO - Fetching 100 reviews... (collected so far: 300)
2025-06-06 17:07:12,312 - INFO - Finished scraping. Total reviews collected: 400
2025-06-06 17:07:12,348 - INFO - Saved reviews to /home/miki/Desktop/KIFIYA/week-2/customer-experience-analytics/data/boa_reviews.csv
2025-06-06 17:07:12,352 - INFO - Starting review scrape for Dashen (App ID: com.dashen.dashensuperapp)
2025-06-06 17:07:12,354 - INFO - Fetching 100 reviews... (collected so far: 0)
2025-06-06 17:07:14,803 - INFO - Fetching 100 reviews... (collected so far: 100)
2025-06-06 17:07:17,027 - INFO - Fetching 100 reviews... (collected so far: 200)
2025-06-06 17:08:27,

### Data cleaning

In [2]:
# Read the scraped CBE reviews back from disk with the project's load_reviews helper.
from scripts.load_csv import load_reviews

cbe_reviews_path = f"{parent_directory}/data/cbe_reviews.csv"
cbe_reviews = load_reviews(filepath=cbe_reviews_path)

# Preview the first rows of the raw frame.
cbe_reviews.head()

2025-06-07 17:08:40,938 - INFO - Successfully loaded data from /home/miki/Desktop/KIFIYA/week-2/customer-experience-analytics/data/cbe_reviews.csv with shape (400, 11)


Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion
0,b12d0383-9b27-4e49-a94d-277a43b15800,Yesuf Ahmed,https://play-lh.googleusercontent.com/a/ACg8oc...,the app is proactive and a good connections.,5,0,5.1.0,2025-06-05 15:55:10,,,5.1.0
1,dd9f9e37-177a-46df-b877-d0edaa9aed29,Yonas Mekonnen,https://play-lh.googleusercontent.com/a-/ALV-U...,I cannot send to cbebirr app. through this app.,3,0,,2025-06-05 11:12:49,,,
2,8e34703c-203c-4180-8b32-bfd0b3f0c871,Yibrah Yebo,https://play-lh.googleusercontent.com/a/ACg8oc...,good,4,0,,2025-06-05 10:21:59,,,
3,6f0c7fa2-3ce1-4310-b135-54fe0cb9fccd,kibru tebeka,https://play-lh.googleusercontent.com/a-/ALV-U...,not functional,1,0,5.1.0,2025-06-05 07:38:12,,,5.1.0
4,f0dd744a-9409-4619-9800-7ea501571b09,Abreham Shiferaw,https://play-lh.googleusercontent.com/a/ACg8oc...,everytime you uninstall the app you have to re...,1,0,5.1.0,2025-06-04 21:33:36,,,5.1.0


In [3]:
# Clean the CBE reviews with the clean_reviews utility function.
# The module is reloaded first so that the `from ... import` below picks up
# the freshly reloaded definition of clean_reviews.
import importlib
from scripts import clean_reviews_data
importlib.reload(clean_reviews_data)

from scripts.clean_reviews_data import clean_reviews

# Cleaning options for CBE: columns to drop, columns to rename, and
# constant-valued columns to add.
cbe_cleaning_options = {
    "drop_columns": [
        "userImage",
        "thumbsUpCount",
        "reviewCreatedVersion",
        "replyContent",
        "repliedAt",
        "appVersion",
    ],
    "rename_columns": {"content": "review", "score": "rating", "at": "date"},
    "new_columns": {"bank": "CBE", "source": "Google Play Store"},
}

cbe_reviews_cleaned = clean_reviews(df=cbe_reviews, **cbe_cleaning_options)
cbe_reviews_cleaned.head()

2025-06-07 17:08:40,987 - INFO - Data cleaned. New shape: (400, 7)


Unnamed: 0,reviewId,userName,review,rating,date,bank,source
0,b12d0383-9b27-4e49-a94d-277a43b15800,Yesuf Ahmed,the app is proactive and a good connections.,5,2025-06-05 15:55:10,CBE,Google Play Store
1,dd9f9e37-177a-46df-b877-d0edaa9aed29,Yonas Mekonnen,I cannot send to cbebirr app. through this app.,3,2025-06-05 11:12:49,CBE,Google Play Store
2,8e34703c-203c-4180-8b32-bfd0b3f0c871,Yibrah Yebo,good,4,2025-06-05 10:21:59,CBE,Google Play Store
3,6f0c7fa2-3ce1-4310-b135-54fe0cb9fccd,kibru tebeka,not functional,1,2025-06-05 07:38:12,CBE,Google Play Store
4,f0dd744a-9409-4619-9800-7ea501571b09,Abreham Shiferaw,everytime you uninstall the app you have to re...,1,2025-06-04 21:33:36,CBE,Google Play Store


In [4]:
# Load and clean the BOA and Dashen reviews the same way as the CBE reviews.
boa_reviews = load_reviews(filepath=f"{parent_directory}/data/boa_reviews.csv")
dashen_reviews = load_reviews(filepath=f"{parent_directory}/data/dashen_reviews.csv")


def _clean_for_bank(raw_reviews, bank_label):
    """Run clean_reviews on one bank's raw frame, tagging rows with ``bank_label``.

    The option literals are rebuilt on every call, so each clean_reviews
    invocation receives its own list and dict objects.
    """
    return clean_reviews(
        df=raw_reviews,
        drop_columns=[
            "userImage",
            "thumbsUpCount",
            "reviewCreatedVersion",
            "replyContent",
            "repliedAt",
            "appVersion",
        ],
        rename_columns={"content": "review", "score": "rating", "at": "date"},
        new_columns={"bank": bank_label, "source": "Google Play Store"},
    )


boa_reviews_cleaned = _clean_for_bank(boa_reviews, "BOA")
dashen_reviews_cleaned = _clean_for_bank(dashen_reviews, "Dashen")

2025-06-07 17:08:41,125 - INFO - Successfully loaded data from /home/miki/Desktop/KIFIYA/week-2/customer-experience-analytics/data/boa_reviews.csv with shape (400, 11)
2025-06-07 17:08:41,138 - INFO - Successfully loaded data from /home/miki/Desktop/KIFIYA/week-2/customer-experience-analytics/data/dashen_reviews.csv with shape (400, 11)
2025-06-07 17:08:41,143 - INFO - Data cleaned. New shape: (400, 7)
2025-06-07 17:08:41,147 - INFO - Data cleaned. New shape: (400, 7)


In [5]:
# Stack the three cleaned per-bank frames into one frame with a fresh index,
# then show its dimensions.
import pandas as pd

cleaned_frames = [cbe_reviews_cleaned, boa_reviews_cleaned, dashen_reviews_cleaned]
combined_df = pd.concat(cleaned_frames, ignore_index=True)
combined_df.shape

(1200, 7)

In [6]:
# Count the missing values in each column of the combined frame.
combined_df.isna().sum()

reviewId    0
userName    0
review      0
rating      0
date        0
bank        0
source      0
dtype: int64

In [7]:
# No column has missing values, so save the combined reviews to a new file.
# index=False: combined_df was built with ignore_index=True, so its index is
# just a row counter; writing it would add an unnamed extra column to the CSV.
combined_df.to_csv(f"{parent_directory}/data/all_reviews.csv", index=False)