In [1]:
# Use the %pip magic so packages are installed into the environment of the
# running kernel; the scraper is pinned to the version this notebook was run with.
%pip install google-play-scraper==1.2.7 pandas tqdm

Collecting google_play_scraper
  Downloading google_play_scraper-1.2.7-py3-none-any.whl.metadata (50 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading google_play_scraper-1.2.7-py3-none-any.whl (28 kB)
Installing collected packages: google_play_scraper
Successfully installed google_play_scraper-1.2.7


In [25]:
import pandas as pd
from google_play_scraper import Sort, reviews
from tqdm import tqdm

In [26]:
# Google Play Store application ID of the Microsoft Office app to scrape.
app_id: str = "com.microsoft.office.officehubrow"

In [32]:
# Define the function that scrapes reviews and saves them to a CSV file
def scrape_and_save_reviews(app_id, lang='id', country='id', sort=Sort.MOST_RELEVANT, count=1000, output_file='microsoft_office_reviews.csv'):
    """
    Scrape reviews from the Google Play Store and save them to a CSV file.

    Reviews are requested in batches. The continuation token returned by each
    request is passed to the next one so that every batch resumes where the
    previous batch stopped instead of starting again from the first page.
    Scraping stops when `count` reviews have been collected, when a request
    returns no reviews, when no further page is available, or when a request
    raises an exception (whatever was collected up to that point is still saved).

    Parameters:
        app_id (str): The app ID to scrape reviews from.
        lang (str): Language of reviews (default is 'id' for Indonesian).
        country (str): Country code for reviews (default is 'id' for Indonesia).
        sort (Sort): Sorting option for reviews (default is Sort.MOST_RELEVANT).
        count (int): Maximum number of reviews to scrape (default is 1000).
        output_file (str): The name of the output CSV file.

    Returns:
        None. The de-duplicated reviews are written to `output_file`; if no
        review was fetched, no file is written.
    """
    max_batch_size = 20000  # upper bound on reviews requested per call
    all_reviews = []
    continuation_token = None  # None means "start from the first page"

    with tqdm(total=count, desc="Scraping Reviews", unit="review") as pbar:
        while len(all_reviews) < count:
            remaining = count - len(all_reviews)
            try:
                fetched_reviews, continuation_token = reviews(
                    app_id,
                    lang=lang,
                    country=country,
                    sort=sort,
                    count=min(max_batch_size, remaining),
                    continuation_token=continuation_token,
                )
            except Exception as e:
                # Best effort: report the problem and keep what was collected.
                print(f"Error during scraping: {e}")
                break

            if not fetched_reviews:
                break

            # A resumed request may return more than `remaining` items, so
            # clamp the batch to keep the total and the progress bar at `count`.
            fetched_reviews = fetched_reviews[:remaining]
            all_reviews.extend(fetched_reviews)
            pbar.update(len(fetched_reviews))

            # The returned token object carries the next-page token; when it is
            # missing there is nothing left to fetch.
            if getattr(continuation_token, 'token', None) is None:
                break

    if not all_reviews:
        # An empty frame has no 'reviewId' column, so de-duplicating would raise
        # KeyError; also avoid overwriting an existing file with an empty one.
        print(f"No reviews were fetched. Nothing was saved to '{output_file}'.")
        return

    df_reviews = pd.DataFrame(all_reviews).drop_duplicates(subset=['reviewId'])

    df_reviews.to_csv(output_file, index=False)
    print(f"Scraping completed. Saved {df_reviews.shape[0]} reviews to '{output_file}'.")


In [33]:
# Run the scraper: request 20000 reviews for `app_id` and write them to app_reviews.csv
scrape_and_save_reviews(
    app_id=app_id,
    count=20000,
    output_file='app_reviews.csv',
)


Scraping Reviews: 100%|██████████| 20000/20000 [00:10<00:00, 1979.18review/s]


Scraping completed. Saved 20000 reviews to 'app_reviews.csv'.
