In [None]:
!pip install google-play-scraper



In [None]:
import google_play_scraper

In [None]:
# Identifier of the app whose reviews are scraped; passed to reviews() further down.
app_id = 'com.openai.chatgpt'

In [None]:
from google_play_scraper import Sort
from google_play_scraper.constants.element import ElementSpecs
from google_play_scraper.constants.regex import Regex
from google_play_scraper.constants.request import Formats
from google_play_scraper.utils.request import post

import pandas as pd
from datetime import datetime
from tqdm import tqdm
import time
import json
from time import sleep
from typing import List, Optional, Tuple

In [None]:
# Upper bound on the number of reviews requested in a single fetch: reviews()
# clamps each request to this value and reviews_all() uses it as its page size.
MAX_COUNT_EACH_FETCH = 199


class _ContinuationToken:
    __slots__ = (
        "token",
        "lang",
        "country",
        "sort",
        "count",
        "filter_score_with",
        "filter_device_with",
    )

    def __init__(
        self, token, lang, country, sort, count, filter_score_with, filter_device_with
    ):
        self.token = token
        self.lang = lang
        self.country = country
        self.sort = sort
        self.count = count
        self.filter_score_with = filter_score_with
        self.filter_device_with = filter_device_with


def _fetch_review_items(
    url: str,
    app_id: str,
    sort: int,
    count: int,
    filter_score_with: Optional[int],
    filter_device_with: Optional[int],
    pagination_token: Optional[str],
):
    """POST one reviews request and return ``(review_items, next_token)``.

    Args:
        url: Endpoint to post to.
        app_id: Identifier of the app whose reviews are requested.
        sort: Sort order value placed in the request body.
        count: Number of reviews to ask for in this request.
        filter_score_with: Score filter; ``None`` is sent as the string ``"null"``.
        filter_device_with: Device filter; ``None`` is sent as the string ``"null"``.
        pagination_token: Token of the page to fetch, or ``None``.

    Returns:
        A 2-tuple: the first element of the decoded payload (the review
        items) and the last element of its second-to-last entry (used by the
        caller as the token for the next page).

    Raises:
        IndexError, TypeError: When the response does not have the expected
            nesting; ``reviews`` catches both.
    """
    dom = post(
        url,
        Formats.Reviews.build_body(
            app_id,
            sort,
            count,
            "null" if filter_score_with is None else filter_score_with,
            "null" if filter_device_with is None else filter_device_with,
            pagination_token,
        ),
        {"content-type": "application/x-www-form-urlencoded"},
    )
    match = json.loads(Regex.REVIEWS.findall(dom)[0])

    # The payload is itself a JSON string nested inside the outer JSON;
    # decode it once and reuse it for both return values.
    payload = json.loads(match[0][2])

    return payload[0], payload[-2][-1]


def reviews(
    app_id: str,
    lang: str = "en",
    country: str = "us",
    sort: Sort = Sort.MOST_RELEVANT,
    count: int = 100,
    filter_score_with: int = None,
    filter_device_with: int = None,
    continuation_token: _ContinuationToken = None,
    max_retries: int = 3,
) -> Tuple[List[dict], _ContinuationToken]:
    """Fetch up to ``count`` reviews for ``app_id``, optionally resuming a previous call.

    Args:
        app_id: Identifier of the app whose reviews are requested.
        lang: Language used to build the request URL.
        country: Country used to build the request URL.
        sort: ``Sort`` member; its ``.value`` is sent with the request.
        count: Total number of reviews wanted from this call.
        filter_score_with: Optional score filter forwarded to the request.
        filter_device_with: Optional device filter forwarded to the request.
        continuation_token: Token returned by an earlier call. When given, the
            ``lang``, ``country``, ``sort``, ``count`` and filter values stored
            in it replace the corresponding arguments of this call.
        max_retries: How many times in a row a page fetch that raised
            ``TypeError``/``IndexError`` is retried before pagination is
            treated as finished.

    Returns:
        ``(result, token)`` where ``result`` is a list of dicts keyed by the
        names in ``ElementSpecs.Review`` and ``token`` is a
        ``_ContinuationToken`` for the next call. ``token.token`` is ``None``
        when there is nothing further to fetch.
    """
    sort = sort.value

    if continuation_token is not None:
        token = continuation_token.token

        # A stored token of None marks an exhausted listing: nothing to fetch.
        if token is None:
            return (
                [],
                continuation_token,
            )

        lang = continuation_token.lang
        country = continuation_token.country
        sort = continuation_token.sort
        count = continuation_token.count
        filter_score_with = continuation_token.filter_score_with
        filter_device_with = continuation_token.filter_device_with
    else:
        token = None

    url = Formats.Reviews.build(lang=lang, country=country)

    _fetch_count = count

    result = []
    consecutive_failures = 0

    while True:
        # "<=" rather than "==" so that a page returning more items than
        # requested (or a non-positive count) cannot produce a negative request.
        if _fetch_count <= 0:
            break

        if _fetch_count > MAX_COUNT_EACH_FETCH:
            _fetch_count = MAX_COUNT_EACH_FETCH

        try:
            review_items, token = _fetch_review_items(
                url,
                app_id,
                sort,
                _fetch_count,
                filter_score_with,
                filter_device_with,
                token,
            )
        except (TypeError, IndexError):
            # The assignment above did not happen, so ``token`` still refers to
            # the page that failed: retrying re-requests exactly that page and
            # never re-fetches pages already appended to ``result``. This also
            # works on a first call, where ``continuation_token`` is None.
            consecutive_failures += 1
            if consecutive_failures > max_retries:
                # Persistent failure (e.g. a response without further data):
                # stop instead of looping forever and report "no more pages".
                token = None
                break
            continue

        consecutive_failures = 0

        for review in review_items:
            result.append(
                {
                    k: spec.extract_content(review)
                    for k, spec in ElementSpecs.Review.items()
                }
            )

        _fetch_count = count - len(result)

        # A list in the token position is treated as "no further page".
        if isinstance(token, list):
            token = None
            break

    return (
        result,
        _ContinuationToken(
            token, lang, country, sort, count, filter_score_with, filter_device_with
        ),
    )


def reviews_all(app_id: str, sleep_milliseconds: int = 0, **kwargs) -> list:
    """Gather reviews page by page until ``reviews`` returns a token of ``None``.

    Args:
        app_id: Identifier of the app whose reviews are requested.
        sleep_milliseconds: Pause between consecutive pages, in milliseconds;
            a falsy value means no pause.
        **kwargs: Forwarded to ``reviews``. Any ``count`` or
            ``continuation_token`` entry is discarded because this function
            manages both itself.

    Returns:
        A single list with the reviews of every fetched page, in fetch order.
    """
    # Paging is controlled here, so caller-supplied values are dropped.
    for managed_key in ("count", "continuation_token"):
        kwargs.pop(managed_key, None)

    collected = []
    page_token = None

    while True:
        page, page_token = reviews(
            app_id,
            count=MAX_COUNT_EACH_FETCH,
            continuation_token=page_token,
            **kwargs
        )
        collected.extend(page)

        # No token left means the last page has been reached.
        if page_token.token is None:
            return collected

        if sleep_milliseconds:
            sleep(sleep_milliseconds / 1000)

In [None]:
# Target number of reviews: the collection loop below keeps fetching while
# fewer than this many have been gathered.
reviews_count = 10

In [None]:
# Collect reviews page by page until `reviews_count` have been gathered or a
# call returns no reviews.
result = []
continuation_token = None

# Never ask for more than is wanted, nor for more than one request may carry.
# reviews() reuses the count stored in the continuation token on follow-up
# calls, so the page size is fixed once here.
page_size = min(reviews_count, MAX_COUNT_EACH_FETCH)

with tqdm(total=reviews_count, position=0, leave=True) as pbar:
    while len(result) < reviews_count:
        new_result, continuation_token = reviews(
            app_id,
            continuation_token=continuation_token,
            lang='en',  # review language
            country='us',  # country of the app listing
            sort=Sort.MOST_RELEVANT,  # review order: most relevant
            filter_score_with=None,
            count=page_size  # reviews per request (at most MAX_COUNT_EACH_FETCH)
        )
        if not new_result:
            break
        # Trim the last page so the total never exceeds reviews_count and the
        # progress bar never runs past its declared total.
        new_result = new_result[:reviews_count - len(result)]
        result.extend(new_result)
        pbar.update(len(new_result))


199it [00:00, 1052.37it/s]


In [None]:
# Build a DataFrame from the collected review dicts (one row per review).
df = pd.DataFrame(result)

In [None]:
len(df.index) # then count again how many rows were obtained

199

In [None]:
df.columns

Index(['reviewId', 'userName', 'userImage', 'content', 'score',
       'thumbsUpCount', 'reviewCreatedVersion', 'at', 'replyContent',
       'repliedAt', 'appVersion'],
      dtype='object')

In [None]:
# Keep only the columns of interest; userImage, replyContent and repliedAt are dropped.
df = df[['reviewId', 'userName', 'content', 'score',
       'thumbsUpCount', 'reviewCreatedVersion', 'at', 'appVersion']]

In [None]:
# Preview the first rows after the column selection.
df.head()

Unnamed: 0,reviewId,userName,content,score,thumbsUpCount,reviewCreatedVersion,at,appVersion
0,554f4a3b-40ca-4479-9e4b-800fcabbffb3,Mitchell “Muscle Man” Sorrenstein,"It's a very good app, the UI, Settings, and st...",5,68,1.2024.226,2024-09-02 09:40:10,1.2024.226
1,b34696ee-8c76-444b-adff-d93c7eeded69,Mark Matzke,"At first, GPT was a bit intimidating—a constan...",5,210,1.2024.219,2024-08-21 01:18:53,1.2024.219
2,f5181231-6315-45dd-8897-949ac9136942,NA NA,"Good stuff, really. Can't fault it at all, but...",5,15,1.2024.226,2024-08-31 17:14:26,1.2024.226
3,53fff525-5159-45df-860d-86bd558b3ffd,Lauren Bain,It's okay. The AI answers well but all the rul...,3,1,1.2024.233,2024-09-02 19:11:05,1.2024.233
4,b6c1592d-278f-4683-b767-f4bccffd33af,Michael Pyles,Cool up until I'm running a D&D campaign and i...,1,2,1.2024.226,2024-09-03 06:35:17,1.2024.226


In [None]:
# Write the selected columns to a CSV file; index=False leaves the row index out.
df.to_csv('chatgpt_reviews_US.csv', index = False)

In [None]:
# Render the DataFrame in the notebook output.
display(df)

Unnamed: 0,reviewId,userName,content,score,thumbsUpCount,reviewCreatedVersion,at,appVersion
0,554f4a3b-40ca-4479-9e4b-800fcabbffb3,Mitchell “Muscle Man” Sorrenstein,"It's a very good app, the UI, Settings, and st...",5,68,1.2024.226,2024-09-02 09:40:10,1.2024.226
1,b34696ee-8c76-444b-adff-d93c7eeded69,Mark Matzke,"At first, GPT was a bit intimidating—a constan...",5,210,1.2024.219,2024-08-21 01:18:53,1.2024.219
2,f5181231-6315-45dd-8897-949ac9136942,NA NA,"Good stuff, really. Can't fault it at all, but...",5,15,1.2024.226,2024-08-31 17:14:26,1.2024.226
3,53fff525-5159-45df-860d-86bd558b3ffd,Lauren Bain,It's okay. The AI answers well but all the rul...,3,1,1.2024.233,2024-09-02 19:11:05,1.2024.233
4,b6c1592d-278f-4683-b767-f4bccffd33af,Michael Pyles,Cool up until I'm running a D&D campaign and i...,1,2,1.2024.226,2024-09-03 06:35:17,1.2024.226
...,...,...,...,...,...,...,...,...
194,913d534b-7bfa-4b00-a4ce-9cdbbf49d36f,Asfak Molla,The most favourite AI tool that I use But some...,4,0,1.2024.226,2024-09-03 07:45:41,1.2024.226
195,01e82ede-46a0-4979-95a9-a6c9a959c262,Dear Usama,"**Rating: ⭐⭐⭐⭐⭐** **Review:** ""ChatGPT has exc...",5,309,1.2024.177,2024-07-16 07:29:32,1.2024.177
196,165aa087-e115-4c3f-8783-f98017d93129,Victor Kolba,easy to use and free from ads. great experienc...,5,9,1.2024.206,2024-08-21 22:56:25,1.2024.206
197,f13c9729-4fc2-4530-bd9b-063a2134c038,Rajshree Soni,The user is preparing for exams and prefers st...,4,3,1.2024.226,2024-08-23 11:48:59,1.2024.226
