Instalasi library web scraper

In [127]:
pip install google-play-scraper

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [128]:
pip install langdetect

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [129]:
# Mount Google Drive at /content/drive in the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [130]:
pip install -qq watermark

In [131]:
# (Re)load the watermark extension, then print the Python/IPython versions and
# the installed versions of the listed packages.
%reload_ext watermark
%watermark -v -p pandas,matplotlib,seaborn,google_play_scraper

Python implementation: CPython
Python version       : 3.8.10
IPython version      : 7.9.0

pandas             : 1.3.5
matplotlib         : 3.5.3
seaborn            : 0.11.2
google_play_scraper: 1.2.3



In [132]:
import json

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm import tqdm

from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import TerminalFormatter

from google_play_scraper import Sort, reviews, app
from langdetect import detect
from langdetect import DetectorFactory
from langdetect.lang_detect_exception import LangDetectException

# Render matplotlib figures inline, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Apply the seaborn style, palette and font scale used for plots in this notebook.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

In [133]:
# Google Play package identifiers of the apps whose metadata and reviews are
# scraped by the cells that follow.
app_packages = [
    'com.riotgames.league.wildrift',
    'com.dts.freefireth',
    'com.mobile.legends',
    'com.wildlife.games.battle.royale.free.zooba',
    'com.rsg.heroesevolved',
]

In [134]:
# Fetch the store metadata of every package in app_packages (English / US store).
app_infos = []

for ap in tqdm(app_packages):
  info = app(ap, lang='en', country='us')
  # Drop the 'comments' entry before storing the record. pop() with a default
  # does not raise when the key is missing, unlike `del info['comments']`.
  info.pop('comments', None)
  app_infos.append(info)

100%|██████████| 5/5 [00:01<00:00,  3.33it/s]


In [135]:
# Number of app metadata records collected.
len(app_infos)

5

In [136]:
def print_json(json_object):
  """Print ``json_object`` as syntax-highlighted, pretty-printed JSON.

  The object is serialized with two-space indentation and sorted keys; values
  that ``json`` cannot encode natively are converted with ``str``. The JSON
  text is then passed through pygments' ``highlight`` with ``JsonLexer`` and
  ``TerminalFormatter`` and written to stdout with ``print``.
  """
  serialized = json.dumps(json_object, default=str, sort_keys=True, indent=2)
  colored = highlight(serialized, JsonLexer(), TerminalFormatter())
  print(colored)

In [137]:
# Turn the collected metadata records into a DataFrame and write it to
# 'apps_up.csv' with a header row and without the index column.
app_infos_df = pd.DataFrame(app_infos)
app_infos_df.to_csv('apps_up.csv', index=False)

In [138]:
# Earlier version of the review-scraping loop, kept commented out (not executed).
# NOTE(review): this version passes country='en' to reviews() while app() uses
# country='us'.
# app_reviews = []

# for ap in tqdm(app_packages):
#   # Get information about the application
#   app_info = app(ap, lang='en', country='us')

#   rvs, _ = reviews(
#         ap,
#         lang='en',
#         country='en',
#         sort=Sort.NEWEST,
#         count=30000
#         )
#    # Add the application name to each review
#   for r in rvs:
#         r['app_name'] = app_info['title']
        
#   app_reviews.extend(rvs)  

In [139]:
# Scrape up to 50000 "most relevant" reviews per app and keep only the English
# ones, each tagged with the title of the app it belongs to.

# langdetect is non-deterministic by default; fixing the seed makes the language
# filter give the same result on every run.
DetectorFactory.seed = 0

app_reviews = []
# reviewId values already stored. A set gives constant-time duplicate checks;
# the former `r not in app_reviews` scanned the whole list for every review and
# compared a dict without 'app_name' against stored dicts that all have it.
seen_review_ids = set()

for ap in tqdm(app_packages):
    # Get information about the application (its title is attached to each review)
    app_info = app(ap, lang='en', country='us')

    rvs, _ = reviews(
        ap,
        lang='en',
        country='us',
        sort=Sort.MOST_RELEVANT,
        count=50000
    )

    for r in rvs:
        content = r['content']
        # Only string content can be passed to the language detector
        if not isinstance(content, str):
            continue
        # Skip reviews that were already collected
        if r['reviewId'] in seen_review_ids:
            continue
        try:
            is_english = detect(content) == 'en'
        except LangDetectException:
            # Ignore reviews whose language cannot be identified
            continue
        if not is_english:
            continue
        seen_review_ids.add(r['reviewId'])
        # Add the application name to the review
        r['app_name'] = app_info['title']
        app_reviews.append(r)

100%|██████████| 5/5 [1:28:02<00:00, 1056.47s/it]


In [140]:
# Pretty-print the first collected review.
print_json(app_reviews[0])

{
  [94m"app_name"[39;49;00m: [33m"League of Legends: Wild Rift"[39;49;00m,
  [94m"at"[39;49;00m: [33m"2023-02-28 23:52:36"[39;49;00m,
  [94m"content"[39;49;00m: [33m"I like this game alot. Game controls smooth game runs smooth graphics are stellar.. however there is a couple dislikes the suspension of players leaving matches. It's a phone app. So there are definitely times your going to have to leave a match. Phone call will do it. And the 6 to 7 minutes of wait time if not more to find a match in the first place. That's the 3 out of 5 star review."[39;49;00m,
  [94m"repliedAt"[39;49;00m: [34mnull[39;49;00m,
  [94m"replyContent"[39;49;00m: [34mnull[39;49;00m,
  [94m"reviewCreatedVersion"[39;49;00m: [33m"4.0.0.6270"[39;49;00m,
  [94m"reviewId"[39;49;00m: [33m"0db5c544-3b32-49c8-ac71-55623182eabf"[39;49;00m,
  [94m"score"[39;49;00m: [34m3[39;49;00m,
  [94m"thumbsUpCount"[39;49;00m: [34m0[39;49;00m,
  [94m"userImage"[39;49;00m: [33m"https://play-lh.

In [141]:
# Total number of collected reviews.
len(app_reviews)

225892

In [142]:
# Build a DataFrame from the list of review dicts.
app_reviews_df = pd.DataFrame(app_reviews)

In [143]:
# Boolean mask: True for rows whose 'content' repeats that of an earlier row.
app_dup = app_reviews_df.duplicated(subset=['content'])

In [144]:
# Display the duplicate mask.
app_dup

0         False
1         False
2         False
3         False
4         False
          ...  
225887    False
225888    False
225889    False
225890    False
225891    False
Length: 225892, dtype: bool

In [145]:
# Count duplicate (True) versus non-duplicate (False) rows.
app_dup.value_counts()

False    219321
True       6571
dtype: int64

In [146]:
# Show the rows whose 'content' is null.
app_reviews_df[app_reviews_df['content'].isnull()]

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,app_name


In [147]:
# Keep only the first row for each distinct 'content' value, then display the frame.
app_reviews_df = app_reviews_df.drop_duplicates(subset=['content'])
app_reviews_df

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,app_name
0,0db5c544-3b32-49c8-ac71-55623182eabf,Oklahoma Hound,https://play-lh.googleusercontent.com/a/AGNmyx...,I like this game alot. Game controls smooth ga...,3,0,4.0.0.6270,2023-02-28 23:52:36,,NaT,League of Legends: Wild Rift
1,29333e61-701d-4b4d-91cb-0246d796b375,Bryan Bush,https://play-lh.googleusercontent.com/a-/ACB-R...,For casual players: The most fundamental flaw ...,3,79,4.0.0.6270,2023-02-07 03:21:16,,NaT,League of Legends: Wild Rift
2,dfca10dc-a5e0-400c-bb9e-ca11672e5948,Vincent Agustin,https://play-lh.googleusercontent.com/a-/ACB-R...,This game is better than other mobile games bu...,3,0,4.0.0.6270,2023-03-01 16:45:36,,NaT,League of Legends: Wild Rift
3,51323dde-4adb-47c9-8536-c0abebd22412,Andre Koh,https://play-lh.googleusercontent.com/a-/ACB-R...,The game is fun and all when you get to play i...,3,342,4.0.0.6270,2023-01-25 11:28:18,,NaT,League of Legends: Wild Rift
4,15bf3e31-8ef3-4fd1-9f1b-7dd1ecf6c7b5,Lee Stefan,https://play-lh.googleusercontent.com/a/AGNmyx...,Only exclusive to this game in all games acros...,3,47,4.0.0.6270,2023-01-25 10:12:39,,NaT,League of Legends: Wild Rift
...,...,...,...,...,...,...,...,...,...,...,...
225887,81bce800-f923-4c68-9080-1514fa9e7ce1,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,Better than mobile legends hahaah no lag,5,0,1.1.7.0,2017-03-25 01:01:02,,NaT,Heroes Evolved
225888,31c34ce0-bf2e-4f59-9720-78cd19cd2b3b,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,This is the good game but not same with dota,5,0,1.1.7.0,2017-04-07 10:55:46,,NaT,Heroes Evolved
225889,3a5dffa4-76b3-4242-9189-22e87aa1d5b4,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,Its da best of all the mobile Moba games,5,0,1.1.7.0,2017-04-04 16:44:54,,NaT,Heroes Evolved
225890,51ff0526-0d95-414e-8f99-649800bf792c,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,"i like the game very much,but i can not invite...",5,0,1.1.7.0,2017-04-10 06:04:01,,NaT,Heroes Evolved


In [148]:
# Distribution of review scores after dropping duplicate content.
app_reviews_df.score.value_counts()

5    77854
1    67617
4    29398
3    25411
2    19041
Name: score, dtype: int64

In [149]:
# Drop rows with missing 'content' and renumber the index from 0, then display
# the resulting frame.
app_reviews_df_clear = (
    app_reviews_df
    .dropna(subset=['content'])
    .reset_index(drop=True)
)
app_reviews_df_clear

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,app_name
0,0db5c544-3b32-49c8-ac71-55623182eabf,Oklahoma Hound,https://play-lh.googleusercontent.com/a/AGNmyx...,I like this game alot. Game controls smooth ga...,3,0,4.0.0.6270,2023-02-28 23:52:36,,NaT,League of Legends: Wild Rift
1,29333e61-701d-4b4d-91cb-0246d796b375,Bryan Bush,https://play-lh.googleusercontent.com/a-/ACB-R...,For casual players: The most fundamental flaw ...,3,79,4.0.0.6270,2023-02-07 03:21:16,,NaT,League of Legends: Wild Rift
2,dfca10dc-a5e0-400c-bb9e-ca11672e5948,Vincent Agustin,https://play-lh.googleusercontent.com/a-/ACB-R...,This game is better than other mobile games bu...,3,0,4.0.0.6270,2023-03-01 16:45:36,,NaT,League of Legends: Wild Rift
3,51323dde-4adb-47c9-8536-c0abebd22412,Andre Koh,https://play-lh.googleusercontent.com/a-/ACB-R...,The game is fun and all when you get to play i...,3,342,4.0.0.6270,2023-01-25 11:28:18,,NaT,League of Legends: Wild Rift
4,15bf3e31-8ef3-4fd1-9f1b-7dd1ecf6c7b5,Lee Stefan,https://play-lh.googleusercontent.com/a/AGNmyx...,Only exclusive to this game in all games acros...,3,47,4.0.0.6270,2023-01-25 10:12:39,,NaT,League of Legends: Wild Rift
...,...,...,...,...,...,...,...,...,...,...,...
219316,81bce800-f923-4c68-9080-1514fa9e7ce1,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,Better than mobile legends hahaah no lag,5,0,1.1.7.0,2017-03-25 01:01:02,,NaT,Heroes Evolved
219317,31c34ce0-bf2e-4f59-9720-78cd19cd2b3b,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,This is the good game but not same with dota,5,0,1.1.7.0,2017-04-07 10:55:46,,NaT,Heroes Evolved
219318,3a5dffa4-76b3-4242-9189-22e87aa1d5b4,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,Its da best of all the mobile Moba games,5,0,1.1.7.0,2017-04-04 16:44:54,,NaT,Heroes Evolved
219319,51ff0526-0d95-414e-8f99-649800bf792c,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,"i like the game very much,but i can not invite...",5,0,1.1.7.0,2017-04-10 06:04:01,,NaT,Heroes Evolved


In [150]:
# Convert the 'at' column to strings so it can be filtered with .str methods.
app_reviews_df_clear["at"] = app_reviews_df_clear["at"].astype(str)

In [151]:
# Number of rows whose 'at' string contains "2018".
len(app_reviews_df_clear[app_reviews_df_clear["at"].str.contains("2018")])

6894

In [158]:
# Remove reviews dated 2017, 2018 or 2019. The comparison is anchored to the
# four-character year prefix of the 'at' string (format "YYYY-MM-DD HH:MM:SS"),
# so digits elsewhere in the timestamp can never match a year.
app_reviews_df_clear = app_reviews_df_clear[
    ~app_reviews_df_clear["at"].str[:4].isin(["2017", "2018", "2019"])
]

In [166]:
# Number of remaining rows whose 'at' string contains "2020".
len(app_reviews_df_clear[app_reviews_df_clear["at"].str.contains("2020")])

55324

In [167]:
# Show the rows of the filtered frame whose 'content' is null.
app_reviews_df_clear[app_reviews_df_clear['content'].isnull()]

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,app_name


In [168]:
# Display the filtered reviews frame.
app_reviews_df_clear

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,app_name
0,0db5c544-3b32-49c8-ac71-55623182eabf,Oklahoma Hound,https://play-lh.googleusercontent.com/a/AGNmyx...,I like this game alot. Game controls smooth ga...,3,0,4.0.0.6270,2023-02-28 23:52:36,,NaT,League of Legends: Wild Rift
1,29333e61-701d-4b4d-91cb-0246d796b375,Bryan Bush,https://play-lh.googleusercontent.com/a-/ACB-R...,For casual players: The most fundamental flaw ...,3,79,4.0.0.6270,2023-02-07 03:21:16,,NaT,League of Legends: Wild Rift
2,dfca10dc-a5e0-400c-bb9e-ca11672e5948,Vincent Agustin,https://play-lh.googleusercontent.com/a-/ACB-R...,This game is better than other mobile games bu...,3,0,4.0.0.6270,2023-03-01 16:45:36,,NaT,League of Legends: Wild Rift
3,51323dde-4adb-47c9-8536-c0abebd22412,Andre Koh,https://play-lh.googleusercontent.com/a-/ACB-R...,The game is fun and all when you get to play i...,3,342,4.0.0.6270,2023-01-25 11:28:18,,NaT,League of Legends: Wild Rift
4,15bf3e31-8ef3-4fd1-9f1b-7dd1ecf6c7b5,Lee Stefan,https://play-lh.googleusercontent.com/a/AGNmyx...,Only exclusive to this game in all games acros...,3,47,4.0.0.6270,2023-01-25 10:12:39,,NaT,League of Legends: Wild Rift
...,...,...,...,...,...,...,...,...,...,...,...
216858,b94be9c6-4545-4594-b6bd-df3447ad0913,Mayank Gala,https://play-lh.googleusercontent.com/a-/ACB-R...,DONT INSTALL MATCHMAKING IS WORST,1,0,1.1.9.0,2020-06-29 08:32:51,,NaT,Heroes Evolved
217573,9cf9def6-9438-43f8-97c8-9963f99d02c6,Johny Nabam,https://play-lh.googleusercontent.com/a-/ACB-R...,Your matchmaking sucks..,2,0,1.1.9.0,2020-09-16 15:34:42,,NaT,Heroes Evolved
217590,2ea2939e-c366-495a-af6c-4a1a7c64dc3f,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,THE GAME IS DEAD,1,2,1.1.9.0,2020-04-16 23:27:24,,NaT,Heroes Evolved
217607,9a80facb-af88-480c-ab1e-740eb2abf462,noctix nefarioux,https://play-lh.googleusercontent.com/a-/ACB-R...,Lost account,1,0,1.1.9.0,2021-09-27 17:58:51,"Hi, please try to create a new account on the ...",2021-09-29 08:15:32,Heroes Evolved


In [169]:
# Distribution of review scores in the filtered frame.
app_reviews_df_clear.score.value_counts()

5    61780
1    58312
4    24035
3    21013
2    15820
Name: score, dtype: int64

In [170]:
# Save the filtered reviews frame as a pickle file under the mounted Google Drive.
app_reviews_df_clear.to_pickle('/content/drive/MyDrive/Tesis/saved_data/review_scrap_up2.pkl')
# Alternative CSV export of app_reviews_df, kept commented out (not executed).
# app_reviews_df.to_csv('/content/drive/MyDrive/Tesis/reviews_MOBA#1.csv', index=None, header=True)

In [171]:
# Type of the first element of the 'content' value stored under index label 0.
# NOTE(review): the second [0] indexes into the content value itself (its first
# character when it is a string) — confirm whether the type of the whole value
# was intended.
type(app_reviews_df['content'][0][0])

str