In [7]:
import json
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import TerminalFormatter
from tqdm import tqdm

from google_play_scraper import Sort, reviews, app

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use the high-resolution 'retina' format for inline figures.
%config InlineBackend.figure_format = 'retina'
# Apply seaborn's global plot styling: white grid, muted palette, larger fonts.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

## Collecting app data

In [10]:
# Google Play package identifiers of the apps to collect data for.
# Order matters: downstream cells iterate over this list in sequence.
app_packages = [
    "com.anydo",
    "com.todoist",
    "com.ticktick.task",
    "com.habitrpg.android.habitica",
    "cc.forestapp",
    "com.oristats.habitbull",
    "com.levor.liferpgtasks",
    "com.habitnow",
    "com.microsoft.todos",
    "prox.lab.calclock",
    "com.artfulagenda.app",
    "com.tasks.android",
    "com.appgenix.bizcal",
    "com.appxy.planner",
    "com.android.chrome",
]

In [12]:
# Fetch the store listing details for every package in app_packages.
app_infos = []

for package_id in tqdm(app_packages):
    info = app(package_id, lang='en', country='us')
    # The 'comments' field is not wanted in the stored record. pop() with a
    # default (instead of `del`) keeps the loop from raising KeyError when a
    # listing comes back without that key.
    info.pop('comments', None)
    app_infos.append(info)

100%|██████████| 14/14 [00:04<00:00,  3.14it/s]


In [14]:
def print_json(json_object):
    """Pretty-print ``json_object`` as syntax-highlighted JSON.

    The object is serialized with a two-space indent and sorted keys; values
    that ``json`` cannot encode natively are converted with ``str``. The
    resulting text is highlighted with pygments (``JsonLexer`` +
    ``TerminalFormatter``) and printed.
    """
    serialized = json.dumps(json_object, indent=2, sort_keys=True, default=str)
    colored = highlight(serialized, JsonLexer(), TerminalFormatter())
    print(colored)
print_json(app_infos[0])

{
  [94m"adSupported"[39;49;00m: [34mnull[39;49;00m,
  [94m"androidVersion"[39;49;00m: [33m"Varies"[39;49;00m,
  [94m"androidVersionText"[39;49;00m: [33m"Varies with device"[39;49;00m,
  [94m"appId"[39;49;00m: [33m"com.anydo"[39;49;00m,
  [94m"containsAds"[39;49;00m: [34mfalse[39;49;00m,
  [94m"contentRating"[39;49;00m: [33m"Everyone"[39;49;00m,
  [94m"contentRatingDescription"[39;49;00m: [34mnull[39;49;00m,
  [94m"currency"[39;49;00m: [33m"USD"[39;49;00m,
  [94m"description"[39;49;00m: [33m"\ud83e\udd47 <b>\"#1 to do list app out there\u201d</b> - WSJ\r\n\ud83c\udfc6 <b>Editor's Choice</b> by Google\r\n\r\nOver 30M people rely on Any.do to stay organized and get more done.\r\nIt's a simple to do list app with reminders, planner & calendar - all in one.\r\n\r\n<b>\ud83e\udd47 \"A MUST HAVE APP\" (Lifehacker, NYTimes, USA TODAY).</b>\r\n\r\nAny.do is a free to-do list, planner & calendar app for managing and organizing your daily tasks, to-do lists, no

In [19]:
# Tabulate the collected app records (one row per entry of app_infos).
df_app_infos = pd.DataFrame(app_infos)

# to_csv does not create missing parent directories, so make sure ./data
# exists before writing; otherwise this cell fails on a fresh checkout.
os.makedirs('./data', exist_ok=True)
# index=False: the row index is not written to the file.
df_app_infos.to_csv('./data/app_data.csv', index=False, header=True)

## Scraping app reviews

In [27]:
# Collect reviews for every app, for each star rating (1-5) and for both
# sort orders, tagging each review with the sort order and app it came from.
app_reviews = []

# NOTE: the loop variable must not be named `app` -- that would rebind the
# `app` function imported from google_play_scraper to a string and break any
# later call to it (e.g. re-running the app-info cell).
for app_id in tqdm(app_packages):
    for score in range(1, 6):
        for sort_order in [Sort.MOST_RELEVANT, Sort.NEWEST]:
            # Only the first element of the reviews() result is used here.
            rvs = reviews(
                app_id,
                lang='en',
                country='us',
                sort=sort_order,
                # Twice as many 3-star reviews are requested as for any
                # other score.
                count=200 if score == 3 else 100,
                filter_score_with=score
            )[0]

            # The label is the same for every review in this batch, so
            # compute it once instead of once per review.
            sort_label = 'most_relevant' if sort_order == Sort.MOST_RELEVANT else 'newest'
            for r in rvs:
                r['sortOrder'] = sort_label
                r['appId'] = app_id
            app_reviews.extend(rvs)

100%|██████████| 14/14 [01:30<00:00,  6.47s/it]


In [30]:
# Build a single table from all scraped review records and preview the
# first five rows.
df_app_reviews = pd.DataFrame(data=app_reviews)
df_app_reviews.head(n=5)

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
0,gp:AOqpTOEdg6yUOvcZHEm6vmnCD7vXQW7wuauIJaKhKtw...,Uchihasocksy,https://play-lh.googleusercontent.com/-jhjI7vI...,Was simply looking for a widget that could syn...,1,0,5.10.0.8,2021-03-05 00:37:04,Any.do’s Calendar Integration syncs events to ...,2021-03-07 10:03:08,most_relevant,com.anydo
1,gp:AOqpTOHTeNGxl9Zhlz-BM6ybraEsnVg0BLDubF8E_fQ...,kelvin ho,https://play-lh.googleusercontent.com/-rSnBD4Y...,Very very bad experience with the management! ...,1,5,5.10.0.8,2021-02-26 04:17:49,"Hi Kelvin, we've checked our records and could...",2021-02-28 09:36:51,most_relevant,com.anydo
2,gp:AOqpTOH5OkwJH-nRT4nopD_JNTIYpWs1xWzOsFE-pn7...,Charles Green,https://play-lh.googleusercontent.com/a-/AOh14...,"Ugh! After years of using this app, I am so ve...",1,10,5.9.0.2,2021-02-17 19:10:03,"From checking our records, the issue you've re...",2021-02-18 11:22:00,most_relevant,com.anydo
3,gp:AOqpTOG4LKg25ejg1Hw1TWs6_gIXhpwJRZZpXssMygx...,Andi Tan,https://play-lh.googleusercontent.com/-vozop-l...,I'm a new user. I think this app's great. I tr...,1,13,5.9.0.2,2021-02-07 11:54:54,The Quick-add bar on the device's Notification...,2021-02-09 09:19:18,most_relevant,com.anydo
4,gp:AOqpTOGQ7NShvSVr9Vqz67MSeTN_3rkBuYtQRtsyAo5...,Drew Dayman,https://play-lh.googleusercontent.com/a-/AOh14...,Bought it to use with Alexa. After multiple at...,1,1,5.9.0.2,2021-02-12 20:35:18,,NaT,most_relevant,com.anydo


In [31]:
# to_csv does not create missing parent directories, so make sure ./data
# exists before writing; otherwise this cell fails on a fresh checkout.
os.makedirs('./data', exist_ok=True)
# index=False: the row index is not written to the file.
df_app_reviews.to_csv('./data/app_review.csv', index=False, header=True)