In [1]:
### install some required packages
!pip install -qq google-play-scraper
!pip install -qq -U watermark

In [3]:
### (re)load the watermark extension and record the environment
# -v reports the Python/IPython versions; -p reports the versions of the listed packages.
%reload_ext watermark
%watermark -v -p pandas,matplotlib,seaborn,google_play_scraper

Python implementation: CPython
Python version       : 3.10.4
IPython version      : 8.11.0

pandas             : 1.5.3
matplotlib         : 3.7.0
seaborn            : 0.12.2
google_play_scraper: 1.2.3



In [10]:
import json
import pandas as pd
from tqdm import tqdm

import seaborn as sns
import matplotlib.pyplot as plt

from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import TerminalFormatter

from google_play_scraper import Sort, reviews, app

%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.2)

In [6]:
# Google Play package ids of the apps to scrape.
app_packages = [
    'com.anydo',
    'com.todoist',
    'com.ticktick.task',
    'com.habitrpg.android.habitica',
    'cc.forestapp',
    'com.oristats.habitbull',
    'com.levor.liferpgtasks',
    'com.habitnow',
    'com.microsoft.todos',
    'prox.lab.calclock',
    'com.gmail.jmartindev.timetune',
    'com.artfulagenda.app',
    'com.tasks.android',
    'com.appgenix.bizcal',
    'com.appxy.planner',
]

In [11]:
## scraping app information
# Fetch the store listing (English, US store) of every package in app_packages.
app_infos = []

for ap in tqdm(app_packages):
    info = app(ap, lang='en', country='us')
    # Drop the 'comments' entry if there is one. pop() with a default does not
    # raise KeyError when the scraper's result has no such key.
    info.pop('comments', None)
    app_infos.append(info)

100%|██████████| 15/15 [00:08<00:00,  1.86it/s]


In [12]:

def print_json(json_object):
  """Print ``json_object`` as indented, key-sorted, syntax-highlighted JSON.

  Values that ``json`` cannot serialise natively are converted with ``str``.
  """
  rendered = json.dumps(json_object, default=str, sort_keys=True, indent=2)
  colored = highlight(rendered, JsonLexer(), TerminalFormatter())
  print(colored)

In [13]:
# Inspect the first scraped app record as highlighted JSON.
print_json(app_infos[0])

{[37m[39;49;00m
[37m  [39;49;00m[94m"adSupported"[39;49;00m:[37m [39;49;00m[34mfalse[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"appId"[39;49;00m:[37m [39;49;00m[33m"com.anydo"[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"containsAds"[39;49;00m:[37m [39;49;00m[34mfalse[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"contentRating"[39;49;00m:[37m [39;49;00m[33m"Everyone"[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"contentRatingDescription"[39;49;00m:[37m [39;49;00m[34mnull[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"currency"[39;49;00m:[37m [39;49;00m[33m"USD"[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"description"[39;49;00m:[37m [39;49;00m[33m"\ud83e\udd47 <b>\"#1 to do list app out there\u201d</b> - WSJ\r\n\ud83c\udfc6 <b>Editor's Choice</b> by Google\r\n\r\nOver 30M people rely on Any.do to stay organized and get more done.\r\nIt's a simple to do list app with reminders, planner & calendar - all in one.\r\n\

In [15]:
# Collect the scraped app records into a DataFrame and save them to apps.csv.
app_infos_df = pd.DataFrame(app_infos)
# index=False is the documented (boolean) way to leave the row index out of the
# file; index=None only had that effect because None is falsy.
app_infos_df.to_csv('apps.csv', index=False, header=True)

In [17]:
# Preview the first rows of the app-info table.
app_infos_df.head()

Unnamed: 0,title,description,descriptionHTML,summary,installs,minInstalls,realInstalls,score,ratings,reviews,...,videoImage,contentRating,contentRatingDescription,adSupported,containsAds,released,updated,version,appId,url
0,Any.do - To do list & Calendar,"🥇 <b>""#1 to do list app out there”</b> - WSJ\r...",🥇 <b>&quot;#1 to do list app out there”</b> - ...,"Simple planner with to-do list, tasks, reminde...","10,000,000+",10000000,29065588,4.40599,461079,35953,...,,Everyone,,False,False,"Nov 10, 2011",1676919646,Varies with device,com.anydo,https://play.google.com/store/apps/details?id=...
1,Todoist: to-do list & planner,Trusted by 30+ million people and teams worldw...,Trusted by 30+ million people and teams worldw...,Simple yet powerful to-do list. Habit planner ...,"10,000,000+",10000000,36613754,4.469473,251477,15741,...,,Everyone,,False,False,"Nov 18, 2012",1677587146,Varies with device,com.todoist,https://play.google.com/store/apps/details?id=...
2,TickTick:To-do list & Tasks,🥇<b>Great to-do list app for new Android devic...,🥇<b>Great to-do list app for new Android devic...,"Time Management & Planner Reminder, Calendar,...","5,000,000+",5000000,6018277,4.65875,111952,6280,...,,Everyone,,False,False,"Jun 19, 2013",1678170433,Varies with device,com.ticktick.task,https://play.google.com/store/apps/details?id=...
3,Habitica: Gamify Your Tasks,Habitica is a free habit-building and producti...,Habitica is a free habit-building and producti...,Treat your life like a game to stay motivated ...,"1,000,000+",1000000,3896211,4.103703,23819,2614,...,,Everyone,,True,True,"Dec 10, 2015",1678124950,4.1.5,com.habitrpg.android.habitica,https://play.google.com/store/apps/details?id=...
4,Forest: Focus for Productivity,Can't stop scrolling? Lack of self-control? Fo...,Can&#39;t stop scrolling? Lack of self-control...,Stay focused on your goal or to dos and get mo...,"10,000,000+",10000000,30523488,4.736929,569774,8384,...,,Everyone,,True,True,"Aug 25, 2014",1677813216,Varies with device,cc.forestapp,https://play.google.com/store/apps/details?id=...


In [18]:
### scrape app reviews
# For every app and every star rating (1-5), fetch one batch of reviews per sort
# order. 3-star reviews are requested with a larger count (200 instead of 100).
app_reviews = []

for ap in tqdm(app_packages):
  for score in range(1, 6):
    for sort_order in [Sort.MOST_RELEVANT, Sort.NEWEST]:
      # reviews() returns a pair; only the first element (the review list) is
      # used. The second is bound to a named variable instead of `_`, because
      # assigning `_` in a notebook cell shadows IPython's last-output variable.
      rvs, _continuation_token = reviews(
        ap,
        lang='en',
        country='us',
        sort=sort_order,
        count=200 if score == 3 else 100,
        filter_score_with=score
      )
      # Tag each review with the sort order and app it was fetched for.
      for r in rvs:
        r['sortOrder'] = 'most_relevant' if sort_order == Sort.MOST_RELEVANT else 'newest'
        r['appId'] = ap
      app_reviews.extend(rvs)

100%|██████████| 15/15 [01:21<00:00,  5.40s/it]


In [19]:
# Inspect the first scraped review as highlighted JSON.
print_json(app_reviews[0])

{[37m[39;49;00m
[37m  [39;49;00m[94m"appId"[39;49;00m:[37m [39;49;00m[33m"com.anydo"[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"at"[39;49;00m:[37m [39;49;00m[33m"2023-02-21 01:19:01"[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"content"[39;49;00m:[37m [39;49;00m[33m"Used to be great app due to unique \"Moment\" feature no one else had. Got buried and de-emphasized in latest update by removing it from long press of the home screen icon. Devs, please return it there. I know how to set a time for it to launch. I know how to trigger it from deep in the settings. Please put it back up front like it was. You can put it along with the \"new event\" and I appreciate that you are doing more calendar and schedule tie ins. Please add it back to the icon long press. Thx."[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"repliedAt"[39;49;00m:[37m [39;49;00m[33m"2023-02-21 14:37:15"[39;49;00m,[37m[39;49;00m
[37m  [39;49;00m[94m"replyContent"[39;49;00m:

In [20]:
# Collect the scraped reviews into a DataFrame and save them to reviews.csv.
app_reviews_df = pd.DataFrame(app_reviews)
# index=False is the documented (boolean) way to leave the row index out of the
# file; index=None only had that effect because None is falsy.
app_reviews_df.to_csv('reviews.csv', index=False, header=True)

In [21]:
# Preview the first rows of the reviews table.
app_reviews_df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
0,d570afda-c975-4913-8f7d-43ef510e0c45,Gary O Cims,https://play-lh.googleusercontent.com/a-/ACB-R...,"Used to be great app due to unique ""Moment"" fe...",1,45,5.17.0.68,2023-02-21 01:19:01,We forwarded this feedback to our team and we ...,2023-02-21 14:37:15,most_relevant,com.anydo
1,42367ddf-700c-46d7-b679-39501b5aa06b,Aleigha Pruitt,https://play-lh.googleusercontent.com/a-/ACB-R...,Started a free trial to premium without my per...,1,42,5.17.0.68,2023-02-10 21:49:18,Our 7 days trial allows free users to give Pre...,2023-02-12 16:12:42,most_relevant,com.anydo
2,4907b599-3d12-4c4c-a4d7-1e847de87903,Gabby Markoff,https://play-lh.googleusercontent.com/a/AGNmyx...,All of the great features such as list sharing...,1,19,5.17.0.68,2023-02-12 22:55:41,A payment can only be made upon the user's aut...,2023-02-13 14:02:36,most_relevant,com.anydo
3,7060c17b-d35f-4202-852d-33a98fdcfe91,Megan Shrout,https://play-lh.googleusercontent.com/a/AGNmyx...,The recent update has really limited the featu...,1,2,5.17.0.68,2023-02-17 15:18:59,We did not change the Grocery list functionali...,2023-02-19 14:05:46,most_relevant,com.anydo
4,e3963cdf-5eac-467e-a0d1-58e7a604bf06,Jack The,https://play-lh.googleusercontent.com/a/AGNmyx...,The option to sync with Google Assistant is co...,1,1,,2023-02-20 11:51:37,We are not aware of any issues with the Google...,2023-02-20 13:54:44,most_relevant,com.anydo
