In [1]:
pip install google-play-scraper

Collecting google-play-scraper
  Downloading google_play_scraper-1.2.2-py3-none-any.whl (28 kB)
Installing collected packages: google-play-scraper
Successfully installed google-play-scraper-1.2.2
Note: you may need to restart the kernel to use updated packages.


# Loading the Libraries

In [5]:
import json
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import TerminalFormatter
from google_play_scraper import Sort, reviews, app

In [6]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for high-resolution ("retina") figures.
%config InlineBackend.figure_format = 'retina'
# Global seaborn look: white grid background, muted palette, fonts scaled by 1.2.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

In [8]:
# Package identifiers of the apps to scrape; every loop below iterates over this list.
app_packages = ['com.ticktick.task']

1

In [9]:
# Collect one metadata dict per package (English / US storefront).
app_info = []
for ap in tqdm(app_packages):
    info = app(ap, lang='en', country='us')
    # Drop the bulky 'comments' entry when present. pop() with a default
    # avoids a KeyError if the scraper result does not include that key.
    info.pop('comments', None)
    app_info.append(info)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.17it/s]


In [10]:
def print_json(json_object):
    """Print ``json_object`` as indented, syntax-highlighted JSON.

    Values that ``json`` cannot serialise natively are converted with ``str``
    (``default=str``). The text is coloured with Pygments' ``JsonLexer`` and
    ``TerminalFormatter`` before being printed.
    """
    serialised = json.dumps(json_object, indent=2, default=str)
    coloured = highlight(serialised, JsonLexer(), TerminalFormatter())
    print(coloured)

In [11]:
# Inspect the metadata collected for the first app in app_info.
print_json(app_info[0])

{
  [94m"title"[39;49;00m: [33m"TickTick:To-do list & Tasks"[39;49;00m,
  [94m"description"[39;49;00m: [33m"\ud83e\udd47<b>Great to-do list app for new Android device</b> - The Verge\r\n\ud83e\udd47<b>The best to-do app for Android</b> - MakeUseOf\r\n\ud83e\udd47<b>The best to-do list app for 2020</b> - Wirecutter (A New York Times Company)\r\n\ud83d\ude4cMKBHD's favorite productivity tool\r\n\r\nTickTick is a simple and effective to-do list and task manager app which helps you make schedule, manage time, stay focused, remind about deadlines and organize life at home, work and everywhere else.\ud83d\uddd3 \ud83d\udd14 \u2714\r\n\r\nTickTick helps you make the most of your day and get things done (GTD). Whether there is an idea you want to capture, personal goals to achieve, work to accomplish, habits to track, projects to collaborate with colleagues, or even a shopping list to share with family (with the help of a list maker). Achieve your goals with our productivity planner.\r\

In [12]:
def format_title(title):
    """Return a short label for an app title.

    The title is cut at the first ':' (or, when there is no ':', at the first
    '-') and the result is limited to 10 characters. Titles without either
    separator are simply truncated to 10 characters.

    Args:
        title: Full app title string.

    Returns:
        The shortened title (at most 10 characters).
    """
    sep_index = title.find(':')
    if sep_index == -1:
        sep_index = title.find('-')
    if sep_index != -1:
        title = title[:sep_index]
    # Always return a string, also when no separator was found.
    return title[:10]


def plot_app_icons(apps, n_rows=2, figsize=(14, 5)):
    """Draw the icon of every app in ``apps`` on a grid, titled with its short name.

    Render the grid by calling ``plot_app_icons(app_info)`` in a cell of its own.

    Args:
        apps: Sequence of app-info dicts; each must provide 'icon' and 'title'.
        n_rows: Maximum number of grid rows (fewer are used for short lists).
        figsize: Figure size forwarded to ``plt.subplots``.

    Returns:
        The created figure, or ``None`` when ``apps`` is empty.
    """
    if not apps:
        return None
    # Never request more rows than apps, and never zero columns.
    rows = max(1, min(n_rows, len(apps)))
    cols = (len(apps) + rows - 1) // rows  # ceiling division
    # squeeze=False keeps `axs` a 2-D array even for a 1x1 grid, so .flat works.
    fig, axs = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    for ax in axs.flat:
        ax.axis('off')  # also blanks unused cells when the grid is not full
    for ax, ai in zip(axs.flat, apps):
        # NOTE(review): ai['icon'] is presumably a URL; newer Matplotlib releases
        # may refuse URLs in plt.imread - confirm against the installed version.
        img = plt.imread(ai['icon'])
        ax.imshow(img)
        ax.set_title(format_title(ai['title']))
    return fig
     
                

In [13]:
# Persist the app metadata. The file is named 'apps.csv' (the original
# 'apps_csv' had no extension and did not match the 'reviews.csv' export).
app_info_df = pd.DataFrame(app_info)
app_info_df.to_csv('apps.csv', index=False, header=True)

# Scraping App Reviews

In [30]:
# Gather reviews for every package, per star rating and per sort order.
# Each review dict is tagged with the sort order and the package it came from.
app_reviews = []

for ap in tqdm(app_packages):
    for score in range(1, 6):
        # Two-star reviews are requested in smaller batches than the other ratings.
        per_request = 10 if score == 2 else 60
        for sort_order in (Sort.MOST_RELEVANT, Sort.NEWEST):
            if sort_order == Sort.MOST_RELEVANT:
                sort_label = 'most_relevant'
            else:
                sort_label = 'newest'
            # The second return value is not used here.
            batch, _ = reviews(
                ap,
                lang='en',
                country='us',
                sort=sort_order,
                count=per_request,
                filter_score_with=score,
            )
            for review in batch:
                review['sortOrder'] = sort_label
                review['appId'] = ap
            app_reviews.extend(batch)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.36s/it]


In [32]:
# Inspect the first scraped review, including the sortOrder/appId tags added above.
print_json(app_reviews[0])

{
  [94m"reviewId"[39;49;00m: [33m"d413330d-832d-45fb-94b0-67ae82652e06"[39;49;00m,
  [94m"userName"[39;49;00m: [33m"Abhinav Konagala"[39;49;00m,
  [94m"userImage"[39;49;00m: [33m"https://play-lh.googleusercontent.com/a-/AFdZucpxQwkhBJ2x_Bv8_AXZm5nBsmWGRbVs6LVt0mhwpB4"[39;49;00m,
  [94m"content"[39;49;00m: [33m"Great app with lots of features, I even subscribed for the full version. However, I just can't seem to open the app. Whenever I open the app, it closes down. I have to open the app 10-15 times for it to stay open to add my tasks or check my calendar, which just sucks. I'm not sure if this bug is being worked on at the moment but it's definitely making me want to delete the app and get a refund on my subscription."[39;49;00m,
  [94m"score"[39;49;00m: [34m1[39;49;00m,
  [94m"thumbsUpCount"[39;49;00m: [34m30[39;49;00m,
  [94m"reviewCreatedVersion"[39;49;00m: [33m"6.2.9.1"[39;49;00m,
  [94m"at"[39;49;00m: [33m"2022-06-20 18:41:15"[39;49;00m,
  [94m"r

In [31]:
# Total number of review dicts collected across all packages, scores and sort orders.
len(app_reviews)

500

In [33]:
# Turn the list of review dicts into a DataFrame and save it to 'reviews.csv'
# with a header row.
app_reviews_df = pd.DataFrame(app_reviews)
app_reviews_df.to_csv('reviews.csv', index=None, header=True)

In [41]:
# Reload the saved reviews from disk.
data = pd.read_csv("reviews.csv")
# A bare `data.shape` in the middle of a cell is discarded; print it so it shows.
print(data.shape)
data.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
0,d413330d-832d-45fb-94b0-67ae82652e06,Abhinav Konagala,https://play-lh.googleusercontent.com/a-/AFdZu...,"Great app with lots of features, I even subscr...",1,30,6.2.9.1,2022-06-20 18:41:15,,,most_relevant,com.ticktick.task
1,cbe4e3af-02dd-44d0-98da-406fd1d8ba11,A M,https://play-lh.googleusercontent.com/a/AItbvm...,I've been a loyal TickTick user for years. Ove...,1,81,6.2.4.1,2022-03-25 18:09:01,,,most_relevant,com.ticktick.task
2,7e6c0022-bfee-45e0-b77d-6c613d0f1e4f,Jenn Bishop,https://play-lh.googleusercontent.com/a-/AFdZu...,I used to love TickTick for the combination of...,1,4,,2020-09-28 22:52:03,"Hi Jenn, sorry to hear this. The issue you des...",2020-10-01 12:57:16,most_relevant,com.ticktick.task
3,4efdd6e9-fac2-473f-be4f-d3b0f22a1d4a,Ashley Pickrell,https://play-lh.googleusercontent.com/a/AItbvm...,Transitioned over to Tick Tick from Wunderlist...,1,47,5.9.1.1,2021-03-01 02:06:46,"Hi there, sorry for the inconvenience. This is...",2021-03-01 06:39:48,most_relevant,com.ticktick.task
4,980efdc8-3a96-458c-bab5-e3cb20977983,Derek Myers,https://play-lh.googleusercontent.com/a-/AFdZu...,They double billed me for my premium subscript...,1,40,5.7.0,2020-06-16 21:41:52,,,most_relevant,com.ticktick.task
