In [1]:
# !pip install itunes-app-scraper-dmi

In [2]:
# !pip install app_store_scrapper

In [3]:
import pandas as pd

# for scraping app info from App Store
from itunes_app_scraper.scraper import AppStoreScraper

# for scraping app reviews from App Store
from app_store_scraper import AppStore

# for pretty printing data structures
from pprint import pprint

# for keeping track of timing
import datetime as dt
from tzlocal import get_localzone

# for building in wait times
import random
import time

## Set up loop to go through all apps
import os

In [4]:
# INI DIGANTI NAMA APP-NYA SAMA ID NYA
column = {'iOS_app_name':['tokopedia'],	'iOS_app_id':['1001394201']}

app_df = pd.DataFrame(column)
app_df.head()

Unnamed: 0,iOS_app_name,iOS_app_id
0,tokopedia,1001394201


In [5]:
## Get list of app names and app IDs
app_names = list(app_df['iOS_app_name'])
app_ids = list(app_df['iOS_app_id'])

In [6]:
## Set up App Store Scraper
scraper = AppStoreScraper()
app_store_list = list(scraper.get_multiple_app_details(app_ids))

## Pretty print the data for the first app
pprint(app_store_list[0])

{'advisories': '',
 'appletvScreenshotUrls': '',
 'artistId': 1001394200,
 'artistName': 'TOKOPEDIA PT',
 'artistViewUrl': 'https://apps.apple.com/nl/developer/tokopedia-pt/id1001394200?uo=4',
 'artworkUrl100': 'https://is2-ssl.mzstatic.com/image/thumb/Purple126/v4/77/6a/31/776a31a9-4994-610d-8688-a84397619e1a/MainAppIcon-0-0-1x_U007emarketing-0-7-0-85-220.png/100x100bb.jpg',
 'artworkUrl512': 'https://is2-ssl.mzstatic.com/image/thumb/Purple126/v4/77/6a/31/776a31a9-4994-610d-8688-a84397619e1a/MainAppIcon-0-0-1x_U007emarketing-0-7-0-85-220.png/512x512bb.jpg',
 'artworkUrl60': 'https://is2-ssl.mzstatic.com/image/thumb/Purple126/v4/77/6a/31/776a31a9-4994-610d-8688-a84397619e1a/MainAppIcon-0-0-1x_U007emarketing-0-7-0-85-220.png/60x60bb.jpg',
 'averageUserRating': 4.83645,
 'averageUserRatingForCurrentVersion': 4.83645,
 'bundleId': 'com.tokopedia.Tokopedia',
 'contentAdvisoryRating': '4+',
 'currency': 'EUR',
 'currentVersionReleaseDate': '2023-05-31T07:23:33Z',
 'description': 'Tokopedia,

In [7]:
## Convert list of dicts to Pandas DataFrame and write to csv
app_info_df = pd.DataFrame(app_store_list)
app_info_df.to_csv('./appsDetail.csv', index=False)
app_info_df.head()

Unnamed: 0,isGameCenterEnabled,artworkUrl60,artworkUrl512,artworkUrl100,artistViewUrl,screenshotUrls,advisories,features,supportedDevices,ipadScreenshotUrls,...,sellerName,primaryGenreName,primaryGenreId,currency,currentVersionReleaseDate,trackId,trackName,version,wrapperType,userRatingCount
0,False,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,https://apps.apple.com/nl/developer/tokopedia-...,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,,iosUniversal,"iPhone5s-iPhone5s,iPadAir-iPadAir,iPadAirCellu...",https://is3-ssl.mzstatic.com/image/thumb/Purpl...,...,TOKOPEDIA PT,Shopping,6024,EUR,2023-05-31T07:23:33Z,1001394201,Tokopedia,2.227.0,software,214


In [9]:
for app_name, app_id in zip(app_names, app_ids):
    
    # Get start time
    start = dt.datetime.now(tz=get_localzone())
    fmt= "%m/%d/%y - %T %p"
    
    # Print starting output for app
    print('---'*20)
    print('---'*20)    
    print(f'***** {app_name} started at {start.strftime(fmt)}')
    print()
    
    # Instantiate AppStore for app
    app_ = AppStore(country='id', app_name=app_name, app_id=app_id)
    
    # Scrape reviews posted since February 28, 2020 and limit to 10,000 reviews
    app_.review(how_many = 20000)
    
    reviews = app_.reviews
    
    # Add keys to store information about which app each review is for
    for rvw in reviews:
        rvw['app_name'] = app_name
        rvw['app_id'] = app_id
    
    # Print update that scraping was completed
    print(f"""Done scraping {app_name}. 
    Scraped a total of {app_.reviews_count} reviews.\n""")
    
    # Convert list of dicts to Pandas DataFrame and write to csv
    output_path = './' + app_name + '.csv' 
    review_df = pd.DataFrame(reviews)
    review_df.to_csv(output_path, mode='a', header=not os.path.exists(output_path))
    
    # Get end time
    end = dt.datetime.now(tz=get_localzone())
    
    # Print ending output for app
    print(f"""Successfully wrote {app_name} reviews to csv
    at {end.strftime(fmt)}.\n""")
    print(f'Time elapsed for {app_name}: {end-start}')
    print('---'*20)
    print('---'*20)
    print('\n')
    
    # Wait 5 to 10 seconds to start scraping next app
    time.sleep(random.randint(5,10))

------------------------------------------------------------
------------------------------------------------------------
***** tokopedia started at 06/08/23 - 22:01:32 PM



2023-06-08 22:01:34,048 [INFO] Base - Initialised: AppStore('id', 'tokopedia', 1001394201)
2023-06-08 22:01:34,049 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/id/app/tokopedia/id1001394201
2023-06-08 22:01:39,801 [INFO] Base - [id:1001394201] Fetched 280 reviews (280 fetched in total)
2023-06-08 22:01:45,436 [INFO] Base - [id:1001394201] Fetched 520 reviews (520 fetched in total)
2023-06-08 22:01:51,736 [INFO] Base - [id:1001394201] Fetched 860 reviews (860 fetched in total)
2023-06-08 22:01:57,300 [INFO] Base - [id:1001394201] Fetched 1100 reviews (1100 fetched in total)
2023-06-08 22:02:02,677 [INFO] Base - [id:1001394201] Fetched 1360 reviews (1360 fetched in total)
2023-06-08 22:02:09,034 [INFO] Base - [id:1001394201] Fetched 1700 reviews (1700 fetched in total)
2023-06-08 22:02:14,388 [INFO] Base - [id:1001394201] Fetched 2040 reviews (2040 fetched in total)
2023-06-08 22:02:19,864 [INFO] Base - [id:1001394201] Fetched 2380 reviews (2380 fetched in total)
202

2023-06-08 22:09:33,891 [INFO] Base - [id:1001394201] Fetched 17980 reviews (17980 fetched in total)
2023-06-08 22:09:39,477 [INFO] Base - [id:1001394201] Fetched 18160 reviews (18160 fetched in total)
2023-06-08 22:09:46,105 [INFO] Base - [id:1001394201] Fetched 18340 reviews (18340 fetched in total)
2023-06-08 22:09:51,739 [INFO] Base - [id:1001394201] Fetched 18500 reviews (18500 fetched in total)
2023-06-08 22:09:58,708 [INFO] Base - [id:1001394201] Fetched 18720 reviews (18720 fetched in total)
2023-06-08 22:10:04,674 [INFO] Base - [id:1001394201] Fetched 18940 reviews (18940 fetched in total)
2023-06-08 22:10:10,646 [INFO] Base - [id:1001394201] Fetched 19120 reviews (19120 fetched in total)
2023-06-08 22:10:16,271 [INFO] Base - [id:1001394201] Fetched 19280 reviews (19280 fetched in total)
2023-06-08 22:10:23,137 [INFO] Base - [id:1001394201] Fetched 19500 reviews (19500 fetched in total)
2023-06-08 22:10:28,748 [INFO] Base - [id:1001394201] Fetched 19680 reviews (19680 fetched 

Done scraping tokopedia. 
    Scraped a total of 20000 reviews.

Successfully wrote tokopedia reviews to csv
    at 06/08/23 - 22:10:39 PM.

Time elapsed for tokopedia: 0:09:06.082357
------------------------------------------------------------
------------------------------------------------------------


