In [32]:
"""
@author: Maxime Pommier
@email: maxime.pommier@outlook.com

Create a CSV file of the 500 most recent reviews of an iOS application, for a specific country.
> python3 applestore.py
"""


import requests
import pandas as pd

def quote(text):
    """Return ``text`` with every double quote doubled (CSV-style escaping).

    Splitting on the quote character and re-joining the pieces with two quote
    characters doubles each occurrence and leaves everything else untouched.
    """
    pieces = text.split('"')
    return '""'.join(pieces)

# --- Settings: edit these before running the cells below ---

# Field separator (not referenced by the cells visible in this notebook).
separator = ","
# Two-letter store country code, e.g. "fr" for France, "us" for United States.
country = "us"
# Application id, taken from the app's App Store URL
# (e.g. https://apps.apple.com/fr/app/spotify-musique-et-podcasts/id324684580).
application_id = "324684580"
# Name of the CSV file the reviews are saved to. It is created automatically
# and any previous content is overwritten each time the export runs.
file_name = "rating_ios.csv"

# First page of the customer-review feed, most recent reviews first.
url = f"https://itunes.apple.com/{country}/rss/customerreviews/id={application_id}/sortBy=mostRecent/json"
next_page_url = ""

# Empty frame that the download loop fills with one row per review.
review_columns = ["Author", "Rating", "Version", "Comment", "Label"]
df = pd.DataFrame(columns=review_columns)

df

Unnamed: 0,Author,Rating,Version,Comment,Label


In [33]:
# Walk the paginated customer-review feed starting at `url` and append one row
# per review to `df`, indexed by the review id. The loop stops on the last
# page, on a page without reviews, or on any non-200 response.
while url:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    res = requests.get(url, timeout=30)

    if res.status_code == 404:
        print('Page not found, maybe your country code [%s] is wrong ?' % country)
        break
    if res.status_code != 200:
        print('Status code behaviour not implemented [%s]' % res.status_code)
        break

    data = res.json()
    feed = data['feed']

    if 'entry' not in feed:
        print('No result, maybe your application_id [%s] is wrong ?' % application_id)
        break

    print('Go to : %s' % url)

    entries = feed['entry']
    # Defensive: a feed holding a single review may expose it as a bare object
    # instead of a list -- TODO confirm against the live feed.
    if isinstance(entries, dict):
        entries = [entries]

    # Values are stored as received. DataFrame.to_csv already doubles embedded
    # quotes on export, so escaping them here as well would corrupt the text.
    page_ids = [content['id']['label'] for content in entries]
    page_rows = [
        {
            'Author': content['author']['name']['label'],
            'Rating': content['im:rating']['label'],
            'Version': content['im:version']['label'],
            'Comment': content['content']['label'],
            'Label': country,
        }
        for content in entries
    ]

    # Build one frame per page rather than one per review: concatenating row
    # by row re-copies the whole frame every time.
    if page_rows:
        page_df = pd.DataFrame(
            page_rows,
            index=page_ids,
            columns=['Author', 'Rating', 'Version', 'Comment', 'Label'],
        )
        df = page_df if df.empty else pd.concat([df, page_df])

    # Look up the link to the next page among the feed's links.
    next_page_url = next(
        (
            item['attributes']['href']
            for item in feed.get('link', [])
            if item['attributes']['rel'] == "next"
        ),
        None,
    )
    if next_page_url is None:
        # No "next" link at all: nothing more to fetch.
        url = None
        continue

    # The advertised link does not mirror the parameters we requested: swap
    # the xml format for json and drop the extra query string.
    next_page_url = next_page_url.split('?')[0].replace('xml', 'json')

    # On the last page the "next" link points back at the page itself; detect
    # that to avoid an infinite loop.
    url = None if next_page_url == url else next_page_url

df.tail()

Go to : https://itunes.apple.com/us/rss/customerreviews/id=324684580/sortBy=mostRecent/json
Go to : https://itunes.apple.com/us/rss/customerreviews/page=2/id=324684580/sortby=mostrecent/json
Go to : https://itunes.apple.com/us/rss/customerreviews/page=3/id=324684580/sortby=mostrecent/json
Go to : https://itunes.apple.com/us/rss/customerreviews/page=4/id=324684580/sortby=mostrecent/json
Go to : https://itunes.apple.com/us/rss/customerreviews/page=5/id=324684580/sortby=mostrecent/json
Go to : https://itunes.apple.com/us/rss/customerreviews/page=6/id=324684580/sortby=mostrecent/json
Go to : https://itunes.apple.com/us/rss/customerreviews/page=7/id=324684580/sortby=mostrecent/json
Go to : https://itunes.apple.com/us/rss/customerreviews/page=8/id=324684580/sortby=mostrecent/json
Go to : https://itunes.apple.com/us/rss/customerreviews/page=9/id=324684580/sortby=mostrecent/json
Go to : https://itunes.apple.com/us/rss/customerreviews/page=10/id=324684580/sortby=mostrecent/json


Unnamed: 0,Author,Rating,Version,Comment,Label
9612425982,Stella Gomes Diaz,2,8.8.8,I really hate we only have six skips. What if ...,us
9612425446,vivian pha,1,8.8.8,ITS SO HARD TO NAVIGATE. ITS CHANGED FOR THE W...,us
9612419332,whyyythisupdate,4,8.8.8,I dislike the new update I can’t pick my own m...,us
9612408689,mayor of Pueblo,5,8.8.8,"Amazing. Can find any song , band. Even if I d...",us
9612374359,lillyfoxpug,4,8.8.8,Ads but still slay,us


In [34]:
# Write the collected reviews to the configured CSV file; the review id index
# becomes the first column of the file.
df.to_csv(path_or_buf=file_name)