# Apple iOS app reviews API
Extract iOS app reviews through the official API and transform the data into a CSV file.

In [0]:
import time
import typing
import requests

## Extract JSON from API

Returns `JSON response` if any.
<br>
Returns `None` if no JSON found.



In [0]:
def get_json(url, timeout: float = 30) -> typing.Union[dict, None]:
    """
    Fetch ``url`` with an HTTP GET and return its decoded JSON body.

    :param url:
        The url to get the JSON from.
    :param timeout:
        Seconds to wait for the server before giving up. Without a timeout
        a stalled connection would block the caller indefinitely.
    :return:
        The decoded JSON response, or ``None`` when the response is flagged
        as an error by ``is_error_response`` or its body is not valid JSON.
    """
    response = requests.get(url, timeout=timeout)
    if is_error_response(response):
        return None

    try:
        return response.json()
    except ValueError:
        # is_error_response does not flag a 503, and such a response may not
        # carry a JSON body. Report "no JSON" instead of crashing the caller.
        return None

### Check if requests response is success or not

Returns `False` if status_code is 200 (*success*) or 503 (*service unavailable*; the function first sleeps for `seconds_to_sleep` seconds), otherwise it returns `True` (*failed*).
<br>
This function should be used after calling the commands requests.post() and requests.get().

In [0]:
def is_error_response(http_response, seconds_to_sleep: float = 1) -> bool:
    """
    Tell whether a requests response should be treated as a failure.

    :param http_response:
        The response object returned from requests.post or requests.get.
    :param seconds_to_sleep:
        How long to pause when the status_code is 503, so that an
        unavailable service is not hit again immediately.
    :return:
        ``False`` for status 200, and for status 503 after sleeping;
        ``True`` for every other status code.
    """
    status = http_response.status_code

    if status != 503:
        return status != 200

    # Service unavailable: back off, but do not report it as an error.
    time.sleep(seconds_to_sleep)
    return False

## Get reviews from Apple itunes API server

Returns a list of dictionaries with each dictionary being one review.
<br>
The iTunes server returns at most 500 reviews per app.

In [0]:
def _entry_label(entry: dict, key: str) -> typing.Optional[str]:
    """Return ``entry[key]['label']``, or ``None`` when either level is missing."""
    return (entry.get(key) or {}).get('label')


def get_reviews(app_name, app_id, page=1) -> typing.List[dict]:
    """
    Collect the customer reviews of one app from the iTunes RSS feed.

    Pages are requested one after another, starting at ``page``, until a
    request yields no JSON, no 'feed' object, or a feed without any 'entry'.

    :param app_name:
        The name of the app for reference on dataset
    :param app_id:
        The id of the app on the itunes store
    :param page:
        The page id to start the loop. Paging stops at the first page that
        yields no reviews (presumably the final page + 1), and the reviews
        gathered so far are returned.
    :return:
        One dictionary per review with the keys 'app_name', 'app_version',
        'review_title', 'review_text' and 'review_rating'. A field missing
        from the feed is stored as ``None``.
    """
    reviews: typing.List[dict] = []

    while True:
        # Requesting JSON from iOS API server
        url = (f'https://itunes.apple.com/rss/customerreviews/id={app_id}/'
               f'page={page}/sortby=mostrecent/json')

        payload = get_json(url)
        if not payload:
            return reviews

        # `or {}` covers a response without a 'feed' key, which would
        # otherwise raise AttributeError on the next lookup.
        entries = (payload.get('feed') or {}).get('entry')
        if not entries:
            return reviews

        # NOTE(review): a page holding a single review appears to arrive as
        # one object instead of a list; confirm against the live feed.
        if isinstance(entries, dict):
            entries = [entries]

        reviews.extend(
            {
                'app_name': app_name,
                'app_version': _entry_label(entry, 'im:version'),
                'review_title': _entry_label(entry, 'title'),
                'review_text': _entry_label(entry, 'content'),
                'review_rating': _entry_label(entry, 'im:rating'),
            }
            for entry in entries
        )

        # Next page of reviews on the itunes store
        page += 1

In [0]:
# Apps to extract reviews for, as [app_name, app_id] pairs.
# The name only labels the rows of the dataset; the numeric id is what gets
# inserted into the iTunes request URL.

# RV related apps
app_list = [
    ['rv-life-rv-gps-campgrounds', 1275803975],
    ['copilot-gps-navigation', 504677517],
    ['camp-rv-tents-to-rv-parks', 370820516],
    ['smartrvroute', 606236753],
    ['rv-parks-campgrounds', 991632449],
    ['rv-park-finder', 695969271],
    ['campendium-rv-tent-camping', 1191380095],
]

In [0]:
import pandas as pd

# Gather the reviews of every listed app into a single dataframe
def dict_to_csv(app_list):
    """
    Fetch the reviews of each app in ``app_list`` and stack them together.

    Despite its name this function writes no file: it returns a DataFrame,
    which the caller can export with ``DataFrame.to_csv``.

    :param app_list:
        Iterable of (app_name, app_id) pairs.
    :return:
        A DataFrame with one row per review and a fresh 0..n-1 index. When
        ``app_list`` is empty or no app has any review, an empty DataFrame
        with the review columns is returned.
    """
    columns = ['app_name', 'app_version', 'review_title', 'review_text',
               'review_rating']

    df_list = []
    for app_name, app_id in app_list:
        # get the reviews for specific app
        app_reviews = get_reviews(app_name, app_id)

        # an app without reviews contributes no rows, so skip it
        if app_reviews:
            df_list.append(pd.DataFrame(app_reviews))

    # pd.concat raises ValueError when given nothing to concatenate
    if not df_list:
        return pd.DataFrame(columns=columns)

    # merge all dataframes together
    return pd.concat(df_list, ignore_index=True)

In [0]:
# Download the reviews of every app in app_list into one dataframe
df = dict_to_csv(app_list)
# (number of rows, number of columns)
print(df.shape)
# Preview the first rows
df.head()

(2008, 5)


Unnamed: 0,app_name,app_version,review_title,review_text,review_rating
0,rv-life-rv-gps-campgrounds,2.5.1,Essential for trip planning,The RV Trip Wizard is very very helpful planni...,5
1,rv-life-rv-gps-campgrounds,2.5.1,Great app! Excellent customer service.,I love that it integrates with RV Trip Wizard....,5
2,rv-life-rv-gps-campgrounds,2.5.0,Great content but keeps crashing,Like the content but keeps crashing on me - on...,2
3,rv-life-rv-gps-campgrounds,2.5.0,Love this App,I use this app for every detail of my RV trip ...,5
4,rv-life-rv-gps-campgrounds,2.5.0,Go reference,We always check out the sites for our trips.,5


## Create CSV from dataframe

In [0]:
# Save dataframe as CSV to share/analyze
# `df` holds the combined reviews returned by dict_to_csv; `df_all` exists
# only inside that function and is not defined at this level.
df.to_csv("itunes_rv_app_reviews.csv", index=False)

In [0]:
import pandas as pd
# Read the exported CSV back in to check what was written
df = pd.read_csv('itunes_rv_app_reviews.csv')
# (number of rows, number of columns)
print(df.shape)
# Preview the first rows
df.head()

Unnamed: 0,app_name,app_version,review_title,review_text,review_rating
0,rv-life-rv-gps-campgrounds,2.5.1,Essential for trip planning,The RV Trip Wizard is very very helpful planni...,5
1,rv-life-rv-gps-campgrounds,2.5.1,Great app! Excellent customer service.,I love that it integrates with RV Trip Wizard....,5
2,rv-life-rv-gps-campgrounds,2.5.0,Great content but keeps crashing,Like the content but keeps crashing on me - on...,2
3,rv-life-rv-gps-campgrounds,2.5.0,Love this App,I use this app for every detail of my RV trip ...,5
4,rv-life-rv-gps-campgrounds,2.5.0,Go reference,We always check out the sites for our trips.,5


### Connecting to MongoDB through driver connection
- Create cluster, admin user, whitelist IP address
- Save admin password
- Make sure Python version is 3.4+
- Download and import pymongo

In [75]:
!pip install pymongo
!python --version
!curl ipecho.net/plain

Python 3.6.9
35.184.46.90

In [9]:
import pymongo
from urllib.parse import quote_plus

# Placeholder for the admin password. Do not commit the real value.
password = "..."

# Reserved characters in the password (e.g. '@', ':', '/') must be
# percent-escaped before being embedded in a MongoDB URI.
connection_string = (
    f"mongodb://admin:{quote_plus(password)}@"
    "cluster-aws-free-shard-00-00-ukfgg.mongodb.net:27017,"
    "cluster-aws-free-shard-00-01-ukfgg.mongodb.net:27017,"
    "cluster-aws-free-shard-00-02-ukfgg.mongodb.net:27017"
    "/test?ssl=true&replicaSet=Cluster-AWS-FREE-shard-0"
    "&authSource=admin&retryWrites=true&w=majority"
)

# Using Python Driver 3.4 version connection string
client = pymongo.MongoClient(connection_string)
db = client.test
db

Database(MongoClient(host=['cluster-aws-free-shard-00-02-ukfgg.mongodb.net:27017', 'cluster-aws-free-shard-00-00-ukfgg.mongodb.net:27017', 'cluster-aws-free-shard-00-01-ukfgg.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, ssl=True, replicaset='Cluster-AWS-FREE-shard-0', authsource='admin', retrywrites=True, w='majority'), 'test')

In [23]:
# Check if client nodes are working properly
# `nodes` evaluates to a set of (host, port) pairs, one per server the
# client is currently connected to.
client.nodes

frozenset({('cluster-aws-free-shard-00-00-ukfgg.mongodb.net', 27017),
           ('cluster-aws-free-shard-00-01-ukfgg.mongodb.net', 27017),
           ('cluster-aws-free-shard-00-02-ukfgg.mongodb.net', 27017)})

In [24]:
# Create database and collection
# Attribute access only builds handles for the 'itune_reviews' database and
# its 'app_reivews' collection.
# NOTE(review): 'app_reivews' looks like a typo for 'app_reviews'. Renaming
# it would address a different collection than one already populated under
# this name, so confirm before changing.
reviews_collection = client.itune_reviews.app_reivews; reviews_collection

Collection(Database(MongoClient(host=['cluster-aws-free-shard-00-02-ukfgg.mongodb.net:27017', 'cluster-aws-free-shard-00-00-ukfgg.mongodb.net:27017', 'cluster-aws-free-shard-00-01-ukfgg.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, ssl=True, replicaset='Cluster-AWS-FREE-shard-0', authsource='admin', retrywrites=True, w='majority'), 'itune_reviews'), 'app_reivews')

In [73]:
# Load app reviews into the collection
for app_name, app_id in app_list:
    app_reviews_list = get_reviews(app_name, app_id)
    # insert_many rejects an empty list, so skip apps without any review
    if app_reviews_list:
        reviews_collection.insert_many(app_reviews_list)

# Check document total count
reviews_collection.estimated_document_count()

2008

In [84]:
# Number of 5 ratings reviews
# Ratings are stored as the feed's 'label' strings, so the filter matches
# the string '5' rather than the integer 5.
reviews_collection.count_documents({'review_rating': '5'})

1176

In [115]:
# Documents of 1 rating reviews from a specific app and version
# All three conditions must match; list() pulls every matching document
# out of the cursor returned by find().
list(reviews_collection.find({'app_name': 'rv-life-rv-gps-campgrounds', 
                              'review_rating': '1',
                              'app_version': '2.2.0'}))

[{'_id': ObjectId('5e61456d7971a3d29c1efeaf'),
  'app_name': 'rv-life-rv-gps-campgrounds',
  'app_version': '2.2.0',
  'review_rating': '1',
  'review_text': 'Will not do anything without paid subscription.',
  'review_title': 'Does nothing'},
 {'_id': ObjectId('5e61456d7971a3d29c1efeb0'),
  'app_name': 'rv-life-rv-gps-campgrounds',
  'app_version': '2.2.0',
  'review_rating': '1',
  'review_text': 'Tried several times to obtain a verification email and never receive one. I look in my spam folder to ensure all boxes are checked...MO JOY.',
  'review_title': 'Don’t receive confirmation email'}]

In [0]:
# Full list of collection
# find() without a filter matches every document; list() loads them all
# into memory at once.
list(reviews_collection.find())

### Removing data from MongoDB

In [0]:
"""Remove all documents in a collection"""
# client.database_name.collection_name.drop()

"""Remove collection from database"""
# client.database_name.drop_collection("collection_name")

"""Remove database from cluster"""
# client.drop_database("database_name")