In [None]:
# https://github.com/googleapis/google-api-python-client/blob/master/docs/oauth.md
# https://developers.google.com/identity/protocols/OAuth2ServiceAccount
# https://pypi.org/project/google-api-python-client/
    
# !pip install google-auth
# !pip install google-auth-oauthlib
# !pip install --upgrade oauth2client
# !pip install google-api-python-client

# !pip install --upgrade google-auth
# !pip install pyjwt

In [None]:
from google.oauth2 import service_account
import googleapiclient.discovery

def get_google_play_reviews(package_name='com.ibotta.android',
                            max_results=100,
                            service_account_file='google_play_api_keys.json',
                            service=None):
    """Download all available reviews for an app from the Google Play Developer API.

    Requests ``reviews().list`` repeatedly, following
    ``tokenPagination.nextPageToken`` from each response, until a response
    carries no further token.

    Args:
        package_name: Android package name whose reviews are requested.
        max_results: Value sent as ``maxResults`` (page size) on every request.
        service_account_file: Path to the service-account JSON key used to
            build credentials. Ignored when ``service`` is supplied.
        service: Optional pre-built ``androidpublisher`` client. When ``None``
            a client is built from ``service_account_file``.

    Returns:
        list: The review dicts from every page, in the order the API
        returned them. Empty when the API returns no ``reviews`` key.
    """
    if service is None:
        scopes = ['https://www.googleapis.com/auth/androidpublisher']
        credentials = service_account.Credentials.from_service_account_file(
            service_account_file, scopes=scopes)
        service = googleapiclient.discovery.build(
            'androidpublisher', 'v3', credentials=credentials)

    results = []
    page_token = None

    # Cursor pagination pattern, see:
    # https://stackoverflow.com/questions/28589239/python-facebook-api-cursor-pagination
    while True:
        request_kwargs = {'packageName': package_name, 'maxResults': max_results}
        if page_token:
            # Only the follow-up requests carry a token; the first one has none.
            request_kwargs['token'] = page_token

        response = service.reviews().list(**request_kwargs).execute()

        # Collect this page BEFORE looking for the next token: the last page
        # has no token, and its reviews must not be discarded.
        results.extend(response.get('reviews', []))

        page_token = response.get('tokenPagination', {}).get('nextPageToken')
        if not page_token:
            # No token means there are no more pages.
            break

    return results

In [None]:
# Fetch the raw review records from the Google Play API (performs network
# requests and reads the service-account key file named in the function).
reviews = get_google_play_reviews()

In [None]:
import pandas as pd
from pandas.io.json import json_normalize
from datetime import datetime
import re

def _clean_review_text(text):
    """Normalise review text for single-line, comma-delimited storage."""
    if not isinstance(text, str):
        # Missing text (None/NaN) is passed through instead of crashing re.sub.
        return text
    # Tabs, newlines, carriage returns and backslashes all become spaces.
    text = re.sub(r'[\t\n\r\\]', ' ', text)
    # Commas are escaped as backslash-comma (done after backslashes were removed).
    text = text.replace(',', '\\,')
    # Collapse every run of spaces, not just pairs.
    text = re.sub(r' {2,}', ' ', text)
    return text.strip()


def _format_epoch_seconds(seconds):
    """Render epoch seconds (int or numeric string) as a 'YYYY-MM-DD HH:MM:SS' UTC string."""
    if seconds is None:
        return None
    return pd.to_datetime(int(seconds), unit='s').strftime('%Y-%m-%d %H:%M:%S')


def _flatten_google_review(review):
    """Turn one raw review dict into a flat row keyed by the output column names."""
    # Use the first comment entry that carries a 'userComment'; developer
    # replies are not part of the output.
    user_comment = {}
    for comment in review.get('comments') or []:
        if isinstance(comment, dict) and comment.get('userComment'):
            user_comment = comment['userComment']
            break

    last_modified = user_comment.get('lastModified') or {}

    return {
        'store': 'Google Play',
        'id': review.get('reviewId'),
        'author': review.get('authorName'),
        'title': None,
        'summary': _clean_review_text(user_comment.get('text')),
        'rating': user_comment.get('starRating'),
        'app_version': user_comment.get('appVersionName'),
        'apple_href': None,
        'apple_votesum': None,
        'apple_votecount': None,
        'android_device': user_comment.get('device'),
        'androidOsVersion': user_comment.get('androidOsVersion'),
        'android_thumbsUpCount': user_comment.get('thumbsUpCount'),
        'android_thumbsDownCount': user_comment.get('thumbsDownCount'),
        'updated': _format_epoch_seconds(last_modified.get('seconds')),
    }


def clean_google_store_reviews(json):
    """Flatten raw Google Play review records into the app-reviews table layout.

    Each input record is expected to be a dict with ``reviewId``,
    ``authorName`` and a ``comments`` list whose ``userComment`` entry holds
    the review text, rating, device and version fields. Fields that are
    absent from a record come out as missing values rather than raising.

    Args:
        json: Iterable of review dicts, e.g. the list returned by
            ``get_google_play_reviews``. (The name shadows the ``json``
            module inside this function; it is kept for compatibility.)

    Returns:
        pandas.DataFrame: One row per review with the columns ``store``,
        ``id``, ``author``, ``title``, ``summary``, ``rating``,
        ``app_version``, ``apple_href``, ``apple_votesum``,
        ``apple_votecount``, ``android_device``, ``androidOsVersion``,
        ``android_thumbsUpCount``, ``android_thumbsDownCount`` and
        ``updated``, in that order. ``store`` is always ``'Google Play'``;
        ``title`` and the ``apple_*`` columns are always empty; ``updated``
        is a ``'%Y-%m-%d %H:%M:%S'`` UTC string. Empty input yields an empty
        frame with the same columns.
    """
    columns = [
        'store',
        'id',
        'author',
        'title',
        'summary',
        'rating',
        'app_version',
        'apple_href',
        'apple_votesum',
        'apple_votecount',
        'android_device',
        'androidOsVersion',
        'android_thumbsUpCount',
        'android_thumbsDownCount',
        'updated',
    ]

    rows = [_flatten_google_review(review) for review in json]
    return pd.DataFrame(rows, columns=columns)

In [None]:
# Flatten the raw review records into a table and display it as the cell output.
df = clean_google_store_reviews(reviews)
df

In [None]:
# Write the cleaned reviews to results.csv, omitting the DataFrame index column.
df.to_csv('results.csv',index=False)