# 1. Install and load the necessary packages
All the packages needed, from crawling to sentiment analysis, are loaded in this section.

In [2]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import json
from urllib.request import urlopen
from pandas.io.json import json_normalize
from google_play_scraper import app,Sort, reviews
from app_store_scraper import AppStore
from pprint import pprint
import urllib3
import xmltodict
import time
from textblob import TextBlob

# 2. Extract data and create dataset
In this section we will extract all the related reviews from the App Store and the Google Play Store. Then we will select the necessary columns and generate the final dataset used for our analysis.

In [None]:
##########################################################################################
#########                                                                        #########
#########                     EXTRACT REVIEWS FROM APP STORE                     #########
#########                                                                        #########
##########################################################################################


# App Store (Apple) targets: the apps whose reviews will be crawled.
#
# Every app is described by the name and the numeric identifier that appear
# in its App Store URL, e.g. for the SoundID app:
#   https://apps.apple.com/us/app/soundid-profile-creation/id1490569267
# Position k of names_ios and position k of ids_ios refer to the same app.
names_ios = [
    'revolut',
    'n26-mobile-banking',
    'monzo-bank',
    'bunq',
]
ids_ios = [
    932493382,
    956857223,
    1052238659,
    1021178150,
]

# Two-letter store (country) codes that will be probed for reviews
store_list = [
    'DZ', 'AO', 'AI', 'AG', 'AR', 'AM', 'AU', 'AT', 'AZ', 'BH',
    'BY', 'BE', 'BB', 'BZ', 'BM', 'BO', 'BW', 'BR', 'VG', 'BN',
    'BG', 'CA', 'KY', 'CL', 'CN', 'CO', 'CR', 'HR', 'CY', 'CZ',
    'DK', 'DM', 'DO', 'EC', 'EG', 'SV', 'EE', 'FI', 'FR', 'DE',
    'GH', 'GR', 'GD', 'GT', 'GY', 'HN', 'HK', 'HU', 'IS', 'IN',
    'ID', 'IE', 'IL', 'IT', 'JM', 'JP', 'JO', 'KZ', 'KE', 'KR',
    'KW', 'LV', 'LB', 'LT', 'LU', 'MO', 'MK', 'MG', 'MY', 'ML',
    'MT', 'MU', 'MX', 'MD', 'MS', 'NP', 'NL', 'NZ', 'NI', 'NE',
    'NG', 'NO', 'OM', 'PK', 'PA', 'PY', 'PE', 'PH', 'PL', 'PT',
    'QA', 'RO', 'RU', 'SA', 'SN', 'SG', 'SK', 'SI', 'ZA', 'ES',
    'LK', 'KN', 'LC', 'VC', 'SR', 'SE', 'CH', 'TW', 'TZ', 'TH',
    'BS', 'TT', 'TN', 'TR', 'TC', 'UG', 'GB', 'UA', 'AE', 'UY',
    'US', 'UZ', 'VE', 'VN', 'YE',
]


# Scrape data from the App Store
def crawl_ios(store_list, app_name, app_id):
    """Collect the App Store reviews of one app across several stores.

    Each store is first probed through the iTunes customer-reviews feed;
    only stores whose feed contains at least one entry are crawled with
    ``AppStore``.

    params:
     store_list - iterable of store (country) codes to probe
     app_name   - app name as used in the App Store URL; also written to
                  the 'App' column of the result
     app_id     - numeric App Store identifier of the app
    returns:
     dataframe holding the reviews of every crawled store plus an 'App'
     column; an empty dataframe when nothing was found
    """
    # Imported here so the function does not need an extra top-level import.
    from urllib.error import URLError

    # check if store has any reviews
    valid_stores = []
    for store in store_list:
        url = f"https://itunes.apple.com/{store}/rss/customerreviews/id={app_id}/json"
        try:
            # The context manager closes the HTTP response once it has been read.
            with urlopen(url) as response:
                data = json.loads(response.read())
        except (URLError, ValueError) as exc:
            # HTTPError is a URLError and a JSON decoding error is a ValueError:
            # a single unreachable or malformed feed must not abort the crawl.
            print(f'{store} could not be checked: {exc}')
            continue
        if data.get('feed', {}).get('entry') is None:
            print(store + ' has no reviews')
        else:
            valid_stores.append(store)
            print(store + ' has reviews')

    frames = []
    for country in valid_stores:
        # Pass the known id so the scraper crawls exactly the app that was
        # probed above instead of resolving the id again from the name.
        get_ios = AppStore(country=country, app_name=app_name, app_id=app_id)
        get_ios.review()
        store_reviews = pd.DataFrame(get_ios.reviews)
        if not store_reviews.empty:
            frames.append(store_reviews)

    if not frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(frames).assign(App=app_name)

# Extract the reviews of every app (only stores with at least one review are
# crawled) and stack the per-app results into a single data frame
ios_frames = []
for appname, appid in zip(names_ios, ids_ios):
    print(f'Crawling ios appstore reviews for {appname} app ')
    ios_frames.append(crawl_ios(store_list, appname, appid))

# DataFrame.append was removed in pandas 2.0; a single concat also avoids
# re-copying the accumulated frame on every iteration
app_store_reviews = pd.concat(ios_frames) if ios_frames else pd.DataFrame()

In [None]:
# Rename the columns, then keep only the necessary ones.
# Renaming first keeps this cell re-runnable: rename() ignores labels that are
# no longer present, whereas selecting 'rating'/'review' a second time would
# raise a KeyError.
app_store_reviews = (
    app_store_reviews
    .rename(columns={'rating': 'Rating', 'review': 'Comment'})
    .loc[:, ['App', 'Rating', 'Comment']]
)

In [None]:
##########################################################################################
#########                                                                        #########
#########                 EXTRACT REVIEWS FROM GOOGLE PLAY STORE                 #########
#########                                                                        #########
##########################################################################################

# Apps to fetch from Google Play: display names and store ids, listed in the
# same order so that position k of both lists refers to the same app
names_playstore = ['Revolut', 'N26', 'MonzoBank', 'bunq']

ids_playstore = [
    'com.revolut.revolut',
    'de.number26.android',
    'co.uk.getmondo',
    'com.bunq.android',
]

# Set function to get reviews from the specified apps on Google Play Store
def getReviewsPlayStore(ids, appname, count=50000, lang='en'):
    '''
        Fetch the newest Google Play Store reviews of a single app.

        params:
         ids     - Google Play Store id of the app (e.g. 'com.bunq.android')
         appname - label written to the 'App' column of every row
         count   - maximum number of reviews to request
         lang    - language code forwarded to the scraper
        returns:
         dataframe with the columns Date, Comment, Replay, Version, Rating,
         ThumbsUp and App; it has no rows when no review was returned
    '''
    # scraper field -> column name used in the rest of the notebook
    column_names = {
        'at': 'Date',
        'content': 'Comment',
        'replyContent': 'Replay',
        'reviewCreatedVersion': 'Version',
        'score': 'Rating',
        'thumbsUpCount': 'ThumbsUp',
    }

    result = reviews(ids,
            lang=lang,
            sort=Sort.NEWEST,
            count=count)
    # Current google_play_scraper releases return a
    # (reviews, continuation_token) tuple; a bare list is accepted as well.
    if isinstance(result, tuple):
        result = result[0]

    # Short pause after each request (presumably to go easy on the store).
    time.sleep(1)

    df = pd.DataFrame(result)
    if df.empty:
        # Selecting the review fields from an empty frame would raise a
        # KeyError, so hand back an empty frame with the expected columns.
        return pd.DataFrame(columns=[*column_names.values(), 'App'])

    return (
        df[list(column_names)]
        .rename(columns=column_names)
        .assign(App=appname)
    )

# Create dataframe with all the reviews from Google Play Store
playstore_frames = []
for i, appname in zip(ids_playstore, names_playstore):
    app_reviews = getReviewsPlayStore(i, appname)
    # Skip apps for which nothing usable came back
    if app_reviews is not None and not app_reviews.empty:
        playstore_frames.append(app_reviews)

# DataFrame.append was removed in pandas 2.0; concatenate the pieces once
if playstore_frames:
    df_playstore = pd.concat(playstore_frames)
else:
    df_playstore = pd.DataFrame(
        columns=['Date', 'Comment', 'Replay', 'Version', 'Rating', 'ThumbsUp', 'App']
    )

# Keep only the calendar date; done once on the full frame instead of
# re-converting the accumulated rows on every loop iteration
df_playstore['Date'] = pd.to_datetime(df_playstore['Date']).dt.date

# Select the necessary columns
play_store_reviews = df_playstore[['App', 'Date', 'Rating', 'Comment']]

# Last expression of the cell, so the preview is actually displayed
df_playstore.head()

In [None]:
# Write both review sets to CSV files (paths are relative to the working directory)
for csv_name, review_frame in (
    ('play_store_reviews.csv', play_store_reviews),
    ('app_store_reviews.csv', app_store_reviews),
):
    review_frame.to_csv(csv_name)