# App Review Crawling

## 1.  Load the libraries

In [1]:
# install the necessary library
!pip install googletrans==3.1.0a0



In [2]:
import pandas as pd
import xmltodict
import requests
import os
import csv
from urllib.request import urlopen

from googletrans import Translator

## 2. Creating an Apple Store crawler 

In [3]:
# -*- coding: utf-8 -*-
def get_url_index(url):
    response = requests.get(url).content.decode('utf8')
    xml = xmltodict.parse(response)

    last_url = [l['@href'] for l in xml['feed']['link'] if (l['@rel'] == 'last')][0]
    last_index = [int(s.replace('page=', '')) for s in last_url.split('/') if ('page=' in s)][0]

    return last_index


def appstore_crawler(appid, outfile='./appstore_reviews.csv'):
    #appid 1445504255 (coupang east app)
    url = 'https://itunes.apple.com/us/rss/customerreviews/page=1/id=1445504255/sortby=mostrecent/xml'

    try:
        last_index = get_url_index(url)
    except Exception as e:
        print (url)
        print ('\tNo Reviews: appid 1445504255')
        print ('\tException:', e)
        return

    result = list()
    for idx in range(1, last_index+1):
        url = "https://itunes.apple.com/us/rss/customerreviews/page=1/id=1445504255/sortby=mostrecent/xml?urlDesc=/customerreviews/id=1445504255/sortBy=mostRecent/xml" 
        print(url)

        response = requests.get(url).content.decode('utf8')
        try:
            xml = xmltodict.parse(response)
        except Exception as e:
            print ('\tXml Parse Error %s\n\tSkip %s :' %(e, url))
            continue

        try:
            num_reivews= len(xml['feed']['entry'])
        except Exception as e:
            print ('\tNo Entry', e)
            continue

        try:
            xml['feed']['entry'][0]['author']['name']
            single_reviews = False
        except:
            single_reviews = True
            pass

        if single_reviews:
                result.append({
                    'STAR': int(xml['feed']['entry']['im:rating']),
                    'REVIEW': xml['feed']['entry']['content'][0]['#text'],
                })
        else:
            for i in range(len(xml['feed']['entry'])):
                result.append({
                    'STAR': int(xml['feed']['entry'][i]['im:rating']),
                    'REVIEW': xml['feed']['entry'][i]['content'][0]['#text'],
                })

    res_df = pd.DataFrame(result)
    res_df.to_csv(outfile, encoding='utf-8-sig', index=False)
    print ('Save reviews to file: %s \n' %(outfile))


if __name__ == '__main__':
    app_id = 1445504255
    outfile = os.path.join('appstore' + str(app_id)+'.csv')
    appstore_crawler(app_id, outfile=outfile)

https://itunes.apple.com/us/rss/customerreviews/page=1/id=1445504255/sortby=mostrecent/xml?urlDesc=/customerreviews/id=1445504255/sortBy=mostRecent/xml
https://itunes.apple.com/us/rss/customerreviews/page=1/id=1445504255/sortby=mostrecent/xml?urlDesc=/customerreviews/id=1445504255/sortBy=mostRecent/xml
Save reviews to file: appstore1445504255.csv 



In [4]:
df = pd.read_csv("appstore1445504255.csv")

## 3. Translate the column from Korean to English.

In [5]:
translator = Translator()

df['FEEDBACK'] = df['REVIEW'].apply(lambda x: translator.translate(x, src='ko', dest='en').text)

print(df)

    STAR                                             REVIEW  \
0      5                                                조아요   
1      1                       배달이 늦은걸 식당에서 조리가 늦었다고 핑계를 대고   
2      5                                              BGETS   
3      1  i’ve spent thousands and thousands of dollars ...   
4      5    쿠팡이츠 타 돈네는 불편하다고 하네요! 근데 저희 동네는 쿠팡이츠가 가방 좋음 ㅋㅋㅋ   
..   ...                                                ...   
95     1    첫주문쿠폰 받으러더니 받을려고 들어가니\n쿠폰이 다 떨어졌데... 장난하니? 바로삭제   
96     4  The app itself is really fast and convenient t...   
97     5                                                 히히   
98     1  I was in love with this app and never had a co...   
99     5                                     안되던되도 배달 되서 좋음   

                                             FEEDBACK  
0                                          Tighten up  
1   The excuse that the delivery was late was beca...  
2                                               BGETS  
3   i’ve spent thou

In [6]:
data = df.drop(columns = ['REVIEW'])

In [10]:
display(df)
display('English.ver', data)

Unnamed: 0,STAR,REVIEW,FEEDBACK
0,5,조아요,Tighten up
1,1,배달이 늦은걸 식당에서 조리가 늦었다고 핑계를 대고,The excuse that the delivery was late was beca...
2,5,BGETS,BGETS
3,1,i’ve spent thousands and thousands of dollars ...,i’ve spent thousands and thousands of dollars ...
4,5,쿠팡이츠 타 돈네는 불편하다고 하네요! 근데 저희 동네는 쿠팡이츠가 가방 좋음 ㅋㅋㅋ,Coupang Eats Ta Donne says it’s inconvenient! ...
...,...,...,...
95,1,첫주문쿠폰 받으러더니 받을려고 들어가니\n쿠폰이 다 떨어졌데... 장난하니? 바로삭제,I went in to get my first order coupon.\nI'm o...
96,4,The app itself is really fast and convenient t...,The app itself is really fast and convenient t...
97,5,히히,Hi-Hi
98,1,I was in love with this app and never had a co...,I was in love with this app and never had a co...


'English.ver'

Unnamed: 0,STAR,FEEDBACK
0,5,Tighten up
1,1,The excuse that the delivery was late was beca...
2,5,BGETS
3,1,i’ve spent thousands and thousands of dollars ...
4,5,Coupang Eats Ta Donne says it’s inconvenient! ...
...,...,...
95,1,I went in to get my first order coupon.\nI'm o...
96,4,The app itself is really fast and convenient t...
97,5,Hi-Hi
98,1,I was in love with this app and never had a co...


In [11]:
# save to csv
data.to_csv('data.csv', index=False)