Skip to content

Commit

Permalink
Parse reviews in different languages
Browse files (browse the repository at this point in the history)
  • Loading branch information
coagulant committed Mar 14, 2015
1 parent cd93ff4 commit 8e2df9d
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 9 deletions.
1 change: 1 addition & 0 deletions critics/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def poll_store_single_app(self, platform, app_id, notify):
for review in reviews:
if review.id in parsed_review_ids:
continue
logger.debug(review)
parsed_review_ids.add(review.id)
new_reviews.append(review)

Expand Down
21 changes: 15 additions & 6 deletions critics/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections import namedtuple
import json
import datetime
import locale
import logging
import re
from time import mktime
Expand Down Expand Up @@ -34,10 +35,17 @@ def __str__(self):
))


def get_ios_reviews(app_id, limit=100):
url = 'https://itunes.apple.com/ru/rss/customerreviews/id=%s/sortBy=mostRecent/xml' % app_id
def get_ios_reviews(app_id, language=None, limit=100):
if not language:
try:
language = locale.getdefaultlocale()[0][:2]
except ValueError:
language = 'en'
url = 'https://itunes.apple.com/%(language)srss/customerreviews/id=%(app_id)s/sortBy=mostRecent/xml' % {
'language': '%s/' % language, 'app_id': app_id}
response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'},
timeout=1)
response.encoding = 'utf-8' # avoid chardet not guessing correctly
feed = feedparser.parse(response.text)
reviews = [Review(
id=entry.id,
Expand All @@ -53,11 +61,12 @@ def get_ios_reviews(app_id, limit=100):
return reviews


def get_android_reviews(app_id, limit=100):
def get_android_reviews(app_id, language=None, limit=100):
url = 'https://play.google.com/store/getreviews'
response = requests.post(url, data={'xhr': 1, 'id': app_id, 'reviewSortOrder': 0,
'pageNum': 0, 'reviewType': 0},
timeout=1)
payload = {'xhr': 1, 'id': app_id, 'reviewSortOrder': 0, 'pageNum': 0, 'reviewType': 0}
if language:
payload['hl'] = language
response = requests.post(url, data=payload, timeout=1)
json_source = response.text[response.text.find('['):]
response_as_json = json.loads(json_source)
try:
Expand Down
8 changes: 5 additions & 3 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@

@responses.activate
def test_ios():
responses.add(responses.GET, 'https://itunes.apple.com/ru/rss/customerreviews/id=123/sortBy=mostRecent/xml',
responses.add(responses.GET, 'https://itunes.apple.com/fr/rss/customerreviews/id=123/sortBy=mostRecent/xml',
body=codecs.open('tests/fixtures/itunes_fr.example', encoding='utf-8').read(),
content_type='application/xml; charset=UTF-8')
reviews = get_ios_reviews(123)
reviews = get_ios_reviews(123, language='fr')

assert len(reviews) == 50

Expand Down Expand Up @@ -41,6 +41,7 @@ def test_android_ru():
content_type='application/json; charset=UTF-8')
reviews = get_android_reviews('com.skype.raider', limit=10)

assert 'hl=en' not in responses.calls[0].request.body
assert len(reviews) == 10

review = reviews[0]
Expand All @@ -61,8 +62,9 @@ def test_android_en():
responses.add(responses.POST, 'https://play.google.com/store/getreviews',
body=codecs.open('tests/fixtures/gp_en.example', encoding='utf-8').read(),
content_type='application/json; charset=UTF-8')
reviews = get_android_reviews('com.skype.raider', limit=10)
reviews = get_android_reviews('com.skype.raider', language='en', limit=10)

assert 'hl=en' in responses.calls[0].request.body
assert len(reviews) == 10

review = reviews[0]
Expand Down

0 comments on commit 8e2df9d

Please sign in to comment.