In [1]:
import time
import yaml
import logging
import numpy as np
import pandas as pd
import chardet
from google_play_scraper import app, Sort, reviews_all

In [2]:
# Configure logging at INFO level, then create this notebook's own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [3]:
# Load the project parameters from the YAML configuration file.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('../conf/base/parameters.yml', 'r', encoding='utf-8') as params_file:
    params = yaml.safe_load(params_file)

In [4]:
# Let chardet guess the encoding of the raw App Store export from its bytes.
with open('../data/01_raw/appstore_reviews.csv', mode='rb') as raw_file:
    result = chardet.detect(raw_file.read())

# Show the guess (encoding, confidence, language) once the file is closed.
print(result)

{'encoding': 'Windows-1254', 'confidence': 0.5160149654328776, 'language': 'Turkish'}


In [5]:
# Read the raw App Store reviews export into a DataFrame.
# NOTE(review): no `encoding` is passed, so pandas' default applies; the chardet
# guess above has low confidence — confirm the text columns decode correctly.
df_apple = pd.read_csv(filepath_or_buffer='../data/01_raw/appstore_reviews.csv')

In [6]:
# Entries to download reviews for, taken from the loaded parameters.
# Each entry is later passed as `app_id` to `reviews_all`
# (presumably Google Play package ids — confirm in parameters.yml).
googleplayapps = params['googleplayapps']

In [7]:
# Download every Google Play review for each configured app and stack the
# results into one frame, tagging each row with the store id it came from.

# Seconds to wait between two consecutive apps (presumably to avoid being
# throttled by the store — confirm).
PAUSE_BETWEEN_APPS_SECONDS = 120

per_app_frames = []
# The loop variable is `app_id`, not `app`: `app` is a function imported from
# google_play_scraper and must not be shadowed.
for position, app_id in enumerate(googleplayapps):
    # Pause only *between* apps; there is nothing to wait for after the last one.
    if position > 0:
        time.sleep(PAUSE_BETWEEN_APPS_SECONDS)

    logger.info(f"Downloading reviews for {app_id}")
    rvws = reviews_all(
        app_id=app_id,
        sleep_milliseconds=1,
        country="br",
        lang="pt",
        sort=Sort.NEWEST,
    )

    rvws_df = pd.DataFrame(rvws)
    rvws_df['storeid'] = app_id
    per_app_frames.append(rvws_df)

# Concatenate once instead of growing the frame inside the loop (which re-copies
# all previous rows on every iteration). As before, each app's rows keep their
# own 0-based index, so index labels repeat across apps.
# `pd.concat` raises on an empty list, so fall back to an empty frame.
reviews_df = pd.concat(per_app_frames) if per_app_frames else pd.DataFrame()


INFO:__main__:Downloading reviews for br.com.amil.beneficiarios
INFO:__main__:Downloading reviews for br.com.bradseg.segurobradescosaude
INFO:__main__:Downloading reviews for br.com.portoseguro.saudeodonto
INFO:__main__:Downloading reviews for br.com.sulamerica.sam.saude
INFO:__main__:Downloading reviews for br.com.app.centralnacionalunimed


In [8]:
# Preview the first five downloaded reviews.
reviews_df.head(5)

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,storeid
0,160e74bc-ba41-492e-bc81-7abcfd6ffa2d,Audrey S A,https://play-lh.googleusercontent.com/a/AGNmyx...,Tem atendido bem as minhas expectativas e func...,5,0,,2023-03-07 14:40:55,"Olá, Audrey. Que alegria saber que sua experiê...",2023-03-07 14:48:22,br.com.amil.beneficiarios
1,6ba884f2-a30a-414d-bf08-cfae4f69f416,Antonio Jose Magalhaes de Oliveira,https://play-lh.googleusercontent.com/a/AGNmyx...,O aplicativo em geral é bom.,5,0,4.55.01,2023-03-07 12:43:55,"Olá, Antonio. Que alegria saber que sua experi...",2023-03-07 14:43:31,br.com.amil.beneficiarios
2,e0abf3e1-77de-4465-a557-920d41ac2eaf,Célia M.,https://play-lh.googleusercontent.com/a-/ACB-R...,No momento estou me tratando no COI. O plano d...,5,0,4.55.01,2023-03-07 10:51:32,"Olá, Célia. Que alegria saber que sua experiên...",2023-03-07 14:43:54,br.com.amil.beneficiarios
3,f8bf1e37-eeea-4120-8f5e-ef487d45ebdf,Regiane Aparecida,https://play-lh.googleusercontent.com/a/AGNmyx...,"O APP é horrível, sempre está em manutenção ou...",1,0,4.55.01,2023-03-07 09:38:30,"Olá, Regiane. Obrigado por sinalizar. Nos envi...",2023-03-07 10:03:00,br.com.amil.beneficiarios
4,c58d5f75-9b92-4c7f-a24b-560c38954b3a,raimunda maria sales,https://play-lh.googleusercontent.com/a/AGNmyx...,"Pessimo cheio de bug, nada interagivo para ter...",1,0,4.55.01,2023-03-07 08:22:06,"Olá, Raimunda. Obrigado por sinalizar. Nos env...",2023-03-07 09:56:22,br.com.amil.beneficiarios


In [9]:
# Number of downloaded reviews per store id, most frequent first.
reviews_df.loc[:, 'storeid'].value_counts()

br.com.amil.beneficiarios             73330
br.com.sulamerica.sam.saude           28497
br.com.bradseg.segurobradescosaude    16041
br.com.portoseguro.saudeodonto         3627
br.com.app.centralnacionalunimed       2834
Name: storeid, dtype: int64