# Webscraping Play Store & Normalização dos dados

In [1]:
!pip3 install google-play-scraper

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [2]:
import csv
from textblob import TextBlob
import pandas as pd

In [3]:
import re
def clean_text(text):
    # Remova emojis e outros caracteres não ASCII
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)
    
    # Remova caracteres especiais
    text = re.sub(r'[^\w\s]', '', text)
    
    # Substitua múltiplos espaços por um único espaço
    text = re.sub(r'\s+', ' ', text)
    
    return text.strip()


In [4]:
def convert_data_to_sentiment_data(raw_data):
    if isinstance(raw_data['content'], str):
        clean_content = clean_text(raw_data['content'])
        blob = TextBlob(clean_content)
        return {
            'App': 'Your App Name',  # Substitua pelo nome do aplicativo correspondente
            'Translated_Review': clean_content,
            'Sentiment': 'Positive' if blob.sentiment.polarity > 0 else 'Negative' if blob.sentiment.polarity < 0 else 'Neutral',
            'Sentiment_Polarity': blob.sentiment.polarity,
            'Sentiment_Subjectivity': blob.sentiment.subjectivity,
            'date': raw_data['at'],
        }
    else:
        print(f"Skipping review due to non-string content: {raw_data['content']}")
              

In [5]:
def search_reviews(app_name):
    from google_play_scraper import app, Sort, reviews_all

    # ID do aplicativo no Google Play Store
    app_id = app_name

    # Recuperar informações do aplicativo
    gmail_app_info = app(app_id)

    # Exibir informações básicas do aplicativo
    print(f"App Title: {gmail_app_info['title']}")
    print(f"Developer: {gmail_app_info['developer']}")
    print(f"Rating: {gmail_app_info['score']}")

    # Obter todas as avaliações do aplicativo
    gmail_reviews = reviews_all(
        app_id,
        sleep_milliseconds=0,  # Pausa entre solicitações (recomendado)
        lang="en",  # Idioma das avaliações
        country="us",  # País das avaliações
        sort=Sort.MOST_RELEVANT,  # Ordem de classificação das avaliações
    )

    print("-*-" * 10)
    print(f"Reviews: {gmail_reviews}")

    # Exibir as avaliações
    sentiment_data_list = []
    for i, review in enumerate(gmail_reviews):
        sentiment_data = convert_data_to_sentiment_data(review)
        if sentiment_data is not None:
            sentiment_data_list.append(sentiment_data)
    
    df = pd.DataFrame(sentiment_data_list)
    df.to_csv('sentiment_data.csv', index=False)
    df.head(10)

In [6]:
# 'com.paypal.android.p2pmobile'
# 'br.com.renanrodrigues.ifgmobile'
apps = [
    'com.paypal.android.p2pmobile'
]
apps

['com.paypal.android.p2pmobile']

In [7]:
for app in apps:
    search_reviews(app)

App Title: PayPal - Send, Shop, Manage
Developer: PayPal Mobile
Rating: 4.2279563
-*--*--*--*--*--*--*--*--*--*-


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string conten

Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string content: None
Skipping review due to non-string conten

In [8]:
print('The end!')

The end!
