<a href="https://colab.research.google.com/github/ghoshsagnik/sentiment-analysis-google-play/blob/main/Google_play_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries
!pip install flask flask-ngrok flask-cors transformers google-play-scraper pyngrok

import os
import re

import pandas as pd
from flask import Flask, request, jsonify
from flask_cors import CORS  # Import CORS
from google_play_scraper import Sort, reviews_all
from pyngrok import ngrok
from transformers import pipeline

# Read the ngrok authtoken from the environment instead of committing it to
# the notebook: a token stored in source control can be used by anyone who can
# read the repository to open tunnels on the owner's account.
# NOTE: any token that was ever hard-coded in this file should be revoked in
# the ngrok dashboard and replaced with a fresh one.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_auth_token:
    raise RuntimeError(
        "NGROK_AUTHTOKEN is not set. Export your ngrok authtoken in the "
        "environment before starting the server."
    )
ngrok.set_auth_token(ngrok_auth_token)

# Initialize Flask app
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# Start ngrok tunnel to the Flask app.
# The tunnel targets local port 5000, which must match the port passed to
# app.run() at the bottom of the file.
public_url = ngrok.connect(5000)
print(f"Public URL: {public_url}")

# Function to extract app ID from the URL
def extract_app_id(url: str) -> str:
    """Return the app ID carried by the ``id`` parameter of a Play Store URL.

    Args:
        url: Google Play Store URL (or bare query string) that contains
            ``id=<app id>``.

    Returns:
        The app ID, e.g. ``"com.example.app"``.

    Raises:
        ValueError: If ``url`` is not a string or contains no ``id`` parameter.
    """
    error_message = (
        "Invalid Google Play Store URL. Please ensure the URL contains an 'id' parameter."
    )
    # A non-string (e.g. a JSON number) would make re.search raise TypeError;
    # report it as the same ValueError callers already handle.
    if not isinstance(url, str):
        raise ValueError(error_message)

    # The lookbehind stops "id=" from matching the tail of a longer parameter
    # name such as "gid=" or "android-id=", which would return the wrong app.
    match = re.search(r"(?<![\w.-])id=([a-zA-Z0-9._-]+)", url)
    if match is None:
        raise ValueError(error_message)
    return match.group(1)

# Build the Hugging Face sentiment classifier once, at module load, so that
# the request handlers below all share this single pipeline object.
sentiment_analysis = pipeline(
    task="sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

# Function to generate reviews based on the URL (Google Play Store URL)
@app.route('/generate_reviews', methods=['POST'])
def generate_reviews():
    """Return the author name and text of an app's English (US) reviews.

    Expects a JSON body of the form ``{"url": "<Google Play Store URL>"}``.

    Returns:
        200 with ``{"reviews": [{"userName": ..., "content": ...}, ...]}``;
        400 with ``{"error": ...}`` when the URL is missing, is not a string,
        or carries no app ID (or any other ``ValueError`` is raised while
        fetching); 404 with ``{"error": ...}`` when no reviews are found.
    """
    # silent=True yields None for a missing or malformed JSON body; the
    # isinstance check also covers valid JSON that is not an object (a list,
    # a number, ...), which would otherwise crash on .get().
    payload = request.get_json(silent=True)
    url = payload.get("url") if isinstance(payload, dict) else None
    if not url or not isinstance(url, str):
        return jsonify({"error": "Please provide a valid Google Play Store URL."}), 400

    try:
        app_id = extract_app_id(url)
        # Fetch reviews for the extracted app ID.
        # NOTE(review): reviews_all is not given any count limit here, so a
        # popular app may take a long time to fetch — consider a bounded fetch.
        hkr = reviews_all(app_id, sleep_milliseconds=0, lang='en', country='US', sort=Sort.NEWEST)

        # Create a DataFrame from the reviews
        df = pd.json_normalize(hkr)

        if df.empty:
            return jsonify({"error": "No reviews found for this app."}), 404

        # Only these two columns are returned, so select them directly rather
        # than dropping a fixed list of other columns (which raises KeyError
        # as soon as the scraper omits any one of them).
        selected = df[['userName', 'content']].copy()
        selected['content'] = selected['content'].astype('str')

        reviews_data = selected.to_dict(orient='records')

        return jsonify({"reviews": reviews_data})

    except ValueError as e:
        return jsonify({"error": str(e)}), 400


# Function to generate sentiment analysis results
@app.route('/generate_sentiment', methods=['POST'])
def generate_sentiment():
    """Classify an app's English (US) reviews and summarise the sentiment.

    Expects a JSON body of the form ``{"url": "<Google Play Store URL>"}``.

    Returns:
        200 with ``{"positive_percentage": float, "negative_percentage": float,
        "decision": str}``; 400 with ``{"error": ...}`` when the URL is
        missing, is not a string, or carries no app ID (or any other
        ``ValueError`` is raised while processing); 404 with
        ``{"error": ...}`` when no reviews are found.
    """
    # silent=True yields None for a missing or malformed JSON body; the
    # isinstance check also covers valid JSON that is not an object.
    payload = request.get_json(silent=True)
    url = payload.get("url") if isinstance(payload, dict) else None
    if not url or not isinstance(url, str):
        return jsonify({"error": "Please provide a valid Google Play Store URL."}), 400

    try:
        app_id = extract_app_id(url)
        # Fetch reviews for the extracted app ID.
        # NOTE(review): reviews_all is not given any count limit here, and the
        # model then runs on every review — consider a bounded fetch.
        hkr = reviews_all(app_id, sleep_milliseconds=0, lang='en', country='US', sort=Sort.NEWEST)

        # Create a DataFrame from the reviews
        df = pd.json_normalize(hkr)

        if df.empty:
            return jsonify({"error": "No reviews found for this app."}), 404

        # Only the review text is needed, so there is no reason to drop a
        # fixed list of other columns (which raises KeyError if one is absent).
        texts = df['content'].astype('str').tolist()

        # Hand the whole list to the pipeline in one call instead of invoking
        # it once per row. truncation=True cuts over-long reviews down to the
        # model's maximum input length instead of failing inside the model.
        results = sentiment_analysis(texts, truncation=True)

        # Each item is normally a {'label', 'score'} dict; unwrap a
        # single-element list defensively in case a pipeline version nests it.
        labels = [
            (item[0] if isinstance(item, list) else item)['label']
            for item in results
        ]

        # Calculate sentiment percentages as plain Python floats
        total = len(labels)
        positive = labels.count('POSITIVE') / total * 100
        negative = labels.count('NEGATIVE') / total * 100

        # Decision logic: each branch is reached only when the ones above it
        # failed, so the upper bounds are implied.
        if positive >= 70:
            decision = "The Application is worth downloading"
        elif positive >= 50:
            decision = "The Application is well received but consider your preferences"
        elif positive >= 40:
            decision = "The Application has a balanced reception. Consider your preferences and needs."
        else:
            decision = "There are significant issues with the Application."

        return jsonify({
            "positive_percentage": positive,
            "negative_percentage": negative,
            "decision": decision
        })

    except ValueError as e:
        return jsonify({"error": str(e)}), 400


# Run the app
# Start Flask's built-in server only when this file is executed directly.
# Port 5000 is the same local port the ngrok tunnel created above points at,
# so the two values must be changed together.
if __name__ == '__main__':
    app.run(port=5000)


Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting flask-cors
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting google-play-scraper
  Downloading google_play_scraper-1.2.7-py3-none-any.whl.metadata (50 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 50.2/50.2 kB 1.4 MB/s eta 0:00:00
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl (14 kB)
Downloading google_play_scraper-1.2.7-py3-none-any.whl (28 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok, google-play-scraper, flask-ngrok, flask-cors
Successfully installed flask-cors-5.0.0 flask-ngrok-0.0.25 google-play-scraper-1.2.7 pyngrok-7.2.3
Public URL: NgrokTunnel: "https://45ea-34-169-13-205.ngrok-free.app" -> "http://localhost:5000"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/256 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cpu


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit
