In [2]:
!pip install --upgrade google-play-scraper pandas openpyxl

from google_play_scraper import search, app, reviews
import pandas as pd
import re
import logging
import csv



# Configure the root logger once for the whole cell: INFO and above, each
# record prefixed with a timestamp and its level name.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

def clean_text(text):
    """Return *text* normalised to plain words separated by single spaces.

    Non-string input yields an empty string. For strings, three regex
    passes run in order: HTML-like tags are dropped, every character that
    is neither a word character nor whitespace is dropped (punctuation,
    commas, emoji, ...), and each run of whitespace (including newlines)
    is collapsed to one space. The result is stripped at both ends.
    """
    if not isinstance(text, str):
        return ""

    # Order matters: tags must go before '<' and '>' are removed as punctuation.
    passes = (
        (r"<[^>]+>", ""),
        (r"[^\w\s]", ""),
        (r"\s+", " "),
    )
    cleaned = text
    for pattern, replacement in passes:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

def is_meaningful_review(text):
    """Decide whether a review carries enough content to be worth keeping.

    Returns False when *text* is not a string, is shorter than 20
    characters after stripping, contains no ASCII letter, contains one of
    the stock praise phrases anywhere in it, or consists of at most five
    words that are all generic filler words. Returns True otherwise.
    Matching is done on the stripped, lower-cased text.
    """
    if not isinstance(text, str):
        return False

    normalized = text.strip().lower()
    if len(normalized) < 20:
        return False
    if re.search(r"[a-zA-Z]", normalized) is None:
        return False

    # Stock praise: a substring hit anywhere rejects the review.
    stock_phrases = (
        "bagus sekali", "aplikasi terbaik", "mantap jiwa", "recommended banget",
        "ok banget", "very good", "bagus banget", "suka banget", "👍👍",
        "good app", "nice app", "ok lah", "the best", "love it", "i like it",
    )
    for phrase in stock_phrases:
        if phrase in normalized:
            return False

    filler_words = frozenset((
        "bagus", "oke", "mantap", "keren", "suka", "top", "good", "nice", "great",
        "👍", "👌", "lumayan", "terbaik", "recommended", "mantul", "sip", "jos", "hebat",
    ))
    tokens = normalized.split()
    # Short reviews made up purely of filler words say nothing specific.
    only_filler = set(tokens) <= filler_words
    return not (len(tokens) <= 5 and only_filler)
# Ordered (pattern, label) rules; the first pattern found in the text wins.
# Short, ambiguous tokens (hang, otp, ads, map, arah, cs, ui, ux) are wrapped
# in word boundaries so they no longer fire inside unrelated words such as
# "change", "downloads", "parah", "topics" or "quick". An optional Indonesian
# "-nya" suffix is still accepted on those tokens. Hyphenated keywords also
# accept the fused spelling, because punctuation-stripped text turns
# "nge-lag" into "ngelag" and "in-app" into "inapp".
_TOPIC_RULES = tuple(
    (re.compile(pattern), label)
    for pattern, label in (
        (r"crash|force close|keluar sendiri|bug|error|\b(?:nge[- ]?)?hang(?:nya)?\b|stuck",
         "Crash/Error"),
        (r"lemot|lambat|loading|macet|nge[- ]?lag|delay|slow",
         "Performance"),
        (r"login|akun|email|daftar|sign in|masuk|password|\botp(?:nya)?\b|kode verifikasi",
         "Login Issue"),
        (r"iklan|\bads\b|pop[- ]?up|advertisement",
         "Ads/Commercial"),
        (r"fitur|tidak ada|hilang|tidak tersedia|kurang lengkap|mohon tambahkan",
         "Missing Feature/Request"),
        (r"pembayaran|bayar|payment|tagihan|in[- ]?app purchase|transaksi|refund",
         "Payment/Transaction"),
        (r"harga|biaya|terlalu mahal|murah|pricing|cost|expensive|cheap",
         "Pricing Issue"),
        (r"lokasi|gps|\bmaps?(?:nya)?\b|navigasi|\barah(?:nya)?\b|petunjuk|location",
         "Navigation/Map"),
        (r"layanan|service|\bcs(?:nya)?\b|pelayanan|support|bantuan",
         "Customer Support"),
        (r"user friendly|tampilan|antarmuka|desain|interface|\bu[ix](?:nya)?\b|mudah digunakan",
         "UI/UX Experience"),
    )
)


def classify_review_topic(text):
    """Assign a review to the first topic whose keywords appear in it.

    The text is lower-cased and tested against the topic rules in a fixed
    priority order (crash, performance, login, ads, missing feature,
    payment, pricing, navigation, customer support, UI/UX). The label of
    the first matching rule is returned.

    Args:
        text: Review text. Anything that is not a string is treated as
            having no recognisable topic.

    Returns:
        One of the topic labels, or "other" when nothing matches.
    """
    if not isinstance(text, str):
        return "other"

    lowered = text.lower()
    for pattern, label in _TOPIC_RULES:
        if pattern.search(lowered):
            return label
    return "other"


# Stems that signal activity tracking. They must start a word, so "soundtrack"
# or "catalog" do not count. "log" is additionally limited to its own
# inflections, because as a bare prefix it would also match "login".
_ACTIVITY_TRACKING_PATTERN = re.compile(
    r"\b(?:track|monitor|analy[sz]|record)\w*|\blog(?:s|ged|ging|ger|book)?\b"
)


def analyze_description(description):
    """Extract coarse feature labels from an app description.

    Currently only one feature is detected: "Activity Tracking", reported
    when the description mentions tracking, monitoring, logging, analysing
    or recording (case-insensitive, matched on whole-word stems).

    Args:
        description: App description text. Non-string values (for example
            ``None`` when the store returns no description) yield no
            features instead of raising.

    Returns:
        A list of feature labels; empty when nothing is detected.
    """
    if not isinstance(description, str):
        return []

    features = []
    if _ACTIVITY_TRACKING_PATTERN.search(description.lower()):
        features.append("Activity Tracking")
    return features

def fetch_reviews(app_id, max_reviews=100):
    """Download reviews for one app and keep only the meaningful ones.

    Requests up to *max_reviews* reviews for *app_id* with ``lang="id"``
    and ``country="id"``. Each review's user name and content are passed
    through ``clean_text``; reviews whose cleaned content fails
    ``is_meaningful_review`` are skipped. The remainder are returned as
    dicts with the keys "App ID", "Review ID", "User Name", "Rating",
    "Content" and "Topic" (from ``classify_review_topic``).

    Failures are logged as warnings and never raised: a review that cannot
    be processed is skipped, and a failure of the fetch itself produces an
    empty list.
    """
    try:
        fetched, _ = reviews(app_id, lang="id", country="id", count=max_reviews)

        kept = []
        for entry in fetched:
            try:
                raw_name = entry.get("userName", "")
                raw_body = entry.get("content", "")
                raw_score = entry.get("score", 0)

                # Anything that is not the expected type falls back to a neutral value.
                name = clean_text(raw_name) if isinstance(raw_name, str) else ""
                body = clean_text(raw_body) if isinstance(raw_body, str) else ""
                score = raw_score if isinstance(raw_score, (int, float)) else 0

                if is_meaningful_review(body):
                    kept.append({
                        "App ID": app_id,
                        "Review ID": entry.get("reviewId", ""),
                        "User Name": name,
                        "Rating": score,
                        "Content": body,
                        "Topic": classify_review_topic(body),
                    })
            except Exception as e:
                logging.warning(f"Error processing a review for {app_id}: {e}")
        return kept
    except Exception as e:
        logging.warning(f"Error fetching reviews for {app_id}: {e}")
        return []

def _parse_install_count(raw):
    """Return an install count as a number; 0 when it is missing or unparseable."""
    if isinstance(raw, str):
        # Store strings look like "1,000,000+": keep the digits only.
        return int(re.sub(r"[^\d]", "", raw) or 0)
    if isinstance(raw, (int, float)):
        return raw
    return 0


def fetch_apps_by_keyword(keywords, min_installs=100000, max_results=10,
                          max_reviews_per_app=100):
    """Search the store for each keyword and collect app details and reviews.

    For every keyword the first *max_results* search hits are examined.
    Hits with at least *min_installs* installs get their full details
    fetched and recorded (one row per keyword/app pair), and their reviews
    are collected via ``fetch_reviews``.

    Args:
        keywords: Iterable of search phrases.
        min_installs: Minimum install count a search hit needs to be kept.
        max_results: Number of search hits inspected per keyword.
        max_reviews_per_app: Review count requested from ``fetch_reviews``
            for each app.

    Returns:
        A ``(apps, reviews)`` tuple of pandas DataFrames. Either may be
        empty. An app that matches several keywords appears once per
        keyword in ``apps``, but its reviews are fetched and included in
        ``reviews`` only once.

    Errors are logged rather than raised: a failing app is skipped without
    affecting the other hits, and a failing search skips that keyword.
    """
    app_details = []
    all_reviews = []
    reviewed_app_ids = set()  # apps whose reviews were already collected

    for keyword in keywords:
        logging.info(f"Fetching apps for keyword: {keyword}")
        try:
            search_results = search(keyword, lang="id", country="id")

            for app_info in search_results[:max_results]:
                # Bound before anything can raise so the handler below can
                # always name the app it failed on.
                app_id = None
                try:
                    app_id = app_info["appId"]
                    if _parse_install_count(app_info.get("installs", 0)) < min_installs:
                        continue

                    details = app(app_id)
                    # The description may be present but None.
                    description = details.get("description") or ""

                    app_details.append({
                        "App ID": app_id,
                        "Keyword": clean_text(keyword),
                        "App Name": clean_text(details.get("title", "")),
                        "Developer": clean_text(details.get("developer", "")),
                        "Rating": details.get("score", 0),
                        "Installs": details.get("installs", 0),
                        "Category": clean_text(details.get("genre", "")),
                        "Description": clean_text(description),
                        "Features": analyze_description(description),
                    })

                    # Avoid duplicate review rows (and duplicate network
                    # calls) for apps returned by more than one keyword.
                    if app_id not in reviewed_app_ids:
                        reviewed_app_ids.add(app_id)
                        all_reviews.extend(
                            fetch_reviews(app_id, max_reviews=max_reviews_per_app)
                        )
                except Exception as e:
                    logging.warning(f"Error fetching app details for {app_id}: {e}")
        except Exception as e:
            logging.error(f"Error searching apps for keyword {keyword}: {e}")

    return pd.DataFrame(app_details), pd.DataFrame(all_reviews)


# Search phrases fed to fetch_apps_by_keyword, one store search per entry.
keywords = [
    "Flight Booking",
    "Hotel Reservations",
    "Travel Itinerary Planning",
    "Navigation and Maps",
    "Local Guides and Attractions",
    "Language Translation for Travelers",
    "Currency Converter and Travel Budgeting",
    "Travel Communities and Journals",
    "Transportation Booking (Bus, Train, Car Rental)",
    "Travel Safety and Alerts"
]



# Run the scrape with the default install threshold and result limit;
# yields one DataFrame of app details and one of filtered reviews.
df_apps, df_reviews = fetch_apps_by_keyword(keywords)


# Echo both tables to the cell output for a quick visual check.
print("Detail Aplikasi yang Ditemukan:")
print(df_apps)

print("\nUlasan Aplikasi yang Ditemukan:")
print(df_reviews)


# Persist both tables as Excel workbooks in the working directory
# (written through the openpyxl engine, without the index column).
output_file_apps = "App_description.xlsx"
output_file_reviews = "App_review.xlsx"
df_apps.to_excel(output_file_apps, index=False, engine="openpyxl")
df_reviews.to_excel(output_file_reviews, index=False, engine="openpyxl")


# Also persist both tables as CSV, quoting every field.
output_file_apps_csv = "App_description.csv"
output_file_reviews_csv = "App_review.csv"

df_apps.to_csv(output_file_apps_csv, index=False, quoting=csv.QUOTE_ALL)
df_reviews.to_csv(output_file_reviews_csv, index=False, quoting=csv.QUOTE_ALL)



Detail Aplikasi yang Ditemukan:
                           App ID  \
0           com.traveloka.android   
1                   ctrip.english   
2     net.skyscanner.android.main   
3       com.agoda.mobile.consumer   
4                  com.tiket.gits   
..                            ...   
74          com.traveloka.android   
75         id.go.kemlu.safetravel   
76  com.rghvsapp.android.sosalert   
77  jp.co.rcsc.safetyTips.android   
78      com.marinetraffic.android   

                                        Keyword  \
0                                Flight Booking   
1                                Flight Booking   
2                                Flight Booking   
3                                Flight Booking   
4                                Flight Booking   
..                                          ...   
74  Transportation Booking Bus Train Car Rental   
75                     Travel Safety and Alerts   
76                     Travel Safety and Alerts   
77           