In [26]:
from app_store_scraper import AppStore
import pandas as pd
from textblob import TextBlob
from collections import Counter
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
import matplotlib.pyplot as plt
import seaborn as sns
import os
import string

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/yaroslavopanasenko/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/yaroslavopanasenko/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [2]:
def preprocess_data(reviews):
    df = pd.DataFrame(reviews)
    df["cleaned_review"] = df["review"].str.lower().str.replace(r'[^\w\s]', '')
    return df

def calculate_metrics(data):
    avg_rating = data["rating"].mean()
    rating_dist = data["rating"].value_counts(normalize=True) * 100
    return {
        "average_rating": str(avg_rating),
        "rating_distribution": rating_dist.to_dict()
    }

def generate_csv(reviews):
    df = pd.DataFrame(reviews)
    file_path = os.path.join("reports", "reviews.csv")
    df.to_csv(file_path, index=False)
    return file_path

def fetch_reviews(app_name, app_id, country='us', num_reviews=100):
    app = AppStore(country=country, app_name = app_name, app_id = app_id)
    app.review(how_many=num_reviews)
    return app.reviews

In [24]:
def analyze_sentiment(reviews):
    sentiment_results = []
    for review in reviews:
        analysis = TextBlob(review["cleaned_review"])
        polarity = analysis.sentiment.polarity
        sentiment = "positive" if polarity > 0 else "negative" if polarity < 0 else "neutral"
        sentiment_results.append({
            "title": review["title"],
            "sentiment": sentiment,
            "review": review["review"],
            "cleaned_review": review["cleaned_review"]
        })
    return sentiment_results

def generate_insights(reviews):
    negative_reviews = [review["cleaned_review"] for review in analyze_sentiment(reviews) if review["sentiment"] == "negative"]
    
    # Tokenize and extract common words
    all_words = []
    for review in negative_reviews:
        words = word_tokenize(review.lower())
        words = [word for word in words if word.isalpha() and word not in ["the", "and", "is", "to", "a", "of", "in"]]
        all_words.extend(words)
    
    word_counts = Counter(all_words)
    common_issues = word_counts.most_common(5)
    
    suggestions = []
    issue_mapping = {
        "slow": "Enhance app performance to improve user experience.",
        "lag": "Enhance app performance to improve user experience.",
        "performance": "Enhance app performance to improve user experience.",
        "crash": "Fix bugs and stability issues to prevent crashes.",
        "bug": "Fix bugs and stability issues to prevent crashes.",
        "error": "Fix bugs and stability issues to prevent crashes.",
        "design": "Improve the user interface for better usability.",
        "ui": "Improve the user interface for better usability.",
        "layout": "Improve the user interface for better usability.",
        "support": "Enhance customer support responsiveness.",
        "help": "Enhance customer support responsiveness.",
        "customer": "Enhance customer support responsiveness.",
        "battery": "Optimize battery consumption for efficiency.",
        "drain": "Optimize battery consumption for efficiency.",
        "power": "Optimize battery consumption for efficiency."
    }
    
    for word, _ in common_issues:
        if word in issue_mapping:
            suggestions.append(issue_mapping[word])
    
    insights = {
        "common_negative_phrases": common_issues,
        "suggested_improvements": list(set(suggestions))
    }
    
    return insights

In [4]:
def generate_visualizations(data):
    os.makedirs("reports", exist_ok=True)
    
    # Rating Distribution
    plt.figure(figsize=(8, 6))
    sns.countplot(x=data['rating'], palette='viridis')
    plt.title("Rating Distribution")
    plt.xlabel("Rating")
    plt.ylabel("Count")
    plt.savefig("reports/rating_distribution.png")
    plt.close()
    
    # Sentiment Distribution
    plt.figure(figsize=(8, 6))
    sentiment_counts = data['sentiment'].value_counts()
    sentiment_counts.plot(kind='bar', color=['green', 'gray', 'red'])
    plt.title("Sentiment Distribution")
    plt.xlabel("Sentiment")
    plt.ylabel("Count")
    plt.savefig("reports/sentiment_distribution.png")
    plt.close()

In [5]:
def collect_reviews(app_name: str, app_id: str):
    reviews = fetch_reviews(app_name, app_id)
    return reviews


def get_metrics(app_name: str, app_id: str):
    reviews = fetch_reviews(app_name, app_id)
    data = preprocess_data(reviews)
    metrics = calculate_metrics(data)
    generate_visualizations(data)
    return {
        "metrics": metrics,
        "visualizations": {
            "rating_distribution": "reports/rating_distribution.png",
            "sentiment_distribution": "reports/sentiment_distribution.png"
        }
    }


def get_sentiments(app_name: str, app_id: str):
    reviews = fetch_reviews(app_name, app_id)
    sentiments = analyze_sentiment(reviews)
    return {"message": "Sentiment analysis completed", "sentiments": sentiments}


def download_reviews(app_name: str, app_id: str):
    reviews = fetch_reviews(app_name, app_id)
    csv_file = generate_csv(reviews)
    return {"message": "CSV generated successfully", "file_path": csv_file}


def get_insights(app_name: str, app_id: str):
    reviews = fetch_reviews(app_name, app_id)
    insights = generate_insights(reviews)
    return insights

In [6]:
app_name = "nebula-horoscope-astrology"
app_id = "1459969523"

reviews = collect_reviews(app_name, app_id)
reviews

2025-01-31 16:27:45,147 [INFO] Base - Initialised: AppStore('us', 'nebula-horoscope-astrology', 1459969523)
2025-01-31 16:27:45,147 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/us/app/nebula-horoscope-astrology/id1459969523
2025-01-31 16:27:46,491 [INFO] Base - [id:1459969523] Fetched 100 reviews (100 fetched in total)


[{'date': datetime.datetime(2019, 10, 5, 23, 30, 44),
  'developerResponse': {'id': 11098150,
   'body': 'Thanks for your review! 💖',
   'modified': '2019-10-07T11:41:24Z'},
  'review': "Crazy accurate!!! I am not sure who writes these or how they come up with them but it's almost crazy how accurate they are sometimes!! I'm usually not one of those swear by astrology sign type of people but this app really describes what I am going through most of the time and it gives you positive hope. And I even once had an appointment with an astrologer who was supposed to help me solve some of my personal issues, and that was of course a different experience. But for those who haven’t seen real astrologers, this app is the closest to it I’ve ever seen And I can tell you for sure that with the help of the stars you can find out the most important numbers in your life, which tattoo should people of your sign do, when is the best moon for a new haircut, and a lot more. And this app is great about it,

In [7]:
df = pd.DataFrame(reviews)
df.head()

Unnamed: 0,date,developerResponse,review,rating,isEdited,title,userName
0,2019-10-05 23:30:44,"{'id': 11098150, 'body': 'Thanks for your revi...",Crazy accurate!!! I am not sure who writes the...,5,False,Anna,Uz3-155-597
1,2019-10-05 23:27:49,"{'id': 11098037, 'body': 'It's so kind of you,...",I really like getting my daily horoscope in th...,5,False,Alex,m8fjl5VA
2,2019-10-05 23:27:35,"{'id': 11098029, 'body': 'Appreciate your feed...",Nothing but great things to say about this app...,5,False,Tanya,Сцукатик
3,2024-11-22 11:43:24,"{'id': 48445622, 'body': 'Hello dear user! ...",I went through a bunch of psychics before i fo...,5,False,Expensive; not all psychics like they say they...,B. S. Readers
4,2024-06-28 22:01:14,"{'id': 45075114, 'body': 'Hello Dear Customer,...","0 stars, if I could do negative starts I would...",1,False,"Worthless, and it’s Such a Shame Really",ShamefulAppReviewer


In [8]:
df["cleaned_review"] = df["review"].str.lower().str.replace(r'[^\w\s]', '')
df["cleaned_review"] = df["cleaned_review"].apply(lambda x: x.translate(str.maketrans('', '', string.punctuation)))
df.head()

Unnamed: 0,date,developerResponse,review,rating,isEdited,title,userName,cleaned_review
0,2019-10-05 23:30:44,"{'id': 11098150, 'body': 'Thanks for your revi...",Crazy accurate!!! I am not sure who writes the...,5,False,Anna,Uz3-155-597,crazy accurate i am not sure who writes these ...
1,2019-10-05 23:27:49,"{'id': 11098037, 'body': 'It's so kind of you,...",I really like getting my daily horoscope in th...,5,False,Alex,m8fjl5VA,i really like getting my daily horoscope in th...
2,2019-10-05 23:27:35,"{'id': 11098029, 'body': 'Appreciate your feed...",Nothing but great things to say about this app...,5,False,Tanya,Сцукатик,nothing but great things to say about this app...
3,2024-11-22 11:43:24,"{'id': 48445622, 'body': 'Hello dear user! ...",I went through a bunch of psychics before i fo...,5,False,Expensive; not all psychics like they say they...,B. S. Readers,i went through a bunch of psychics before i fo...
4,2024-06-28 22:01:14,"{'id': 45075114, 'body': 'Hello Dear Customer,...","0 stars, if I could do negative starts I would...",1,False,"Worthless, and it’s Such a Shame Really",ShamefulAppReviewer,0 stars if i could do negative starts i would ...


In [9]:
result = calculate_metrics(df)
result

{'average_rating': '2.44',
 'rating_distribution': {1: 45.0,
  5: 17.0,
  3: 14.000000000000002,
  2: 12.0,
  4: 12.0}}

In [20]:
sentiments = analyze_sentiment(df.to_dict(orient="records"))
df["sentiment"] = [s["sentiment"] for s in sentiments]
df.head()

Unnamed: 0,date,developerResponse,review,rating,isEdited,title,userName,cleaned_review,sentiment
0,2019-10-05 23:30:44,"{'id': 11098150, 'body': 'Thanks for your revi...",Crazy accurate!!! I am not sure who writes the...,5,False,Anna,Uz3-155-597,crazy accurate i am not sure who writes these ...,positive
1,2019-10-05 23:27:49,"{'id': 11098037, 'body': 'It's so kind of you,...",I really like getting my daily horoscope in th...,5,False,Alex,m8fjl5VA,i really like getting my daily horoscope in th...,positive
2,2019-10-05 23:27:35,"{'id': 11098029, 'body': 'Appreciate your feed...",Nothing but great things to say about this app...,5,False,Tanya,Сцукатик,nothing but great things to say about this app...,positive
3,2024-11-22 11:43:24,"{'id': 48445622, 'body': 'Hello dear user! ...",I went through a bunch of psychics before i fo...,5,False,Expensive; not all psychics like they say they...,B. S. Readers,i went through a bunch of psychics before i fo...,positive
4,2024-06-28 22:01:14,"{'id': 45075114, 'body': 'Hello Dear Customer,...","0 stars, if I could do negative starts I would...",1,False,"Worthless, and it’s Such a Shame Really",ShamefulAppReviewer,0 stars if i could do negative starts i would ...,positive


In [21]:
generate_visualizations(df)

2025-01-31 17:25:16,822 [INFO] matplotlib.category - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=data['rating'], palette='viridis')
2025-01-31 17:25:16,826 [INFO] matplotlib.category - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.


In [22]:
csv_file = generate_csv(reviews)
csv_file

'reports/reviews.csv'

In [27]:
insights = generate_insights(df.to_dict(orient="records"))
insights

{'common_negative_phrases': [('i', 164),
  ('this', 69),
  ('it', 65),
  ('app', 64),
  ('my', 63)],
 'suggested_improvements': []}