# Sentiment Analysis of Dropbox App Reviews
Roll Number: 240103049

In [None]:

# Install required libraries (run once)
# !pip install google-play-scraper nltk textblob vaderSentiment wordcloud scikit-learn


In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from google_play_scraper import Sort, reviews
import nltk
import re
import string

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud


In [None]:

# Fetch the NLTK data this notebook relies on: the Punkt tokenizer models used
# by word_tokenize, the English stop-word list, and WordNet for the lemmatizer.
# 'punkt_tab' is the resource name recent NLTK releases look up for
# word_tokenize; 'punkt' is kept so older installs keep working.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)


In [None]:

app_id = "com.dropbox.android"
REVIEW_COUNT = 1200  # how many of the newest reviews to request

# Request the newest reviews with lang='en' and country='us'. The second value
# returned by reviews() is not used in this notebook, so it is discarded.
review_data, _ = reviews(
    app_id,
    lang='en',
    country='us',
    sort=Sort.NEWEST,
    count=REVIEW_COUNT
)

# An empty result would otherwise surface as a KeyError on the column
# selection below; stop here with a message that names the real problem.
if not review_data:
    raise RuntimeError(f"No reviews were returned for {app_id!r}; cannot build the dataset.")

# Keep the review text, star rating and timestamp under friendlier names, then
# drop exact duplicate rows and rows with missing values. Written as a single
# chain so re-running the cell always rebuilds df from review_data instead of
# mutating a previously built frame in place.
df = (
    pd.DataFrame(review_data)[['content', 'score', 'at']]
    .rename(columns={'content':'review','score':'rating','at':'date'})
    .drop_duplicates()
    .dropna()
)
df.head()


In [None]:

# Shared resources for clean_text: the English stop-word set and a lemmatizer.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()


def clean_text(text):
    """Return a normalised version of one review.

    Steps, in order: lower-case the text, delete runs of digits, delete ASCII
    punctuation, tokenize, drop tokens found in ``stop_words``, lemmatize the
    remaining tokens, and join them back together with single spaces.
    """
    lowered = text.lower()
    without_digits = re.sub(r'\d+', '', lowered)
    without_punct = without_digits.translate(str.maketrans('', '', string.punctuation))

    kept = []
    for word in word_tokenize(without_punct):
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))
    return " ".join(kept)


# Store the cleaned text alongside the original review and preview the frame.
df['clean_review'] = df['review'].apply(clean_text)
df.head()


In [None]:

# Bar chart of how many reviews fall under each star rating.
fig, ax = plt.subplots()
sns.countplot(data=df, x='rating', ax=ax)
ax.set_title("Rating Distribution - Dropbox")
plt.show()


In [None]:

# Review length = number of whitespace-separated tokens in the cleaned text.
df['review_length'] = df['clean_review'].str.split().str.len()

# Distribution of review length for each star rating. Title and axis labels
# are set so the figure can be read on its own.
fig, ax = plt.subplots()
sns.boxplot(x='rating', y='review_length', data=df, ax=ax)
ax.set_title("Review Length by Rating - Dropbox")
ax.set_xlabel("Rating")
ax.set_ylabel("Words in cleaned review")
plt.show()


In [None]:

# Build one word cloud from every cleaned review joined into a single string.
all_clean_text = " ".join(df['clean_review'])
wordcloud = WordCloud(
    background_color='white',
    width=800,
    height=400,
).generate(all_clean_text)

# Show the cloud as an image with the axes hidden.
plt.figure(figsize=(12, 6))
plt.imshow(wordcloud)
plt.axis('off')
plt.show()


In [None]:

# Score the original review text rather than clean_review: the cleaning step
# strips stop words, and the English stop-word list includes negations such as
# "not" and "no", so "not good" would otherwise be scored like "good".
df['textblob_polarity'] = df['review'].apply(lambda text: TextBlob(text).sentiment.polarity)

def label_sentiment(score):
    """Translate a signed polarity score into a sentiment label.

    Returns 'Positive' when the score is above zero, 'Negative' when it is
    below zero, and 'Neutral' in every other case.
    """
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    return 'Neutral'

# Convert each TextBlob polarity score into its Positive/Negative/Neutral label.
df['textblob_sentiment'] = df['textblob_polarity'].map(label_sentiment)


In [None]:

analyzer = SentimentIntensityAnalyzer()

# Compound-score cut-off recommended in the VADER documentation for turning
# the score into a class label.
VADER_THRESHOLD = 0.05


def label_vader_sentiment(compound):
    """Label a VADER compound score.

    Returns 'Positive' at or above ``VADER_THRESHOLD``, 'Negative' at or below
    ``-VADER_THRESHOLD``, and 'Neutral' in between.
    """
    if compound >= VADER_THRESHOLD:
        return 'Positive'
    if compound <= -VADER_THRESHOLD:
        return 'Negative'
    return 'Neutral'


# VADER takes punctuation, capitalisation and negation words into account, all
# of which the cleaning step removes, so it is given the original review text.
df['vader_score'] = df['review'].apply(lambda text: analyzer.polarity_scores(text)['compound'])
df['vader_sentiment'] = df['vader_score'].apply(label_vader_sentiment)


In [None]:

pricing_keywords = ['price','pricing','expensive','cost','subscription','storage','space']


def _mentions_pricing(text):
    """Return True if any pricing keyword occurs as a substring of ``text``."""
    for keyword in pricing_keywords:
        if keyword in text:
            return True
    return False


# Flag pricing-related reviews, then show how their VADER labels are distributed.
df['pricing_related'] = df['clean_review'].apply(_mentions_pricing)
df.loc[df['pricing_related'], 'vader_sentiment'].value_counts()


In [None]:

competitors = ['google drive','onedrive','icloud']

# Per-competitor tally: how many cleaned reviews contain each name as a plain
# substring. Without it, only the any-competitor flag below exists and no
# statement about which competitor is cited most can be checked.
competitor_counts = pd.Series(
    {name: int(df['clean_review'].str.contains(name, regex=False).sum()) for name in competitors}
).sort_values(ascending=False)
print(competitor_counts)

# True when a cleaned review mentions at least one competitor.
df['competitor_mentioned'] = df['clean_review'].apply(lambda x: any(c in x for c in competitors))
df['competitor_mentioned'].value_counts()


In [None]:

# NOTE(review): the statements below are fixed text; nothing in this cell is
# computed from df, so they need to be checked by hand against the results.
insights = (
    "Key Insights:",
    "Negative sentiment strongly linked to pricing and storage limits.",
    "Google Drive is the most cited competitor.",
)
print("\n".join(insights))
