# STEP 1: Import Libraries & Load Data

In [30]:
import pandas as pd
import numpy as np
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')

# Read both CSV files (paths are relative to the working directory)
apps = pd.read_csv("apps.csv")
reviews = pd.read_csv("user_reviews.csv")

# Report success and the (rows, columns) shape of each frame in one call
print(
    "✅ Data Loaded Successfully",
    f"Apps Shape: {apps.shape}",
    f"Reviews Shape: {reviews.shape}",
    sep="\n",
)


✅ Data Loaded Successfully
Apps Shape: (9659, 14)
Reviews Shape: (64295, 5)


# STEP 2: Data Cleaning & Preparation

In [31]:
# Clean the apps table, derive the paid-only subset, and save both frames.
#
# The cleaned frame is built from `apps` in one chain and assigned back to
# `apps`, because later cells read that name. 'Installs' and 'Price' are cast
# with `.astype(str)` before the `.str` calls so this cell can be re-run on an
# already-cleaned frame; without the cast, a second run raises AttributeError
# because the `.str` accessor is not available on the numeric columns that the
# first run produced.
apps = (
    apps
    # Drop rows with missing ratings
    .dropna(subset=['Rating'])
    .assign(
        # 'Installs': remove '+' and ',' characters, then convert to int
        Installs=lambda df: (
            df['Installs']
            .astype(str)
            .str.replace('+', '', regex=False)
            .str.replace(',', '', regex=False)
            .astype(int)
        ),
        # 'Price': remove the '$' character, then convert to float
        Price=lambda df: (
            df['Price']
            .astype(str)
            .str.replace('$', '', regex=False)
            .astype(float)
        ),
        # 'Reviews': values that cannot be parsed as numbers become NaN
        Reviews=lambda df: pd.to_numeric(df['Reviews'], errors='coerce'),
    )
    # Remove rows that are exact duplicates across all columns
    .drop_duplicates()
)

# Paid apps only; an explicit copy keeps this subset independent of `apps`
paid_apps = apps.loc[apps['Type'] == 'Paid'].copy()

# Save the datasets without the index column.
# NOTE(review): `reviews` is written out exactly as loaded; no cleaning step is
# applied to it in this cell despite the "cleaned_" file name.
apps.to_csv("cleaned_apps.csv", index=False)
reviews.to_csv("cleaned_reviews.csv", index=False)

print("✅ Data Cleaning Completed & Saved")


✅ Data Cleaning Completed & Saved


# STEP 3: Category Exploration

In [None]:
# Category Exploration – Top 10 App Categories

# Count apps per category and keep the first ten rows of the resulting counts
category_counts = apps['Category'].value_counts()
top_categories = category_counts.iloc[:10]

# Heading and table go out in one print call, one per line
print("📊 Top 10 App Categories:", top_categories, sep="\n")


📊 Top 10 App Categories:
Category
FAMILY             1608
GAME                912
TOOLS               718
FINANCE             302
PRODUCTIVITY        301
LIFESTYLE           301
PERSONALIZATION     298
MEDICAL             290
BUSINESS            263
PHOTOGRAPHY         263
Name: count, dtype: int64


# STEP 4: Metrics Analysis

In [None]:
# Metrics Analysis – Ratings, Installs, Pricing

# Compute each headline figure first, then report them together below
mean_rating = round(apps['Rating'].mean(), 2)
max_installs = apps['Installs'].max()
# idxmax gives the index label of the row with the largest 'Reviews' value
most_reviewed_label = apps['Reviews'].idxmax()
most_reviewed_app = apps.loc[most_reviewed_label, 'App']
# Mean price is taken over `paid_apps` only, not the whole `apps` frame
mean_paid_price = round(paid_apps['Price'].mean(), 2)

print("\n📈 Metrics Summary:")
print("Average App Rating:", mean_rating)
print("Highest Number of Installs:", max_installs)
print("Most Reviewed App:", most_reviewed_app)
print("Average App Price (Paid only):", mean_paid_price)



📈 Metrics Summary:
Average App Rating: 4.17
Highest Number of Installs: 1000000000
Most Reviewed App: Facebook
Average App Price (Paid only): 14.08


# STEP 5: Sentiment Analysis

In [None]:
# Sentiment Analysis – User Sentiment Summary

print("\n💬 Sentiment Analysis Summary:")

# Number of reviews per value of the 'Sentiment' column
sentiment_counts = reviews['Sentiment'].value_counts()

# Each count as a percentage of the total of the counts
total_labelled = sentiment_counts.sum()
sentiment_percent = sentiment_counts.div(total_labelled).mul(100)

print("Sentiment Counts:\n", sentiment_counts)
print("Sentiment Percentages:\n", sentiment_percent.round(2))



💬 Sentiment Analysis Summary:
Sentiment Counts:
 Sentiment
Positive    23998
Negative     8271
Neutral      5163
Name: count, dtype: int64
Sentiment Percentages:
 Sentiment
Positive    64.11
Negative    22.10
Neutral     13.79
Name: count, dtype: float64


# STEP 6: Interactive Visualizations

In [None]:
# Interactive Visualizations (Using Plotly)

# 1. Top 10 App Categories (Bar Chart)
# Reverse the order of `top_categories` once and reuse it for x, y and color
categories_reversed = top_categories.iloc[::-1]
fig1 = px.bar(
    x=categories_reversed.values,
    y=categories_reversed.index,
    color=categories_reversed.values,
    color_continuous_scale='Viridis',
    orientation='h',
    labels=dict(x='App Count', y='Category'),
    title='Top 10 App Categories',
)
fig1.show()

# 2. Rating Distribution (Histogram)
# Histogram of the 'Rating' column of `apps`, drawn in a single fixed color
rating_bar_colors = ['#00CC96']
fig2 = px.histogram(
    data_frame=apps,
    x='Rating',
    title='Rating Distribution of Apps',
    nbins=30,
    color_discrete_sequence=rating_bar_colors,
)
fig2.show()

# 3. Free vs Paid Apps (Pie Chart)
# Slices are named by the 'Type' column of `apps`; hole=0.4 is passed to px.pie
fig3 = px.pie(
    data_frame=apps,
    names='Type',
    hole=0.4,
    title='App Type Distribution (Free vs Paid)',
)
fig3.show()

# 4. Price vs Rating (Scatter for Paid Apps)
# 'Price' drives both the x axis and the point color; hover shows the 'App' value
fig4 = px.scatter(
    data_frame=paid_apps,
    x='Price',
    y='Rating',
    color='Price',
    color_continuous_scale='Plasma',
    hover_name='App',
    title='Price vs Rating (Paid Apps)',
)
fig4.show()

# 5. Sentiment Distribution (Bar Chart)
# Index of `sentiment_counts` goes on x; its values set both bar height and color
sentiment_labels = sentiment_counts.index
sentiment_totals = sentiment_counts.values
fig5 = px.bar(
    x=sentiment_labels,
    y=sentiment_totals,
    color=sentiment_totals,
    color_continuous_scale='Cividis',
    labels=dict(x='Sentiment', y='Count'),
    title='User Sentiment from Reviews',
)
fig5.show()
