In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import pytz


In [2]:
df = pd.read_csv("Play Store Data.csv")

# Drop rows with missing essential values
df = df.dropna(subset=['Rating', 'Reviews', 'Size', 'Installs', 'Last Updated'])

# Filter by rating ≥ 4.0
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
df = df[df['Rating'] >= 4.0]
df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
5,Paper flowers instructions,ART_AND_DESIGN,4.4,167,5.6M,"50,000+",Free,0,Everyone,Art & Design,"March 26, 2017",1.0,2.3 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10834,FR Calculator,FAMILY,4.0,7,2.6M,500+,Free,0,Everyone,Education,"June 18, 2017",1.0.0,4.1 and up
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100+,Free,0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,"1,000+",Free,0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [3]:
def convert_size(size):
    if 'M' in size:
        return float(size.replace('M', '').strip())
    elif 'k' in size:
        return float(size.replace('k', '').strip()) / 1024
    elif size == 'Varies with device':
        return np.nan
    return np.nan

df['Size_MB'] = df['Size'].apply(convert_size)
df = df[df['Size_MB'] >= 10]
df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Size_MB
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19.0
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up,25.0
7,Infinite Painter,ART_AND_DESIGN,4.1,36815,29M,"1,000,000+",Free,0,Everyone,Art & Design,"June 14, 2018",6.1.61.1,4.2 and up,29.0
8,Garden Coloring Book,ART_AND_DESIGN,4.4,13791,33M,"1,000,000+",Free,0,Everyone,Art & Design,"September 20, 2017",2.9.2,3.0 and up,33.0
10,Text on Photo - Fonteee,ART_AND_DESIGN,4.4,13880,28M,"1,000,000+",Free,0,Everyone,Art & Design,"October 27, 2017",1.0.4,4.1 and up,28.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10812,Fr Agnel Pune,FAMILY,4.1,80,13M,"1,000+",Free,0,Everyone,Education,"June 13, 2018",2.0.20,4.0.3 and up,13.0
10814,FR: My Secret Pets!,FAMILY,4.0,785,31M,"50,000+",Free,0,Teen,Entertainment,"June 3, 2015",1.3.1,3.0 and up,31.0
10827,Fr Agnel Ambarnath,FAMILY,4.2,117,13M,"5,000+",Free,0,Everyone,Education,"June 13, 2018",2.0.20,4.0.3 and up,13.0
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up,53.0


In [11]:
df['Last Updated'] = pd.to_datetime(df['Last Updated'], errors='coerce')
df = df[df['Last Updated'].dt.month == 1]

df['Installs'] = df['Installs'].astype(str).str.extract('(\d+)')
df['Installs'] = pd.to_numeric(df['Installs'], errors='coerce')
df = df.dropna(subset=['Installs'])  # Remove rows where conversion failed
df['Installs'] = df['Installs'].astype(int)

df['Reviews'] = pd.to_numeric(df['Reviews'], errors='coerce')

top_categories = df.groupby('Category')['Installs'].sum().nlargest(10).index
df_top = df[df['Category'].isin(top_categories)]


In [13]:
summary = df_top.groupby('Category').agg({
    'Rating': 'mean',
    'Reviews': 'sum'
}).reset_index()
summary

Unnamed: 0,Category,Rating,Reviews
0,EDUCATION,4.4,57645
1,ENTERTAINMENT,4.25,1238948
2,FAMILY,4.395455,4544623
3,GAME,4.313333,2397589
4,LIFESTYLE,4.38,42809
5,PERSONALIZATION,4.475,155996
6,PHOTOGRAPHY,4.15,563720
7,SHOPPING,4.2,19950
8,SPORTS,4.342857,1982017
9,TOOLS,4.2,8010


In [30]:
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist)
if 15 <= current_time.hour < 17:
    fig, ax1 = plt.subplots(figsize=(12, 6))

    bar_width = 0.35
    index = np.arange(len(summary))

    # Bar for total reviews (primary y-axis)
    bars1 = ax1.bar(index, summary['Reviews'], bar_width, label='Total Reviews', color='green')
    ax1.set_ylabel('Total Reviews', color='green')
    ax1.tick_params(axis='y', labelcolor='green')

    # Bar for average rating (secondary y-axis)
    ax2 = ax1.twinx()
    bars2 = ax2.bar(index + bar_width, summary['Rating'], bar_width, label='Average Rating', color='blue')
    ax2.set_ylabel('Average Rating', color='blue')
    ax2.tick_params(axis='y', labelcolor='blue')
    ax2.set_ylim(0, 5)  # Ratings are on a scale of 0 to 5

    # X-axis setup
    ax1.set_xlabel('App Category')
    ax1.set_title('Top 10 App Categories by Installs (Filtered)')
    ax1.set_xticks(index + bar_width / 2)
    ax1.set_xticklabels(summary['Category'], rotation=45)

    # Combine legends
    bars = bars1 + bars2
    labels = [bar.get_label() for bar in bars]
    ax1.legend()

    plt.tight_layout()
    plt.show()
else:
    print(f"⛔ Graph not shown: Current IST time is {current_time.strftime('%H:%M')}.\n""This visualization is only available between 15:00 and 17:00 IST.")

⛔ Graph not shown: Current IST time is 11:00.
This visualization is only available between 15:00 and 17:00 IST.
