In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import pytz

In [3]:
# Load the raw Play Store export. The relative path is resolved against the
# kernel's current working directory.
raw_csv_path = "Play Store Data.csv"
df = pd.read_csv(raw_csv_path)


In [4]:
def parse_reviews(value):
    """Convert a raw review count such as ``"1234"``, ``"1.5K"`` or ``"3.0M"`` to an int.

    Parameters
    ----------
    value : str, int, float or any other object
        Raw cell value. Strings may end in ``K`` (thousands) or ``M``
        (millions), in either case. Plain numbers are accepted as they are,
        so the function still works when the column was already parsed into
        a numeric dtype.

    Returns
    -------
    int or float
        The review count as an ``int``, or ``np.nan`` when the value is
        missing or cannot be interpreted as a count. This function never
        raises on malformed text.
    """
    multipliers = {'K': 1_000, 'M': 1_000_000}

    # bool is a subclass of int; a True/False cell is not a review count.
    if isinstance(value, (bool, np.bool_)):
        return np.nan
    if isinstance(value, (int, np.integer)):
        return int(value)
    if isinstance(value, (float, np.floating)):
        return int(value) if np.isfinite(value) else np.nan
    if not isinstance(value, str):
        return np.nan

    text = value.strip()

    # Only a *trailing* K/M counts as a unit suffix; a letter elsewhere in the
    # text (e.g. "Mature") must not be treated as a multiplier.
    suffix = text[-1:].upper()
    if suffix in multipliers:
        try:
            magnitude = float(text[:-1])
        except ValueError:
            return np.nan
        if not np.isfinite(magnitude):
            return np.nan
        # round() rather than int(): the float product can land just below the
        # intended integer (1.005 * 1000 == 1004.9999999999999).
        return int(round(magnitude * multipliers[suffix]))

    # isdecimal() only accepts characters that int() can actually parse,
    # unlike isnumeric(), which also accepts e.g. vulgar fractions.
    if text.isdecimal():
        return int(text)
    return np.nan

# Numeric review counts derived from the raw 'Reviews' column; entries that
# parse_reviews cannot interpret come back as NaN.
review_counts = df['Reviews'].apply(parse_reviews)
df['Parsed_Reviews'] = review_counts

In [5]:
# Build each row condition as a named mask, then combine them.
app_name_lower = df['App'].str.lower()
starts_with_xyz = app_name_lower.str.startswith(('x', 'y', 'z'))  # name begins with x, y or z
contains_letter_s = app_name_lower.str.contains('s')              # name contains 's' in any case
category_in_ecb = df['Category'].str.startswith(('E', 'C', 'B'))  # category begins with E, C or B
has_enough_reviews = df['Parsed_Reviews'] > 500

# Keep rows that avoid both name exclusions and satisfy the category and
# review-count requirements; copy so later column assignments do not touch df.
df_filtered = df[
    ~starts_with_xyz & ~contains_letter_s & category_in_ecb & has_enough_reviews
].copy()


In [6]:
# --- Installs: strip the '+' and ',' decorations and convert to integers ---
# astype(str) first so this cell can be re-run after the column has already
# been converted to integers (the .str accessor only works on strings).
installs_text = (
    df_filtered['Installs']
    .astype(str)
    .str.replace('[+,]', '', regex=True)
)

# isdecimal() accepts exactly the strings the integer cast below can parse;
# anything else (missing values, stray text) is dropped.
has_numeric_installs = installs_text.str.isdecimal()

# Take an explicit copy of the surviving rows: assigning columns into an
# un-copied boolean slice can trigger pandas' SettingWithCopyWarning.
df_filtered = df_filtered.loc[has_numeric_installs].copy()

# Fixed 64-bit width: a bare `int` maps to a platform-dependent size, and the
# per-month sums computed later can exceed the 32-bit range.
df_filtered['Installs'] = installs_text.loc[has_numeric_installs].astype('int64')

# --- Last Updated: parse to datetimes and derive a monthly period ---
# Unparseable dates become NaT instead of raising.
df_filtered['Last Updated'] = pd.to_datetime(df_filtered['Last Updated'], errors='coerce')
df_filtered['Month'] = df_filtered['Last Updated'].dt.to_period('M')


In [7]:
# Display names for selected categories, each in a different language.
# NOTE(review): the row filter earlier in the notebook keeps only categories
# starting with 'E', 'C' or 'B', so the 'Dating' entry presumably never
# matches a row that reaches this cell — confirm whether that is intended.
translation = {
    'Beauty': 'सौंदर्य',        # Hindi
    'Business': 'வணிகம்',      # Tamil
    'Dating': 'Partnersuche'   # German
}

# Look categories up case-insensitively: Series.replace only matches the exact
# spelling of each key, so a value such as 'BUSINESS' would be left untranslated.
translation_by_key = {name.casefold(): label for name, label in translation.items()}
category_keys = df_filtered['Category'].str.casefold()

# Categories without a translation (map() yields NaN for them) keep their
# original value.
df_filtered['Category'] = category_keys.map(translation_by_key).fillna(df_filtered['Category'])


In [8]:
# Total installs for every (month, category) pair, flattened back to columns.
installs_by_month = df_filtered.groupby(['Month', 'Category'])['Installs'].sum()
grouped = installs_by_month.reset_index()

# Periods -> timestamps so the months can be used as a plotting axis.
grouped['Month'] = grouped['Month'].dt.to_timestamp()

# Relative change in installs from one row to the next within each category.
category_installs = grouped.groupby('Category')['Installs']
grouped['Pct_Change'] = category_installs.pct_change()

# Current time as timezone-aware datetimes. datetime.now(tz) replaces the
# deprecated datetime.utcnow(), which returns a naive value that then has to be
# tagged by hand.
now_utc = datetime.now(pytz.utc)
now_ist = now_utc.astimezone(pytz.timezone('Asia/Kolkata'))


In [12]:
# Time-gated chart: monthly install totals per category, drawn only during a
# fixed evening window in India Standard Time.
DISPLAY_START_HOUR = 18   # 6 PM IST, inclusive
DISPLAY_END_HOUR = 21     # 9 PM IST, exclusive
GROWTH_THRESHOLD = 0.2    # shade months whose installs changed by more than +20%

# Read the clock in this cell rather than reusing a value computed in an
# earlier cell, so the gate reflects the moment the chart is requested.
current_ist = datetime.now(pytz.timezone('Asia/Kolkata'))

if DISPLAY_START_HOUR <= current_ist.hour < DISPLAY_END_HOUR:
    sns.set(style="whitegrid")
    fig, ax = plt.subplots(figsize=(14, 8))

    # sort=False keeps categories in order of first appearance.
    for cat, cat_data in grouped.groupby('Category', sort=False):
        (line,) = ax.plot(cat_data['Month'], cat_data['Installs'], label=cat)

        # Shade under the points whose change exceeds the threshold, using the
        # line's own colour so the two always match.
        growth = cat_data[cat_data['Pct_Change'] > GROWTH_THRESHOLD]
        ax.fill_between(
            growth['Month'], 0, growth['Installs'],
            facecolor=line.get_color(), alpha=0.3,
        )

    ax.set_title("Monthly Install Trends by Category (20%+ Growth Highlighted)")
    ax.set_xlabel("Month")
    ax.set_ylabel("Total Installs")
    ax.tick_params(axis='x', labelrotation=45)
    # NOTE(review): labels may contain Devanagari/Tamil text; confirm the active
    # matplotlib font provides those glyphs.
    if not grouped.empty:
        # legend() emits a warning when no labelled artists exist.
        ax.legend(title="App Category")
    ax.grid(True)
    fig.tight_layout()
    plt.show()
else:
    print("This graph is only visible between 6 PM and 9 PM IST.")


This graph is only visible between 6 PM and 9 PM IST.
