In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from datetime import datetime, time
# Resolve India Standard Time once for the time-window check used later.
try:
    from zoneinfo import ZoneInfo
    IST = ZoneInfo("Asia/Kolkata")
except Exception:
    # zoneinfo is missing on older Pythons, and the "Asia/Kolkata" key lookup
    # fails on hosts without a timezone database. Catch Exception (not a bare
    # except) so KeyboardInterrupt/SystemExit still propagate, and fall back to
    # a fixed UTC+05:30 offset instead of None so "now in IST" stays correct on
    # machines whose local clock is not IST.
    from datetime import timedelta, timezone
    IST = timezone(timedelta(hours=5, minutes=30), "IST")

In [2]:
# Read the Play Store CSV export, report its row count, and preview three rows.
df = pd.read_csv("Downloads/Play Store Data.csv")
print("Rows:", len(df))
df.iloc[:3]

Rows: 10841


Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up


In [3]:
# Renumber the rows so the index starts at 1 instead of the default 0.
df.index = pd.RangeIndex(start=1, stop=len(df) + 1)

In [5]:
# Preview the first three rows again to confirm the 1-based index.
df.iloc[:3]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
1,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
2,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
3,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up


In [8]:
#basic cleaning 
def parse_installs(x):
    """Convert a raw install count such as ``"10,000+"`` into a number.

    Missing input yields ``np.nan``. Anything else is stringified, stripped of
    its ``+`` and ``,`` characters, and passed to ``pd.to_numeric`` with
    ``errors='coerce'`` so text that still is not numeric becomes NaN rather
    than raising.
    """
    if pd.isna(x):
        return np.nan
    cleaned = str(x)
    for symbol in ('+', ','):
        cleaned = cleaned.replace(symbol, '')
    return pd.to_numeric(cleaned, errors='coerce')

# Normalise the two columns the analysis relies on: trim whitespace from the
# category labels and turn the install strings into numbers.
df['Category'] = df['Category'].astype('string').str.strip()
df['Installs'] = df['Installs'].apply(parse_installs)

# Discard every row where either of those columns is missing after cleaning.
df = df.dropna(axis=0, how='any', subset=['Installs', 'Category'])

In [9]:
 
# Keep only categories whose name does not begin with A, C, G or S.
df = df.loc[~df['Category'].str.startswith(('A', 'C', 'G', 'S'))]

# Total installs within each remaining category.
cat_installs = df.groupby('Category', as_index=False)['Installs'].sum()

# Order categories by total installs (largest first) and keep the first five.
top5 = cat_installs.sort_values(by='Installs', ascending=False).iloc[:5]

# Tag a category 'High' when its total exceeds 1,000,000, otherwise 'Normal'.
top5['Highlight'] = np.where(top5['Installs'].gt(1_000_000), 'High', 'Normal')

top5


Unnamed: 0,Category,Installs,Highlight
20,PRODUCTIVITY,14176090000.0,High
21,TOOLS,11452770000.0,High
7,FAMILY,10258260000.0,High
19,PHOTOGRAPHY,10088250000.0,High
16,NEWS_AND_MAGAZINES,7496318000.0,High


In [11]:
# Attach a placeholder country to each top category. The pairing is positional
# from this fixed list (first row -> first country), not drawn at random.
countries = ['United States', 'India', 'Brazil', 'United Kingdom', 'Canada']
top5['Country'] = countries[:top5.shape[0]]
top5

Unnamed: 0,Category,Installs,Highlight,Country
20,PRODUCTIVITY,14176090000.0,High,United States
21,TOOLS,11452770000.0,High,India
7,FAMILY,10258260000.0,High,Brazil
19,PHOTOGRAPHY,10088250000.0,High,United Kingdom
16,NEWS_AND_MAGAZINES,7496318000.0,High,Canada


In [12]:
def allowed_time(now=None):
    """Return True when the IST wall-clock time is within 18:00-20:00 inclusive.

    Parameters
    ----------
    now : datetime, optional
        Moment to evaluate. A timezone-aware value is converted to IST; a
        naive value is taken to already be IST wall-clock time. When omitted,
        the current time is used.

    Returns
    -------
    bool
        True if the time of day lies in the closed interval [18:00, 20:00].
    """
    from datetime import timedelta, timezone

    # India Standard Time expressed as a fixed UTC+05:30 offset.
    ist_fixed = timezone(timedelta(hours=5, minutes=30))

    if now is None:
        # A missing IST used to fall back to the machine's local clock, which
        # is only correct on hosts already running in IST. Use the fixed
        # offset instead so the window means the same thing everywhere.
        now = datetime.now(IST if IST else ist_fixed)
    elif now.tzinfo is not None:
        now = now.astimezone(ist_fixed)

    return time(18, 0) <= now.time() <= time(20, 0)

In [14]:
# Choropleth Map
# The figure is built and shown only while allowed_time() reports that the
# current time is inside the permitted window; otherwise a notice is printed.
if allowed_time():
    fig = px.choropleth(
        top5,
        locations='Country',
        locationmode='country names',
        color='Installs',
        hover_name='Category',
        color_continuous_scale='Blues',
        title='Global Installs by Top 5 App Categories (Filtered)',
    )

    # Highlight categories > 1M installs: overlay the category name in red
    # text on the country it was paired with.
    highlight_cats = top5[top5['Highlight'] == 'High']
    fig.add_scattergeo(
        locations=highlight_cats['Country'],
        locationmode='country names',
        text=highlight_cats['Category'],
        mode='text',
        textfont=dict(color='red', size=14),
        showlegend=False
    )

    fig.update_layout(
        geo=dict(showframe=False, showcoastlines=True, projection_type='equirectangular'),
        title_x=0.5
    )

    fig.show()
else:
    # Drop any figure left in the kernel by an earlier in-window run. Without
    # this, a later cell that exports `fig` would write out that stale chart
    # even though nothing was rendered this time.
    globals().pop('fig', None)
    print("This graph will only be visible between 6:00 PM and 8:00 PM IST. (It will not be shown on the dashboard at the current time.)")


This graph will only be visible between 6:00 PM and 8:00 PM IST. (It will not be shown on the dashboard at the current time.)


In [15]:

# Export the chart to a standalone HTML file. `fig` is undefined when no
# figure has been created in this kernel, so the resulting NameError is the
# signal to print the explanatory note instead.
try:
    fig.write_html("Task2_Choropleth.html")
except NameError:
    print("Note:The chart ran outside the scheduled time, so it was not saved.")
else:
    print("✅ Choropleth saved successfully!")


Note:The chart ran outside the scheduled time, so it was not saved.
