In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import datetime as dt

# Columns the analysis needs; everything else in the CSV is skipped on load.
USE_COLUMNS = ["App", "Category", "Installs", "Content Rating", "Last Updated"]

# India Standard Time is a fixed UTC+05:30 offset, so the standard-library
# timezone class is enough and the check no longer depends on the machine's
# local clock settings.
IST = dt.timezone(dt.timedelta(hours=5, minutes=30), name="IST")
WINDOW_START = dt.time(18, 0)
WINDOW_END = dt.time(21, 0)

MIN_INSTALLS = 10000      # apps must have strictly more installs than this
HIGH_GROWTH_PCT = 20      # month-over-month growth (%) that gets highlighted


def load_data(path="GooglePlayStore.zip"):
    """Read the Play Store CSV (optionally zipped) keeping only USE_COLUMNS."""
    return pd.read_csv(path, usecols=USE_COLUMNS)


def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with numeric installs and parsed update dates.

    "Installs" has '+' and ',' stripped and is converted to a number;
    "Last Updated" is parsed as a datetime.  Values that cannot be converted
    become missing instead of raising, and rows where either column is
    missing are dropped.
    """
    cleaned = df.copy()
    stripped = cleaned["Installs"].astype(str).str.replace(r"[+,]", "", regex=True)
    # to_numeric(errors="coerce") tolerates stray non-numeric tokens that
    # would make .astype(float) abort the whole run.
    cleaned["Installs"] = pd.to_numeric(stripped, errors="coerce")
    cleaned["Last Updated"] = pd.to_datetime(cleaned["Last Updated"], errors="coerce")
    return cleaned.dropna(subset=["Installs", "Last Updated"])


def filter_apps(df: pd.DataFrame) -> pd.DataFrame:
    """Keep Teen-rated apps whose name starts with 'E' and that have more
    than MIN_INSTALLS installs and a known category."""
    mask = (
        (df["Content Rating"] == "Teen")
        # na=False: a missing app name counts as "does not start with E"
        # rather than injecting NaN into the boolean mask.
        & df["App"].str.startswith("E", na=False)
        & (df["Installs"] > MIN_INSTALLS)
        & df["Category"].notna()
    )
    return df[mask]


def monthly_installs(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate installs per calendar month and category, with MoM growth.

    "Last Updated" in the result is the first day of the month.  "MoM Growth"
    is the percentage change versus the previous month that has data for the
    same category (missing for each category's first month).
    """
    # Bucket by month first: grouping on the raw day-level date would make
    # the "previous" row an arbitrary earlier update date, not the prior month.
    month_start = df["Last Updated"].dt.to_period("M").dt.to_timestamp()
    monthly = (
        df.assign(**{"Last Updated": month_start})
        .groupby(["Last Updated", "Category", "Content Rating"], as_index=False)["Installs"]
        .sum()
        .sort_values(["Last Updated", "Category"])
        .reset_index(drop=True)
    )
    monthly["Prev Installs"] = monthly.groupby("Category")["Installs"].shift(1)
    monthly["MoM Growth"] = (
        (monthly["Installs"] - monthly["Prev Installs"]) / monthly["Prev Installs"] * 100
    )
    return monthly


def is_within_window(now=None):
    """Return True when *now* falls between 6 PM and 9 PM IST (inclusive).

    *now* defaults to the current time in IST.  A timezone-aware datetime is
    converted to IST; a naive datetime is taken to already be in IST.
    """
    if now is None:
        now = dt.datetime.now(IST)
    elif now.tzinfo is not None:
        now = now.astimezone(IST)
    return WINDOW_START <= now.time() <= WINDOW_END


def build_main_figure(data: pd.DataFrame):
    """Build the combined line chart (one line per category, log y-axis)
    with an extra highlight trace per category for high-growth months."""
    fig = px.line(
        data,
        x="Last Updated",
        y="Installs",
        color="Category",
        title="Teen-Rated Apps: Total Installs Over Time",
        markers=True,
        hover_data=["MoM Growth", "Content Rating"],
    )
    fig.update_yaxes(title_text="Total Installs (Log Scale)", type="log")
    fig.update_xaxes(title_text="Date")

    for category, category_data in data.groupby("Category", sort=False):
        high_growth = category_data[category_data["MoM Growth"] > HIGH_GROWTH_PCT]
        # NOTE(review): 'tonexty' fills toward whichever trace was added just
        # before this one, which is not necessarily this category's own line.
        # Confirm the resulting shading is what is intended.
        fig.add_trace(go.Scatter(
            x=high_growth["Last Updated"],
            y=high_growth["Installs"],
            fill='tonexty',
            fillcolor="rgba(255, 0, 0, 0.2)",
            mode="none",
            name=f"High Growth ({category})",
        ))
    return fig


def build_category_figure(category_data: pd.DataFrame, category):
    """Build the installs-over-time line chart for a single category."""
    fig = px.line(
        category_data,
        x="Last Updated",
        y="Installs",
        title=f"Category: {category} - Installs Over Time",
        markers=True,
    )
    fig.update_yaxes(title_text="Total Installs (Log Scale)", type="log")
    fig.update_xaxes(title_text="Date")
    return fig


def show_figures(data: pd.DataFrame, now=None):
    """Show the main chart and one chart per category, but only inside the
    6 PM - 9 PM IST window; otherwise print a notice instead."""
    if not is_within_window(now):
        print("The graph is only available between 6 PM IST and 9 PM IST.")
        return

    build_main_figure(data).show()
    for category in data["Category"].unique():
        build_category_figure(data[data["Category"] == category], category).show()


if __name__ == "__main__":
    # Kept at module level so `df` and `filtered_df` remain available to any
    # later notebook cells, as they were before the refactor.
    df = clean_data(load_data())
    filtered_df = monthly_installs(filter_apps(df))
    show_figures(filtered_df)



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


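The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result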


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

