In [1]:
import os

import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt



In [2]:
# Load the cleaned LinkNYC weekly-usage dataset.
# The CSV location can be overridden with the LINKNYC_WEEKLY_USAGE_CSV
# environment variable so the notebook is runnable on other machines; when the
# variable is unset, the original local path is used unchanged.
DATA_PATH = os.environ.get(
    "LINKNYC_WEEKLY_USAGE_CSV",
    '/Users/ayemaq/Desktop/Mod5-Project-LinkNYC-Engagement-Analysis/data/clean/LinkNYC_Weekly_Usage_cleaned_2022-current.csv',
)
df = pd.read_csv(DATA_PATH)

In [3]:
# List the column labels of the loaded frame (for a DataFrame, keys() is its columns).
df.columns

Index(['report_ending_weekly_starting', 'number_of_sessions',
       'average_session_length', 'number_of_unique_clients', 'tb_downloaded',
       'tb_uploaded', 'cumulative_bandwidth', 'cumulative_sessions',
       'cumulative_subscribers', 'cohort', 'sessions_per_user',
       'GB_per_session', 'log_number_of_sessions',
       'log_number_of_unique_clients', 'log_tb_downloaded', 'month'],
      dtype='object')

In [4]:
# Preview the first five rows (five is also head()'s default).
df.head(5)

Unnamed: 0,report_ending_weekly_starting,number_of_sessions,average_session_length,number_of_unique_clients,tb_downloaded,tb_uploaded,cumulative_bandwidth,cumulative_sessions,cumulative_subscribers,cohort,sessions_per_user,GB_per_session,log_number_of_sessions,log_number_of_unique_clients,log_tb_downloaded,month
0,2022-08-07,3668886,00:23:14,208261,106.21,18.23,27244.46,3095068353,11606145,2022-08,17.616769,0.033918,15.115399,12.246547,4.665418,8
1,2022-08-14,3752859,00:23:40,214463,112.34,19.64,27368.9,3098737239,11655953,2022-08,17.498865,0.035168,15.138029,12.275893,4.72153,8
2,2022-08-21,3296456,00:23:14,219238,96.87,15.99,27613.74,3105786554,11763607,2022-08,15.03597,0.034237,15.008359,12.297913,4.57337,8
3,2022-08-28,4047052,00:23:12,226977,116.79,19.69,27750.22,3109833606,11819266,2022-08,17.830229,0.033723,15.213499,12.332604,4.760377,8
4,2022-09-04,4055208,00:23:15,231135,119.57,20.07,27889.86,3113888814,11873405,2022-09,17.54476,0.034435,15.215513,12.350757,4.783902,9


In [72]:
# Recompute the 75th-percentile thresholds from the current data.
users_p75    = df["number_of_unique_clients"].quantile(0.75)
sessions_p75 = df["number_of_sessions"].quantile(0.75)
# The print below reads `gbps_p75`; it has to be defined in this cell, otherwise
# the cell raises NameError on a fresh kernel. Derive it from the data exactly
# like the two thresholds above so the printed label is accurate.
gbps_p75     = df["GB_per_session"].quantile(0.75)

# Fixed GB-per-session cutoff: a hand-set constant, not a percentile of the data.
# Kept under its original name so any later cell that reads `gbps_p` still works.
gbps_p    = .035

print("Users 75th percentile:", round(users_p75, 0))
print("Sessions 75th percentile:", round(sessions_p75, 0))
print("GB_per_session 75th percentile:", round(gbps_p75, 5))


Users 75th percentile: 297622.0
Sessions 75th percentile: 4978319.0
GB_per_session 75th percentile: 0.035


# Funnel

In [24]:
# ---- Thresholds ------------------------------------------------------------
# Median of weekly unique clients: the cut-off for a "high-reach" week.
users_50 = df["number_of_unique_clients"].quantile(q=0.5)
# Fixed GB-per-session cut-off for an "engaged" week.
gbps_p = 0.035

# ---- Per-week stage flags (stored on df as boolean columns) ----------------
# NOTE(review): every flag is evaluated on its own column. Stage 4 does not
# require stage 3, so the stage counts are not guaranteed to be nested and the
# step-to-step ratios below are count ratios rather than strict conversions --
# confirm this is the intended funnel definition.
df["stage1_all"] = True
df["stage2_active"] = df["number_of_sessions"].gt(0)
df["stage3_highreach"] = df["number_of_unique_clients"].ge(users_50)
df["stage4_engaged"] = df["GB_per_session"].ge(gbps_p)

# A week is "retained" when it and the row immediately after it are both
# engaged; the final row has no successor and is therefore never retained.
engaged_now = df["stage4_engaged"]
engaged_next = engaged_now.shift(-1, fill_value=False)
df["stage5_retained"] = engaged_now & engaged_next

# ---- Stage counts ----------------------------------------------------------
# Display label -> flag column, listed in funnel order.
stage_columns = {
    "Stage 1: All Weeks": "stage1_all",
    "Stage 2: Active Weeks": "stage2_active",
    "Stage 3: High-Reach Weeks": "stage3_highreach",
    "Stage 4: Engaged Weeks": "stage4_engaged",
    "Stage 5: Retained Weeks": "stage5_retained",
}
funnel_counts = [
    (stage_label, df[flag_column].sum())
    for stage_label, flag_column in stage_columns.items()
]
funnel_df = pd.DataFrame(funnel_counts, columns=["Stage", "Count"])

# ---- Conversion ratios -----------------------------------------------------
stage_counts = funnel_df["Count"]

# Ratio of each stage's count to the stage before it; the first stage has no
# predecessor, so it is pinned to 1.0.
step_conversion = stage_counts.div(stage_counts.shift(1)).round(3)
step_conversion.iloc[0] = 1.0
funnel_df["Step → Step Conversion"] = step_conversion

# Ratio of each stage's count to the first stage's count.
funnel_df["Overall Conversion (from Stage 1)"] = (
    stage_counts.div(stage_counts.iloc[0]).round(3)
)

funnel_df


Unnamed: 0,Stage,Count,Step → Step Conversion,Overall Conversion (from Stage 1)
0,Stage 1: All Weeks,165,1.0,1.0
1,Stage 2: Active Weeks,165,1.0,1.0
2,Stage 3: High-Reach Weeks,83,0.503,0.503
3,Stage 4: Engaged Weeks,65,0.783,0.394
4,Stage 5: Retained Weeks,52,0.8,0.315


In [25]:
# Draw the funnel table as a Plotly funnel chart.
# `go` (plotly.graph_objects) is imported once in the notebook's first cell, so
# it is not re-imported here.

# One fill colour per stage, in the same order as the rows of funnel_df.
stage_colors = ["#6DCFF6", "#4A90E2", "#355070", "#6D597A", "#B56576"]

funnel_trace = go.Funnel(
    y=funnel_df["Stage"],
    x=funnel_df["Count"],
    # Label each bar with its count, its share of the previous bar, and its
    # share of the first bar.
    textinfo="value+percent previous+percent initial",
    marker=dict(
        color=stage_colors,
        line=dict(width=1, color="white"),
    ),
)

fig = go.Figure(funnel_trace)
fig.update_layout(
    title="📊 LinkNYC True Engagement Funnel: Activity → Retention",
    plot_bgcolor="white",
    paper_bgcolor="white",
    height=520,
)
fig.show()


# Feature Engineering

In [None]:
# best weeks to invest in ads