In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter, deque


path = "/churn_submits.csv"

# Read the raw submission log, parse the timestamps, and derive the calendar
# day of every event.
df = pd.read_csv(path)
df = df.assign(timestamp=pd.to_datetime(df["timestamp"]))
df = df.assign(day=df["timestamp"].dt.date)

# Keep exactly one row per distinct (day, user_id) pair, ordered by day and
# then by user, with a fresh 0..n-1 index.
daily_users = df.loc[:, ["day", "user_id"]].drop_duplicates()
daily_users = daily_users.sort_values(by=["day", "user_id"]).reset_index(drop=True)

daily_users.head()

In [None]:
# Map every day that appears in daily_users to the set of user_ids seen on it.
day_to_users = {
    active_day: set(user_ids)
    for active_day, user_ids in daily_users.groupby("day")["user_id"]
}

# Enumerate every calendar day between the first and last observed day, so
# that days without any rows are still represented downstream.
first_day = min(day_to_users)
last_day = max(day_to_users)
all_days = pd.date_range(start=first_day, end=last_day, freq="D").date

len(all_days), list(all_days)[:3], list(all_days)[-3:]


In [None]:
# Length of the trailing activity window in days; the current day is included.
WINDOW_DAYS = 7

window = deque()           # per-day user sets currently inside the window, oldest first
user_counts = Counter()    # user_id -> number of in-window days the user appears on

rows = []

# Single pass over the calendar. Each day's user set is added to the window
# once and removed once, so the unique-user count is maintained incrementally
# instead of re-unioning WINDOW_DAYS sets for every day.
#
# Note: the window starts empty, so for the first WINDOW_DAYS - 1 days it holds
# fewer than WINDOW_DAYS days and "wau" covers a correspondingly shorter span.
for day in all_days:
    # Days absent from the mapping contribute an empty set.
    users_today = day_to_users.get(day, set())

    # Bring today into the window: every user active today gains one day.
    window.append(users_today)
    user_counts.update(users_today)

    # Evict the oldest day once the window holds more than WINDOW_DAYS entries.
    if len(window) > WINDOW_DAYS:
        users_out = window.popleft()
        for u in users_out:
            user_counts[u] -= 1
            # Remove exhausted keys so len(user_counts) is exactly the number
            # of distinct users still present in the window.
            if user_counts[u] == 0:
                del user_counts[u]

    dau = len(users_today)
    wau = len(user_counts)
    # Guard against division by zero when nobody was active in the window.
    sticky = (dau / wau) if wau else 0.0

    rows.append((day, dau, wau, sticky))

metrics = pd.DataFrame(rows, columns=["day", "dau", "wau", "sticky_factor"])
metrics.head(10)


In [None]:
# Overlay the rolling weekly actives and the daily actives on one time axis.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(metrics["day"], metrics["wau"], label="WAU (7d rolling)")
ax.plot(metrics["day"], metrics["dau"], label="DAU", alpha=0.6)
ax.set_title("DAU vs WAU Dynamics")
ax.set_xlabel("Day")
ax.set_ylabel("Users")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Summary statistics of the daily sticky factor, reported at the
# 10th/25th/50th/75th/90th percentiles in addition to the describe() defaults.
sticky_percentiles = [0.1, 0.25, 0.5, 0.75, 0.9]
summary = metrics["sticky_factor"].describe(percentiles=sticky_percentiles)
summary
