In [6]:
# --- Setup ---
from pathlib import Path
import pandas as pd
import plotly.express as px

from pathlib import Path

def find_project_root(target_relpath="data/processed/cleaned_campaign.csv", max_up=5):
    """Return the nearest directory at or above the CWD containing *target_relpath*.

    Args:
        target_relpath: Path, relative to the project root, used as the marker
            that identifies the root.
        max_up: Maximum number of parent levels to climb above the current
            working directory.

    Returns:
        The first directory (CWD first, then successive parents) for which
        ``directory / target_relpath`` exists.

    Raises:
        FileNotFoundError: If no directory within ``max_up`` levels of the CWD
            contains the marker.
    """
    start = Path.cwd()
    # Check the CWD itself plus up to ``max_up`` ancestors, so ``max_up``
    # counts levels actually climbed. ``parents`` ends at the filesystem
    # root, so the search never re-checks the root repeatedly.
    ancestors = list(start.parents)[:max(max_up, 0)]
    for directory in (start, *ancestors):
        if (directory / target_relpath).exists():
            return directory
    raise FileNotFoundError(f"Could not find {target_relpath} upwards from {start}")

# Locate the project root by searching upward from the current directory,
# then derive the input CSV path and the chart output folder from it.
ROOT = find_project_root()
DATA = ROOT.joinpath("data", "processed", "cleaned_campaign.csv")
OUT = ROOT.joinpath("notebook", "chart")
# Create the output folder (and any missing parents); no error if it exists.
OUT.mkdir(parents=True, exist_ok=True)


# --- Load & prep ---
# The CSV is expected to carry exactly these column names:
# ['Campaign Name','Date','Spend [USD]','# of Impressions','Reach',
#  '# of Website Clicks','# of Searches','# of View Content','# of Add to Cart','# of Purchase','group']
# Dates are parsed leniently (unparseable values become NaT) and rows whose
# date is blank or malformed are then discarded.
df = (
    pd.read_csv(DATA)
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"], errors="coerce"))
    .dropna(subset=["Date"])
)

# --- Helpers ---
def save_show(fig, name: str):
    """Write *fig* to ``OUT/<name>.html`` and display it.

    The HTML file loads plotly.js from the CDN and is not opened
    automatically. The saved path is printed relative to ``ROOT``.

    Args:
        fig: Figure object providing ``write_html`` and ``show``.
        name: File name stem (without the ``.html`` extension).

    Returns:
        Whatever ``fig.show()`` returns.
    """
    destination = OUT / f"{name}.html"
    fig.write_html(str(destination), include_plotlyjs="cdn", auto_open=False)
    shown_path = destination.relative_to(ROOT)
    print(f"✅ Saved: {shown_path}")
    return fig.show()

def ts_line(y_col, title, file_stub):
    """Plot one metric over time with one line per group, then save and show it.

    Args:
        y_col: Name of the ``df`` column plotted on the y-axis.
        title: Chart title.
        file_stub: Output file name stem, passed to ``save_show``.
    """
    # Plotly draws line segments in row order with no automatic reordering,
    # so sort chronologically first. A stable sort keeps the original
    # relative order of rows that share the same date.
    ordered = df.sort_values("Date", kind="stable")
    fig = px.line(
        ordered,
        x="Date",
        y=y_col,              # single column (wide form)
        color="group",        # one trace per group value
        markers=True,
        template="plotly_dark",
        labels={"Date": "Date", y_col: y_col.replace("# ", "")},
        title=title
    )
    fig.update_layout(legend_title_text="Group")
    save_show(fig, file_stub)

# --- Time series ---
# One chart per metric; each call saves an HTML file named after file_stub.
ts_line(
    y_col="# of Impressions",
    title="Impressions over Time by Group",
    file_stub="ts_impressions_by_group",
)
ts_line(
    y_col="Spend [USD]",
    title="Spend (USD) over Time by Group",
    file_stub="ts_spend_by_group",
)
ts_line(
    y_col="# of Purchase",
    title="Purchases over Time by Group",
    file_stub="ts_purchases_by_group",
)
ts_line(
    y_col="# of Website Clicks",
    title="Website Clicks over Time by Group",
    file_stub="ts_clicks_by_group",
)

# --- Donut charts: Spend vs Purchases per group ---
def spend_vs_purchases_pie(group_label):
    """Draw a donut chart comparing total spend with total purchases for one group.

    Args:
        group_label: Value of the ``group`` column selecting the rows to total.
    """
    rows = df.loc[df["group"] == group_label]
    spend_total = rows["Spend [USD]"].sum()
    purchase_total = rows["# of Purchase"].sum()
    summary = pd.DataFrame({
        "Metric": ["Total Spend", "Total Purchases"],
        "Value": [spend_total, purchase_total],
    })

    # Fixed colour per metric so every group's chart uses the same palette.
    palette = {"Total Spend":"#636EFA","Total Purchases":"#EF553B"}
    chart = px.pie(
        summary,
        names="Metric",
        values="Value",
        title=f"Group {group_label}: Spend vs Purchases",
        color="Metric",
        color_discrete_map=palette,
        hole=0.35,  # non-zero hole turns the pie into a donut
    )
    chart.update_traces(textposition="inside", textinfo="percent+label")
    chart.update_layout(template="plotly_dark")
    save_show(chart, f"pie_spend_vs_purchases_group_{group_label}")

# Render one donut chart per distinct group label, in sorted order.
for g in sorted(df["group"].unique()):
    spend_vs_purchases_pie(group_label=g)

# --- Funnel charts per group ---
def funnel_for_group(group_label):
    """Draw a conversion funnel of summed stage counts for one group.

    Args:
        group_label: Value of the ``group`` column selecting the rows to total.
    """
    # Funnel stages in display order, top to bottom.
    stages = [
        "# of Impressions",
        "# of Website Clicks",
        "# of View Content",
        "# of Add to Cart",
        "# of Purchase",
    ]
    rows = df.loc[df["group"] == group_label]
    stage_totals = rows[stages].sum()
    # Turn the per-stage totals into a two-column frame: Stage, Count.
    funnel_df = stage_totals.rename_axis("Stage").reset_index(name="Count")
    chart = px.funnel(
        funnel_df,
        x="Count",
        y="Stage",
        title=f"Conversion Funnel - Group {group_label}",
        template="plotly_dark",
    )
    save_show(chart, f"funnel_group_{group_label}")

# Render one funnel chart per distinct group label, in sorted order.
for g in sorted(df["group"].unique()):
    funnel_for_group(group_label=g)


✅ Saved: notebook/chart/ts_impressions_by_group.html


✅ Saved: notebook/chart/ts_spend_by_group.html


✅ Saved: notebook/chart/ts_purchases_by_group.html


✅ Saved: notebook/chart/ts_clicks_by_group.html


✅ Saved: notebook/chart/pie_spend_vs_purchases_group_A.html


✅ Saved: notebook/chart/pie_spend_vs_purchases_group_B.html


✅ Saved: notebook/chart/funnel_group_A.html


✅ Saved: notebook/chart/funnel_group_B.html
