In [None]:
# Cell 1: Setup & auto-detect latest landing folder
from pathlib import Path
import os
import pandas as pd
from dotenv import load_dotenv

load_dotenv()

# PROJECT_ROOT comes from the environment (load_dotenv() above is called so a
# .env file can supply it). Check it explicitly: passing None to Path() would
# otherwise fail with an unhelpful TypeError.
project_root_env = os.getenv("PROJECT_ROOT")
if project_root_env is None:
    raise RuntimeError(
        "PROJECT_ROOT is not set; define it in the environment or in a .env file"
    )

project_root = Path(project_root_env)
landing_dir = project_root / "landing"
if not landing_dir.is_dir():
    raise FileNotFoundError(f"Landing directory not found: {landing_dir}")

subdirs = [d for d in landing_dir.iterdir() if d.is_dir()]
if not subdirs:
    raise FileNotFoundError(f"No landing sub-folders under {landing_dir}")

# "Latest" means the sub-folder with the most recent modification time.
data_dir = max(subdirs, key=lambda d: d.stat().st_mtime)

print(f"Loading data from: {data_dir}")


In [None]:
# Cell 2: Load raw JSON dumps
#
# Identifier columns are pinned to str. With its default dtype inference,
# pd.read_json may coerce numeric-looking strings via float64, which cannot
# represent every integer above 2**53; long IDs could then be rounded and
# distinct IDs could collide when used as merge keys.
# NOTE(review): a missing value in a pinned column becomes a string such as
# "nan" after the str cast - confirm the dumps have no null IDs.
ID_COLUMNS = ("id", "account_id", "campaign_id", "adset_id", "ad_id")
METRIC_COLUMNS = ("spend", "impressions", "clicks", "reach", "cpc", "ctr", "frequency")


def _read_dump(filename):
    """Read one JSON dump from ``data_dir`` into a DataFrame.

    Columns named in ``ID_COLUMNS`` are read as strings when present; all
    other columns keep pandas' default type inference.
    """
    return pd.read_json(data_dir / filename, dtype={col: str for col in ID_COLUMNS})


ads       = _read_dump("ads.json")
adsets    = _read_dump("adsets.json")
campaigns = _read_dump("campaigns.json")
insights  = _read_dump("insights.json")

# Convert key metrics to numeric; values that cannot be parsed become NaN.
for col in METRIC_COLUMNS:
    if col in insights.columns:
        insights[col] = pd.to_numeric(insights[col], errors="coerce")


In [None]:
# Cell 3: Flatten ads.json and include campaign_id & adset_id
import json


def _creative_id(creative):
    """Return the 'id' entry of a creative dict, or None for any non-dict value."""
    return creative.get('id') if isinstance(creative, dict) else None


def _json_or_none(value):
    """Serialize ``value`` to JSON text, leaving missing values as None.

    json.dumps(float('nan')) produces the text 'NaN', which is not valid JSON,
    so None and NaN are passed through as None instead of being serialized.
    """
    if value is None or (isinstance(value, float) and value != value):
        return None
    return json.dumps(value)


# Derive the flat columns without mutating `ads`, then keep only the columns
# needed downstream. campaign_id and adset_id are retained for merging.
ads_flat = (
    ads
    .assign(
        creative_id=ads['creative'].map(_creative_id),
        tracking_specs=ads['tracking_specs'].map(_json_or_none),
    )
    .loc[:, [
        'id', 'campaign_id', 'adset_id', 'name', 'status',
        'effective_status', 'created_time', 'updated_time',
        'creative_id', 'tracking_specs'
    ]]
    .rename(columns={'id': 'ad_id', 'name': 'ad_name'})
)


In [None]:
# Cell 4: Prepare campaigns_ & adsets_ for merge
#
# Generic column names are given an entity-specific name so the frames can be
# told apart once merged; columns not listed here keep their original names.
campaign_column_names = {
    'id': 'campaign_id',
    'name': 'campaign_name',
    'status': 'campaign_status',
    'objective': 'campaign_objective',
}
adset_column_names = {
    'id': 'adset_id',
    'name': 'adset_name',
    'status': 'adset_status',
    'daily_budget': 'adset_daily_budget',
    'lifetime_budget': 'adset_lifetime_budget',
}

campaigns_ = campaigns.rename(columns=campaign_column_names)
adsets_ = adsets.rename(columns=adset_column_names)


In [None]:
# Cell 5: Build the tidy DataFrame
#
# Start from the insights rows and attach campaign, ad set and ad attributes.
# Every join is a left join, so insights rows without a match are kept.
_ID_KEYS = ('campaign_id', 'adset_id', 'ad_id')


def _attach(left, right, on):
    """Left-join ``right`` onto ``left`` using the key column(s) ``on``.

    Identifier columns that ``left`` already carries and that are not part of
    the join key are dropped from ``right`` first. Otherwise pandas would
    rename them to ``<name>_x`` / ``<name>_y`` and a later merge on the plain
    name would fail with a KeyError.
    """
    keys = [on] if isinstance(on, str) else list(on)
    redundant = [
        col for col in _ID_KEYS
        if col not in keys and col in left.columns and col in right.columns
    ]
    return left.merge(right.drop(columns=redundant), on=keys, how='left')


# Joining ads on (campaign_id, adset_id) pairs every insights row with every
# ad in the same ad set, which duplicates rows and inflates metrics. Use the
# ad-level key whenever insights provides it; the ad-set-level join is kept
# only as a fallback for insights that carry no ad_id.
ad_keys = 'ad_id' if 'ad_id' in insights.columns else ['campaign_id', 'adset_id']

tidy = _attach(insights, campaigns_, 'campaign_id')
tidy = _attach(tidy, adsets_, 'adset_id')
tidy = _attach(tidy, ads_flat, ad_keys)

# Guard against repeated column labels, keeping the first occurrence.
tidy = tidy.loc[:, ~tidy.columns.duplicated()]

# A left join onto unique keys keeps the row count; a change means some
# insights rows matched more than one row on the right-hand side.
if len(tidy) != len(insights):
    print(
        f"Warning: joins changed the row count from {len(insights)} to {len(tidy)}; "
        "check for duplicate keys"
    )

print("Tidy shape:", tidy.shape)
tidy.head(3)
