In [None]:
# Cell 1: Setup and auto-detect latest landing folder
from pathlib import Path
import os
import pandas as pd
from dotenv import load_dotenv

# load PROJECT_ROOT from .env
load_dotenv()
project_root = Path(os.getenv("PROJECT_ROOT"))

# locate landing directory and pick the most recently modified sub-folder
landing_dir = project_root / "landing"
subdirs = [d for d in landing_dir.iterdir() if d.is_dir()]
if not subdirs:
    raise FileNotFoundError(f"No subdirectories found in {landing_dir}")
data_dir = max(subdirs, key=lambda d: d.stat().st_mtime)

# sanity check
print("Using data directory:", data_dir)
print("Files:", [p.name for p in data_dir.iterdir()])


Data directory: C:\Users\Earth\BEDROT PRODUCTIONS\BEDROT DATA LAKE\data_lake\landing\20250526_184101
Files: [WindowsPath('C:/Users/Earth/BEDROT PRODUCTIONS/BEDROT DATA LAKE/data_lake/landing/20250526_184101/ads.json'), WindowsPath('C:/Users/Earth/BEDROT PRODUCTIONS/BEDROT DATA LAKE/data_lake/landing/20250526_184101/adsets.json'), WindowsPath('C:/Users/Earth/BEDROT PRODUCTIONS/BEDROT DATA LAKE/data_lake/landing/20250526_184101/campaigns.json'), WindowsPath('C:/Users/Earth/BEDROT PRODUCTIONS/BEDROT DATA LAKE/data_lake/landing/20250526_184101/insights.json')]


In [2]:
# Cell 2: Load JSON into DataFrames
ads       = pd.read_json(data_dir / "ads.json")
adsets    = pd.read_json(data_dir / "adsets.json")
campaigns = pd.read_json(data_dir / "campaigns.json")
insights  = pd.read_json(data_dir / "insights.json")

# ensure numeric types for spend/impressions etc.
for col in ["spend","impressions","clicks","reach"]:
    if col in insights.columns:
        insights[col] = pd.to_numeric(insights[col], errors="coerce")

# quick peek
ads.head(), adsets.head(), campaigns.head(), insights.head()


(                   id name  status            adset_id         campaign_id  \
 0  120224903624100080  AD1  ACTIVE  120224903624060080  120224903624070080   
 1  120224906419920080  AD3  ACTIVE  120224906419930080  120224906419890080   
 2  120224907266770080  AD4  ACTIVE  120224907266820080  120224907266750080   
 3  120224904466980080  AD2  ACTIVE  120224904466990080  120224903624070080   
 4  120224906419910080  AD4  ACTIVE  120224906419970080  120224906419890080   
 
   effective_status                    creative  \
 0     ADSET_PAUSED  {'id': '2594406180904571'}   
 1     ADSET_PAUSED  {'id': '1418315172694032'}   
 2     ADSET_PAUSED  {'id': '2178986529195170'}   
 3     ADSET_PAUSED   {'id': '627314753692200'}   
 4     ADSET_PAUSED   {'id': '778352828063508'}   
 
                                       tracking_specs  \
 0  [{'action.type': ['offsite_conversion'], 'fb_p...   
 1  [{'action.type': ['offsite_conversion'], 'fb_p...   
 2  [{'action.type': ['offsite_conversion'], 

In [3]:
# Cell 3: File shapes
shapes = pd.DataFrame({
    "file":       ["ads","adsets","campaigns","insights"],
    "rows":       [ads.shape[0], adsets.shape[0], campaigns.shape[0], insights.shape[0]],
    "columns":    [ads.shape[1], adsets.shape[1], campaigns.shape[1], insights.shape[1]]
})
print(shapes)


        file  rows  columns
0        ads    54       10
1     adsets    31       13
2  campaigns    13        7
3   insights    13       10


In [4]:
# Cell 4: Status distributions
camp_status  = campaigns["status"].value_counts().rename_axis("status").reset_index(name="count")
adset_status = adsets   ["status"].value_counts().rename_axis("status").reset_index(name="count")
ad_status    = ads      ["status"].value_counts().rename_axis("status").reset_index(name="count")

print("Campaign statuses:\n",  camp_status, "\n")
print("Adset statuses:\n",   adset_status, "\n")
print("Ad statuses:\n",      ad_status,    "\n")


Campaign statuses:
    status  count
0  PAUSED      9
1  ACTIVE      4 

Adset statuses:
    status  count
0  PAUSED     21
1  ACTIVE     10 

Ad statuses:
    status  count
0  ACTIVE     32
1  PAUSED     22 



In [5]:
# Cell 5: Relational counts
#  • adsets per campaign
adsets_per_campaign = adsets.groupby("campaign_id").size().reset_index(name="adset_count")

#  • ads per adset
ads_per_adset = ads.groupby("adset_id").size().reset_index(name="ads_count")

print("Adsets per campaign:\n", adsets_per_campaign.head(), "\n")
print("Ads per adset:\n",      ads_per_adset.head(),    "\n")


Adsets per campaign:
           campaign_id  adset_count
0  120214803933120080            1
1  120214961689440080            1
2  120215277232180080            1
3  120217792461870080            2
4  120218067399650080            2 

Ads per adset:
              adset_id  ads_count
0  120214803933310080          3
1  120214961689410080          3
2  120215277232140080          3
3  120217792461860080          3
4  120218066028920080          3 



In [6]:
# Cell 6: Campaign performance overview & stats
camp_perf = (
    campaigns[["id","name","objective"]]
      .merge(insights[["campaign_id","impressions","clicks","spend","reach"]],
             left_on="id", right_on="campaign_id", how="left")
)

perf_stats = camp_perf[["impressions","clicks","spend","reach"]].describe()

print("Performance overview:\n", camp_perf.head(), "\n")
print("Performance stats:\n",   perf_stats,      "\n")


Performance overview:
                    id                                              name  \
0  120224907266750080          PIG1987 - THE STATE OF THE WORLD - BROAD   
1  120224906903540080  PIG1987 - THE STATE OF THE WORLD - BROAD SPOTIFY   
2  120224906419890080    PIG1987 - THE STATE OF THE WORLD - HARD TRANCE   
3  120224903624070080         PIG1987 - THE STATE OF THE WORLD - TECHNO   
4  120223459844420080                            THE SOURCE - Streaming   

            objective         campaign_id  impressions  clicks   spend   reach  
0  OUTCOME_ENGAGEMENT  120224907266750080        19751     217   36.42   17051  
1  OUTCOME_ENGAGEMENT  120224906903540080        24213     242   51.50   19608  
2  OUTCOME_ENGAGEMENT  120224906419890080        15864     153   34.61   13942  
3  OUTCOME_ENGAGEMENT  120224903624070080        20157     192   37.61   17679  
4  OUTCOME_ENGAGEMENT  120223459844420080       223162    2553  225.97  136806   

Performance stats:
         impression

In [7]:
# Cell 7: Spend breakdowns
# • Spend per campaign
spend_per_campaign = (
    insights.groupby("campaign_id")["spend"]
            .sum()
            .reset_index()
            .merge(campaigns[["id","name"]],
                   left_on="campaign_id", right_on="id", how="left")
            .loc[:, ["campaign_id","name","spend"]]
            .rename(columns={"spend":"total_spend"})
)

print("Spend per campaign:\n", spend_per_campaign, "\n")

# • Spend per adset (if available)
if "adset_id" in insights.columns:
    spend_per_adset = (
        insights.groupby("adset_id")["spend"]
                .sum()
                .reset_index()
                .merge(adsets[["id","name"]],
                       left_on="adset_id", right_on="id", how="left")
                .loc[:, ["adset_id","name","spend"]]
                .rename(columns={"spend":"total_spend"})
    )
    print("Spend per adset:\n", spend_per_adset)
else:
    print("No adset_id in insights – adset-level spend not available.")


Spend per campaign:
            campaign_id                                              name  \
0   120214803933120080                 THE FEW THAT REMAINED - Streaming   
1   120214961689440080                  PIG1987 - DYSMORPHIA - Streaming   
2   120215277232180080            ENGAGEMENT PROTOCOL - Streaming - Copy   
3   120217792461870080                        PIG1987 - HEAVEN NIGHTCLUB   
4   120218067399650080                 ZONE A0 ANOVUS STREAMING CAMPAIGN   
5   120218209525770080                       ANOVUS - Streaming - Copy 2   
6   120218209686890080     PIG1987 - HEAVEN NIGHTCLUB - Streaming - Copy   
7   120222355643880080    PIG1987 - TRANSFORMER ARCHITECTURE - STREAMING   
8   120223459844420080                            THE SOURCE - Streaming   
9   120224903624070080         PIG1987 - THE STATE OF THE WORLD - TECHNO   
10  120224906419890080    PIG1987 - THE STATE OF THE WORLD - HARD TRANCE   
11  120224906903540080  PIG1987 - THE STATE OF THE WORLD - BROAD SP