In [1]:
# Cell 1: Setup and auto-detect latest landing folder
from pathlib import Path
import os
import pandas as pd
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# os.getenv() returns None when the variable is unset, and Path(None) raises an
# opaque TypeError; fail early with a message that says what to fix instead.
project_root_env = os.getenv("PROJECT_ROOT")
if project_root_env is None:
    raise RuntimeError(
        "PROJECT_ROOT is not set; define it in the environment or in a .env file"
    )
project_root = Path(project_root_env)

landing_dir = project_root / "landing"
if not landing_dir.is_dir():
    raise FileNotFoundError(f"Landing directory not found: {landing_dir}")

subdirs = [d for d in landing_dir.iterdir() if d.is_dir()]
if not subdirs:
    raise FileNotFoundError(f"No subdirectories in {landing_dir}")

# "Latest" means the most recently modified subdirectory (by st_mtime).
# NOTE(review): any subdirectory of landing/ qualifies, not only Meta Ads dumps,
# and mtime changes when a folder's contents change -- confirm this is the
# intended selection rule if landing/ holds other sources.
data_dir = max(subdirs, key=lambda d: d.stat().st_mtime)

print("Using data directory:", data_dir)
# Sorted so the listing is stable across platforms (iterdir order is arbitrary).
print("Files found:", sorted(p.name for p in data_dir.iterdir()))


Using data directory: C:\Users\Earth\BEDROT PRODUCTIONS\BEDROT DATA LAKE\data_lake\landing\meta_ads_dump_20250527_123754
Files found: ['ads.json', 'adsets.json', 'campaigns.json', 'insights.json']


In [2]:
# Cell 2: Read each JSON export from the data directory into its own DataFrame
ads, adsets, campaigns, insights = (
    pd.read_json(data_dir / json_name)
    for json_name in ("ads.json", "adsets.json", "campaigns.json", "insights.json")
)

# Convert whichever key metric columns are present to numeric dtypes.
# Values that cannot be parsed become NaN (errors="coerce").
present_metrics = [
    metric
    for metric in ("spend", "impressions", "clicks", "reach")
    if metric in insights.columns
]
insights = insights.assign(
    **{metric: pd.to_numeric(insights[metric], errors="coerce") for metric in present_metrics}
)

# Sanity check: (rows, columns) of every loaded table
print("Ads:", ads.shape, "Adsets:", adsets.shape)
print("Campaigns:", campaigns.shape, "Insights:", insights.shape)


Ads: (54, 10) Adsets: (31, 13)
Campaigns: (13, 7) Insights: (31, 13)


In [3]:
# Cell 3: Row/column inventory of the four loaded tables
loaded_tables = {
    "ads": ads,
    "adsets": adsets,
    "campaigns": campaigns,
    "insights": insights,
}
# DataFrame.shape is (n_rows, n_columns), so each tuple is (file, rows, columns).
shapes = pd.DataFrame(
    [(label, *table.shape) for label, table in loaded_tables.items()],
    columns=["file", "rows", "columns"],
)
display(shapes)


Unnamed: 0,file,rows,columns
0,ads,54,10
1,adsets,31,13
2,campaigns,13,7
3,insights,31,13


In [4]:
# Cell 4: Tally of "status" values for campaigns, adsets and ads
def _status_counts(frame):
    """Return a (status, count) table of how often each value of frame["status"] occurs."""
    tally = frame["status"].value_counts()
    return tally.rename_axis("status").reset_index(name="count")


camp_status = _status_counts(campaigns)
adset_status = _status_counts(adsets)
ad_status = _status_counts(ads)

# Print each heading followed by its table, in campaign -> adset -> ad order.
for heading, status_table in (
    ("=== Campaign Status ===", camp_status),
    ("\n=== Adset Status ===", adset_status),
    ("\n=== Ad Status ===", ad_status),
):
    print(heading)
    display(status_table)


=== Campaign Status ===


Unnamed: 0,status,count
0,PAUSED,10
1,ACTIVE,3



=== Adset Status ===


Unnamed: 0,status,count
0,PAUSED,22
1,ACTIVE,9



=== Ad Status ===


Unnamed: 0,status,count
0,ACTIVE,32
1,PAUSED,22


In [5]:
# Cell 5: Number of child rows under each parent ID
def _rows_per_key(frame, key_col, count_col):
    """Count the rows of `frame` for each distinct `key_col` value.

    Returns a two-column frame: `key_col` and the row count named `count_col`.
    """
    group_sizes = frame.groupby(key_col).size()
    return group_sizes.reset_index(name=count_col)


adsets_per_campaign = _rows_per_key(adsets, "campaign_id", "adset_count")
ads_per_adset = _rows_per_key(ads, "adset_id", "ads_count")

# Only the first five rows of each table are shown.
print("Adsets per Campaign:")
display(adsets_per_campaign.head())
print("\nAds per Adset:")
display(ads_per_adset.head())


Adsets per Campaign:


Unnamed: 0,campaign_id,adset_count
0,120214803933120080,1
1,120214961689440080,1
2,120215277232180080,1
3,120217792461870080,2
4,120218067399650080,2



Ads per Adset:


Unnamed: 0,adset_id,ads_count
0,120214803933310080,3
1,120214961689410080,3
2,120215277232140080,3
3,120217792461860080,3
4,120218066028920080,3


In [6]:
# Cell 6: Join campaign attributes to insight metrics and summarise the metrics
perf_metrics = ["impressions", "clicks", "spend", "reach"]

campaign_attrs = campaigns[["id", "name", "objective"]]
insight_rows = insights[["campaign_id", *perf_metrics]]

# Left join keeps every campaign. A campaign appears once per matching insights
# row, so the result is not one-row-per-campaign.
camp_perf = pd.merge(
    campaign_attrs,
    insight_rows,
    left_on="id",
    right_on="campaign_id",
    how="left",
)

# describe() therefore summarises the joined rows, not per-campaign totals.
perf_stats = camp_perf[perf_metrics].describe()

print("Sample Campaign Performance:")
display(camp_perf.head())
print("\nPerformance Summary Statistics:")
display(perf_stats)


Sample Campaign Performance:


Unnamed: 0,id,name,objective,campaign_id,impressions,clicks,spend,reach
0,120224907266750080,PIG1987 - THE STATE OF THE WORLD - BROAD,OUTCOME_ENGAGEMENT,120224907266750080,1321,8,3.41,1299
1,120224907266750080,PIG1987 - THE STATE OF THE WORLD - BROAD,OUTCOME_ENGAGEMENT,120224907266750080,11562,146,19.43,10146
2,120224907266750080,PIG1987 - THE STATE OF THE WORLD - BROAD,OUTCOME_ENGAGEMENT,120224907266750080,4238,51,8.33,4082
3,120224907266750080,PIG1987 - THE STATE OF THE WORLD - BROAD,OUTCOME_ENGAGEMENT,120224907266750080,1380,13,3.14,1331
4,120224907266750080,PIG1987 - THE STATE OF THE WORLD - BROAD,OUTCOME_ENGAGEMENT,120224907266750080,3822,27,6.15,3572



Performance Summary Statistics:


Unnamed: 0,impressions,clicks,spend,reach
count,31.0,31.0,31.0,31.0
mean,169543.1,1002.354839,60.930968,64095.870968
std,446346.8,1917.33926,90.669035,127752.492162
min,1229.0,7.0,3.14,1182.0
25%,2494.0,24.0,6.595,2341.5
50%,6214.0,68.0,11.5,5941.0
75%,33939.0,246.5,45.635,32020.5
max,2294817.0,7828.0,259.37,580554.0


In [7]:
# Cell 7: Total spend rolled up per campaign and (when available) per adset
def _total_spend_by(id_col, entities, name_col):
    """Sum insights["spend"] per `id_col` and attach each entity's name.

    `entities` must expose "id" and "name" columns; the name is joined on
    `id_col` == "id" with a left join and returned under `name_col`.
    Returns the columns [id_col, name_col, "total_spend"].
    """
    totals = insights.groupby(id_col)["spend"].sum().reset_index()
    labelled = totals.merge(
        entities[["id", "name"]], left_on=id_col, right_on="id", how="left"
    )
    labelled = labelled.rename(columns={"name": name_col, "spend": "total_spend"})
    return labelled.loc[:, [id_col, name_col, "total_spend"]]


# Campaign-level totals
spend_per_campaign = _total_spend_by("campaign_id", campaigns, "campaign_name")
print("Spend per Campaign:")
display(spend_per_campaign)

# Adset-level totals, computed only when insights carries an adset_id column
if "adset_id" not in insights.columns:
    print("\nNo 'adset_id' field found in insights—cannot compute adset-level spend.")
else:
    spend_per_adset = _total_spend_by("adset_id", adsets, "adset_name")
    print("\nSpend per Adset:")
    display(spend_per_adset)


Spend per Campaign:


Unnamed: 0,campaign_id,campaign_name,total_spend
0,120214803933120080,THE FEW THAT REMAINED - Streaming,129.2
1,120214961689440080,PIG1987 - DYSMORPHIA - Streaming,252.99
2,120215277232180080,ENGAGEMENT PROTOCOL - Streaming - Copy,241.34
3,120217792461870080,PIG1987 - HEAVEN NIGHTCLUB,85.07
4,120218067399650080,ZONE A0 ANOVUS STREAMING CAMPAIGN,89.21
5,120218209525770080,ANOVUS - Streaming - Copy 2,259.37
6,120218209686890080,PIG1987 - HEAVEN NIGHTCLUB - Streaming - Copy,223.3
7,120222355643880080,PIG1987 - TRANSFORMER ARCHITECTURE - STREAMING,207.97
8,120223459844420080,THE SOURCE - Streaming,229.72
9,120224903624070080,PIG1987 - THE STATE OF THE WORLD - TECHNO,40.07



Spend per Adset:


Unnamed: 0,adset_id,adset_name,total_spend
0,120214803933310080,New Engagement Ad Set,129.2
1,120214961689410080,New Engagement Ad Set,252.99
2,120215277232140080,New Engagement Ad Set,241.34
3,120217792461860080,New Engagement Ad Set,41.71
4,120218066028920080,New Engagement Ad Set - TARGET AUDIENCE Indust...,43.36
5,120218067399620080,New Engagement Ad Set - No Audience Target,41.3
6,120218067787880080,New Engagement Ad Set - TARGET AUDIENCE DNB,47.91
7,120218209525730080,New Engagement Ad Set,259.37
8,120218209686880080,New Engagement Ad Set,223.3
9,120222355643870080,New Engagement Ad Set,207.97
