# New Title Launch Support Checklist v2

Author: @haewon.yum

Automated health check for given bundles with campaign-level diagnostics.

**Bundle-level checks:**
1. PA Status (iOS) & Attribution Method
2. Revenue Postback Reception

**Campaign-level checks (scoped by country/campaign):**
3. VT Install Reception
4. Creative Format Impressions
5. Kakao Bizboard (KOR only)
6. Bid Filter Rate + Anomaly Detection
7. CT Install Leakage & Rejected Installs

In [1]:
# #@title Colab Authentication
# from google.colab import auth
# auth.authenticate_user()

In [2]:
#@title Environment Setup

from google.cloud import bigquery
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_colwidth', 80)

client = bigquery.Client(project='moloco-ods')

def run_query(query, label=''):
    """Run a BQ query and return DataFrame. Print row count."""
    try:
        df = client.query(query).result().to_dataframe()
        status = f'✅ {label}: {len(df)} rows' if len(df) > 0 else f'⚠️ {label}: 0 rows — check needed'
        print(status)
        return df
    except Exception as e:
        print(f'❌ {label}: Query failed — {e}')
        return pd.DataFrame()

def parse_csv_input(raw):
    """Parse comma-separated input into a clean list."""
    if not raw or not raw.strip():
        return []
    return [v.strip() for v in raw.split(',') if v.strip()]

def sql_in_clause(values, field):
    """Generate 'AND field IN (...)' or empty string if no values."""
    if not values:
        return ''
    quoted = ', '.join(f"'{v}'" for v in values)
    return f"AND {field} IN ({quoted})"

In [3]:
#@title Parameters

BUNDLE_IDS_RAW = '6739616715'  #@param {type:"string"}
TARGET_COUNTRIES_RAW = ''  #@param {type:"string"}
CAMPAIGN_IDS_RAW = ''  #@param {type:"string"}

bundle_ids = parse_csv_input(BUNDLE_IDS_RAW)
target_countries = [c.upper() for c in parse_csv_input(TARGET_COUNTRIES_RAW)]
campaign_ids = parse_csv_input(CAMPAIGN_IDS_RAW)

assert bundle_ids, '❌ BUNDLE_IDS is required'

print(f'Bundles:   {bundle_ids}')
print(f'Countries: {target_countries or "(all)"}')
print(f'Campaigns: {campaign_ids or "(all)"}')

Bundles:   ['6739616715']
Countries: (all)
Campaigns: (all)


In [4]:
#@title Bundle Resolution

_bundle_list = ', '.join(f"'{b}'" for b in bundle_ids)

q_resolve = f"""
WITH bundle_resolve AS (
  SELECT DISTINCT
    pd.app_store_bundle,
    pd.app_tracking_bundle,
    pd.tracking_bundle,
    pd.title AS product_title,
    pd.os,
    mmp.mmp_bundle_id
  FROM `focal-elf-631.standard_digest.product_digest` pd
  LEFT JOIN (
    SELECT DISTINCT advertiser.mmp_bundle_id, product.app_market_bundle
    FROM `moloco-ae-view.athena.fact_dsp_core`
    WHERE date_utc >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY)
      AND advertiser.mmp_bundle_id IS NOT NULL
  ) mmp ON pd.app_store_bundle = mmp.app_market_bundle
  WHERE NOT pd.is_archived
    AND (
      pd.app_store_bundle IN ({_bundle_list})
      OR pd.app_tracking_bundle IN ({_bundle_list})
      OR pd.tracking_bundle IN ({_bundle_list})
      OR mmp.mmp_bundle_id IN ({_bundle_list})
    )
)
SELECT * FROM bundle_resolve
"""

df_bundles = run_query(q_resolve, 'Bundle Resolution')
assert not df_bundles.empty, '❌ Could not resolve any bundles'

print(df_bundles[['tracking_bundle', 'mmp_bundle_id', 'app_store_bundle', 'os', 'product_title']].drop_duplicates().to_string(index=False))

bundles = []
for _, row in df_bundles.drop_duplicates(subset='tracking_bundle').iterrows():
    bundles.append({
        'tracking_bundle': row.get('tracking_bundle') or row.get('app_tracking_bundle'),
        'mmp_bundle_id': row.get('mmp_bundle_id'),
        'app_store_bundle': row.get('app_store_bundle'),
        'os': str(row.get('os', 'UNKNOWN')).upper(),
        'title': row.get('product_title', ''),
    })
    b = bundles[-1]
    print(f"\n── {b['title']} ({b['os']}) ──")
    print(f"  tracking: {b['tracking_bundle']}  mmp: {b['mmp_bundle_id']}  store: {b['app_store_bundle']}")

✅ Bundle Resolution: 2 rows
tracking_bundle mmp_bundle_id app_store_bundle  os              product_title
     6739616715    6739616715       6739616715 IOS   MapleStoryIdleRPG_iOS_KR
     6739616715    6739616715       6739616715 IOS MapleStory Idle RPG_iOS_GL

── MapleStoryIdleRPG_iOS_KR (IOS) ──
  tracking: 6739616715  mmp: 6739616715  store: 6739616715


In [5]:
#@title Campaign Resolution

_mmp_bundles = [b['mmp_bundle_id'] for b in bundles if b['mmp_bundle_id']]
_mmp_in = ', '.join(f"'{v}'" for v in _mmp_bundles)

q_campaigns = f"""
SELECT
  campaign_id,
  campaign.country AS country,
  advertiser.mmp_bundle_id AS mmp_bundle_id,
  ROUND(SUM(gross_spend_usd), 2) AS spend_L14D,
  SUM(installs) AS installs_L14D
FROM `moloco-ae-view.athena.fact_dsp_core`
WHERE date_utc BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 14 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
  AND advertiser.mmp_bundle_id IN ({_mmp_in})
GROUP BY 1, 2, 3
HAVING spend_L14D > 0
ORDER BY spend_L14D DESC
"""

df_all_campaigns = run_query(q_campaigns, 'All Campaigns for Bundles')

# Apply campaign resolution logic
if campaign_ids and target_countries:
    mask = (df_all_campaigns['country'].isin(target_countries)) | (df_all_campaigns['campaign_id'].isin(campaign_ids))
    scope_label = f'campaigns in {target_countries} + explicitly listed campaigns'
elif campaign_ids:
    mask = df_all_campaigns['campaign_id'].isin(campaign_ids)
    scope_label = f'explicitly listed campaigns only'
elif target_countries:
    mask = df_all_campaigns['country'].isin(target_countries)
    scope_label = f'all campaigns in {target_countries}'
else:
    mask = pd.Series(True, index=df_all_campaigns.index)
    scope_label = 'all campaigns under bundles'

df_campaigns = df_all_campaigns[mask].copy()
resolved_campaign_ids = df_campaigns['campaign_id'].unique().tolist()
resolved_countries = df_campaigns['country'].unique().tolist()

print(f'\n── Campaign Scope: {scope_label} ──')
print(f'  Campaigns: {len(resolved_campaign_ids)}')
print(f'  Countries: {resolved_countries}')
print(f'  Total spend (L14D): ${df_campaigns["spend_L14D"].sum():,.2f}')
print(f'  Total installs (L14D): {df_campaigns["installs_L14D"].sum():,.0f}')

# Build reusable SQL filter clauses
_campaign_in = sql_in_clause(resolved_campaign_ids, 'campaign_id')
_campaign_in_api = sql_in_clause(resolved_campaign_ids, 'api.campaign.id')
_country_in = sql_in_clause(resolved_countries, 'campaign.country')

df_campaigns.head(20)

✅ All Campaigns for Bundles: 29 rows

── Campaign Scope: all campaigns under bundles ──
  Campaigns: 26
  Countries: ['KOR', 'TWN', 'USA', 'THA', 'SGP', 'AUS', 'MYS', 'CAN', 'HKG', 'NLD', 'SWE', 'GBR', 'ARE', 'FRA']
  Total spend: $603,628.73
  Total installs: 23,029


Unnamed: 0,campaign_id,country,mmp_bundle_id,spend,installs
0,nkgLw08ChONIguL7,KOR,6739616715,288647.1,8541
1,JoCrqYDTvu6mZpyk,TWN,6739616715,65771.56,1631
2,zNnzKM4Xlv4dz6wy,USA,6739616715,64305.47,1636
3,IlPj0mrFUXGcmsPq,TWN,6739616715,56455.39,1149
4,lODdzol6sO7YsciH,THA,6739616715,12189.36,1084
5,OVQzKNG8XhjWzvKB,SGP,6739616715,11291.73,239
6,QjPlOeOkwOg44ArD,AUS,6739616715,11079.85,538
7,KlYpzEzw8nWyFLyy,MYS,6739616715,10080.88,670
8,DcDyfAs7sFBF5Cve,SGP,6739616715,9844.78,241
9,KlVPGVaDdQ5U39Xa,USA,6739616715,9360.36,3688


---
## Bundle-Level Checks

### 1-A. PA Status (iOS only)

In [6]:
#@title 1-A. PA Status (iOS only)

ios_bundles = [b for b in bundles if b['os'] == 'IOS']
if not ios_bundles:
    print('⏭️ No iOS bundles — skipping PA Status check')
    df_1a = pd.DataFrame()
else:
    _tb_list = ', '.join(f"'{b['tracking_bundle']}'" for b in ios_bundles)
    q_1a = f"""
    SELECT
      utc_date,
      mmp,
      tracking_bundle,
      verdict.fp_status AS pa_status,
      verdict.opt_with_ifa,
      verdict.opt_with_mas,
      warning.appsflyer_ap_on,
      warning.appsflyer_aap_enabled,
      warning.appsflyer_vt_pa_enabled,
      attr.att.total AS attr_att_optin,
      attr.no_att.total AS attr_att_optout,
      attr.no_att.privacy AS attr_privacy_count,
      no_attr.att.total AS noattr_att_optin,
      no_attr.no_att.total AS noattr_att_optout,
      ROUND(spend.total, 2) AS spend_usd_L14D
    FROM `focal-elf-631.mmp_pb_summary.app_status`
    WHERE tracking_bundle IN ({_tb_list})
      AND utc_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 14 DAY)
    ORDER BY tracking_bundle, utc_date DESC
    """
    df_1a = run_query(q_1a, '1-A PA Status (iOS)')
    df_1a

✅ 1-A PA Status (iOS): 14 rows


### 1-B. PA Attribution Method

In [7]:
#@title 1-B. PA Attribution Method (iOS + Android)

_tb_list_all = ', '.join(f"'{b['tracking_bundle']}'" for b in bundles)
q_1b = f"""
SELECT
  DATE(timestamp) AS date,
  api.product.app.tracking_bundle AS tracking_bundle,
  cv.mmp,
  cv.pb.attribution.method AS attribution_method,
  cv.pb.attribution.raw_method AS raw_method,
  COUNT(*) AS install_count
FROM `focal-elf-631.prod_stream_view.cv`
WHERE DATE(timestamp) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
  AND UPPER(cv.event) = 'INSTALL'
  AND api.product.app.tracking_bundle IN ({_tb_list_all})
GROUP BY 1, 2, 3, 4, 5
ORDER BY 1 DESC, 6 DESC
"""

df_1b = run_query(q_1b, '1-B PA Attribution Method')
if not df_1b.empty:
    has_pa = df_1b['attribution_method'].str.contains('probabilistic|modeled|fingerprint', case=False, na=False).any()
    print(f'  → PA postbacks detected: {"YES ✅" if has_pa else "NO ⚠️"}')
    methods = df_1b['attribution_method'].dropna().unique().tolist()
    print(f'  → Attribution methods seen: {methods}')
df_1b

✅ 1-B PA Attribution Method: 14 rows
  → PA postbacks detected: YES ✅
  → Attribution methods seen: ['PROBABILISTIC', 'IDENTIFIER']


Unnamed: 0,date,tracking_bundle,mmp,attribution_method,raw_method,install_count
0,2026-02-26,6739616715,ADJUST,PROBABILISTIC,probabilistic_matching,1264
1,2026-02-26,6739616715,ADJUST,IDENTIFIER,device_tag,181
2,2026-02-25,6739616715,ADJUST,PROBABILISTIC,probabilistic_matching,1172
3,2026-02-25,6739616715,ADJUST,IDENTIFIER,device_tag,131
4,2026-02-24,6739616715,ADJUST,PROBABILISTIC,probabilistic_matching,1240
5,2026-02-24,6739616715,ADJUST,IDENTIFIER,device_tag,91
6,2026-02-23,6739616715,ADJUST,PROBABILISTIC,probabilistic_matching,1306
7,2026-02-23,6739616715,ADJUST,IDENTIFIER,device_tag,101
8,2026-02-22,6739616715,ADJUST,PROBABILISTIC,probabilistic_matching,1515
9,2026-02-22,6739616715,ADJUST,IDENTIFIER,device_tag,90


### 2. Revenue Postback Reception

In [8]:
#@title 2. Revenue Postback Reception

q_rev = f"""
SELECT
  DATE(timestamp) AS date,
  api.product.app.tracking_bundle AS tracking_bundle,
  cv.event_pb AS event_name,
  COUNT(*) AS event_count_L7D,
  COUNTIF(cv.revenue_usd.amount > 0) AS events_with_revenue_L7D,
  ROUND(SUM(cv.revenue_usd.amount), 2) AS total_revenue_usd_L7D
FROM `focal-elf-631.prod_stream_view.cv`
WHERE DATE(timestamp) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
  AND api.product.app.tracking_bundle IN ({_tb_list_all})
  AND cv.revenue_usd.amount IS NOT NULL
GROUP BY 1, 2, 3
ORDER BY 1 DESC, 6 DESC
"""

df_rev = run_query(q_rev, '2. Revenue Postback')
if not df_rev.empty:
    for tb in df_rev['tracking_bundle'].unique():
        tb_df = df_rev[df_rev['tracking_bundle'] == tb]
        total_rev = tb_df['total_revenue_usd_L7D'].sum()
        print(f'  → {tb}: ${total_rev:,.2f} revenue, {tb_df["events_with_revenue_L7D"].sum():,} events')
        print(f'    Event types: {tb_df["event_name"].unique().tolist()}')
else:
    print('  → ⚠️ No revenue postbacks found for any bundle')
df_rev

⚠️ 2. Revenue Postback: 0 rows — check needed
  → ⚠️ No revenue postbacks found for any bundle


Unnamed: 0,date,tracking_bundle,revenue_events,campaigns_with_revenue


---
## Campaign-Level Checks

### 3. VT Install Check

In [9]:
#@title 3. VT Install Check

q_vt = f"""
SELECT
  DATE(timestamp) AS date,
  api.product.app.tracking_bundle AS tracking_bundle,
  cv.view_through AS is_view_through,
  cv.pb.attribution.method AS attribution_method,
  cv.pb.attribution.viewthrough AS pb_viewthrough,
  COUNT(*) AS install_count
FROM `focal-elf-631.prod_stream_view.cv`
WHERE DATE(timestamp) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
  AND UPPER(cv.event) = 'INSTALL'
  AND api.product.app.tracking_bundle IN ({_tb_list_all})
  {_campaign_in_api}
GROUP BY 1, 2, 3, 4, 5
ORDER BY 1 DESC, 6 DESC
"""

df_vt = run_query(q_vt, '3. VT Install')
if not df_vt.empty:
    has_vt = df_vt['is_view_through'].any() or df_vt['pb_viewthrough'].any()
    print(f'  → VT installs detected: {"YES ✅" if has_vt else "NO ⚠️"}')
df_vt

✅ 3. VT Install: 28 rows
  → VT installs detected: YES ✅


Unnamed: 0,date,tracking_bundle,is_view_through,attribution_method,install_count
0,2026-02-26,6739616715,False,PROBABILISTIC,757
1,2026-02-26,6739616715,True,PROBABILISTIC,507
2,2026-02-26,6739616715,False,IDENTIFIER,180
3,2026-02-26,6739616715,True,IDENTIFIER,1
4,2026-02-25,6739616715,False,PROBABILISTIC,720
5,2026-02-25,6739616715,True,PROBABILISTIC,452
6,2026-02-25,6739616715,False,IDENTIFIER,129
7,2026-02-25,6739616715,True,IDENTIFIER,2
8,2026-02-24,6739616715,False,PROBABILISTIC,738
9,2026-02-24,6739616715,True,PROBABILISTIC,502


### 4-A. Creatives Configured

In [10]:
#@title 4-A. Creatives Configured

_store_list = ', '.join(f"'{b['app_store_bundle']}'" for b in bundles)
q_4a = f"""
WITH product AS (
  SELECT product_id, platform
  FROM `focal-elf-631.standard_digest.product_digest`
  WHERE app_store_bundle IN ({_store_list}) AND NOT is_archived
)
SELECT
  cd.product_id,
  cd.creative_id,
  cd.creative_title,
  cd.creative_type,
  cd.is_archived,
  cd.timestamp AS last_updated
FROM `focal-elf-631.standard_digest.creative_digest` cd
INNER JOIN product p ON cd.product_id = p.product_id AND cd.platform = p.platform
ORDER BY cd.is_archived, cd.creative_type, cd.timestamp DESC
"""

df_4a = run_query(q_4a, '4-A Creatives Configured')
if not df_4a.empty:
    active = df_4a[df_4a['is_archived'] == False]
    print(f'  → Active creatives: {len(active)}, Archived: {len(df_4a) - len(active)}')
    print(f'  → Active formats: {active["creative_type"].value_counts().to_dict()}')
df_4a.head(20)

✅ 4-A Creatives Configured: 4549 rows
  → Active creatives: 4545, Archived: 4
  → Active formats: {'IMAGE': 2353, 'VIDEO': 891, 'NATIVE_VIDEO': 889, 'NATIVE_IMAGE': 412}


Unnamed: 0,product_id,creative_id,creative_title,creative_type,is_archived,last_updated
0,k8vOAXuuyirA2X83,tinszMp8n2kzyFSe,IMG_2602_D5207_AL-D-WhichOneRU-EN_MI_336x280_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00
1,k8vOAXuuyirA2X83,FWH4jJfhHUZ0mWIH,IMG_2602_D5207_AL-D-WhichOneRU-EN_MI_300x50_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00
2,k8vOAXuuyirA2X83,Btnx7yt908bm5Rb1,IMG_2602_HM_AL-E-IdleReward30-TW_MI_250x250_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00
3,k8vOAXuuyirA2X83,GRfzw0JOFqXQoqqJ,IMG_2602_D5207_AL-D-WhichOneRU-EN_MI_320x100_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00
4,k8vOAXuuyirA2X83,wSdzDpFraCAjMsJJ,IMG_2602_HM_AL-H-CombatGimmick25-TW_MI_160x600_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00
5,NwnCn0wf9qYwKE1r,aiFvzHnD4ky8vZ2h,IMG_2602_BUFF_AL-D-Customization26-KR_MI_468x60_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00
6,NwnCn0wf9qYwKE1r,ue1SKD1u4hmiC8GH,IMG_2602_BUFF_AL-D-Customization26-KR_MI_320x100_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00
7,k8vOAXuuyirA2X83,I8aYrkZrrsygtePT,IMG_2602_HM_AL-H-CombatGimmick25-TW_MI_250x250_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00
8,k8vOAXuuyirA2X83,T2OYfzC0mFrLjt4u,IMG_2602_D5186_AL-G-FreeReward-EN_MI_336x280_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00
9,k8vOAXuuyirA2X83,VXP0twqZovyqXyre,IMG_2602_HM_AL-F-DiverseContent25-TW_MI_728x90_F.jpg,IMAGE,False,2026-02-27 02:57:33.506444+00:00


### 4-B. Creative Impressions by Format

In [11]:
#@title 4-B. Creative Impressions by Format

_mmp_in_clause = sql_in_clause(_mmp_bundles, 'advertiser.mmp_bundle_id')
q_4b = f"""
SELECT
  date_utc AS date,
  campaign_id,
  campaign.country,
  creative.format AS creative_format,
  COUNT(DISTINCT creative.id) AS n_creatives_L7D,
  SUM(impressions) AS impressions_L7D,
  SUM(clicks) AS clicks_L7D,
  ROUND(SUM(gross_spend_usd), 2) AS spend_L7D,
  SUM(installs) AS installs_L7D
FROM `moloco-ae-view.athena.fact_dsp_creative`
WHERE date_utc BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
  {_mmp_in_clause}
  {_campaign_in}
GROUP BY 1, 2, 3, 4
ORDER BY 1 DESC, 5 DESC
"""

df_4b = run_query(q_4b, '4-B Creative Impressions')
if not df_4b.empty:
    fmt_summary = df_4b.groupby('creative_format').agg(
        impressions=('impressions_L7D', 'sum'), spend=('spend_L7D', 'sum')).sort_values('impressions', ascending=False)
    total_imps = fmt_summary['impressions'].sum()
    total_spend = fmt_summary['spend'].sum()
    print(f'  → Formats with impressions:')
    for fmt, row in fmt_summary.iterrows():
        imp_pct = row['impressions'] / total_imps * 100 if total_imps > 0 else 0
        spend_pct = row['spend'] / total_spend * 100 if total_spend > 0 else 0
        print(f'     {fmt}: {row["impressions"]:,.0f} imps ({imp_pct:.1f}%), ${row["spend"]:,.2f} spend ({spend_pct:.1f}%)')

    chart_df = df_4b.groupby(['date', 'creative_format'])['impressions_L7D'].sum().reset_index()
    daily_total = chart_df.groupby('date')['impressions_L7D'].transform('sum')
    chart_df['pct'] = (chart_df['impressions_L7D'] / daily_total * 100).round(1)
    chart_df['date'] = chart_df['date'].astype(str)
    fig = px.bar(chart_df, x='date', y='impressions_L7D', color='creative_format',
                 title='4-B: Daily Impressions by Creative Format',
                 labels={'impressions_L7D': 'Impressions', 'date': '', 'creative_format': 'Format'},
                 hover_data={'pct': ':.1f'})
    fig.update_traces(hovertemplate='%{data.name}<br>%{y:,.0f} imps (%{customdata[0]:.1f}%)<extra></extra>')
    fig.update_layout(barmode='stack', height=400)
    fig.show()
df_4b

✅ 4-B Creative Impressions: 1463 rows
  → Formats with impressions:
     ib: 341,902,279 imps, $91,016.89 spend
     ni: 44,998,422 imps, $10,976.13 spend
     nl: 27,856,286 imps, $5,799.07 spend
     vi: 11,134,026 imps, $176,789.23 spend
     nv: 6,802,834 imps, $4,222.17 spend
     vb: 1,636,446 imps, $1,088.18 spend
     ii: 625,862 imps, $2,993.39 spend


Unnamed: 0,date,campaign_id,country,creative_format,impressions,clicks,spend
0,2026-02-26,nkgLw08ChONIguL7,KOR,ib,30459543,18356,9057.450000000
1,2026-02-26,zNnzKM4Xlv4dz6wy,USA,ib,4597496,3911,1363.940000000
2,2026-02-26,nkgLw08ChONIguL7,KOR,ni,3510492,9700,812.830000000
3,2026-02-26,IlPj0mrFUXGcmsPq,TWN,ib,2532796,2406,802.610000000
4,2026-02-26,JoCrqYDTvu6mZpyk,TWN,ib,1315480,2111,292.780000000
...,...,...,...,...,...,...,...
1458,2026-02-20,JoCrqYDTvu6mZpyk,,,0,0,0E-9
1459,2026-02-20,TvqngHWcy3qak2Bt,FRA,vi,0,0,0E-9
1460,2026-02-20,hApo1fkBVypgDBFO,HKG,vi,0,0,0E-9
1461,2026-02-20,VfGyuyAUtkXUq8R0,GBR,nl,0,0,0E-9


### 5. Kakao Bizboard (KOR only)

In [12]:
#@title 5. Kakao Bizboard (KOR only)

if 'KOR' not in resolved_countries:
    print('⏭️ KOR not in scope — skipping Kakao Bizboard check')
    df_5 = pd.DataFrame()
else:
    _kor_campaigns = df_campaigns[df_campaigns['country'] == 'KOR']['campaign_id'].unique().tolist()
    _kor_camp_in = sql_in_clause(_kor_campaigns, 'campaign_id')
    q_5 = f"""
    SELECT
      date_utc,
      creative.format AS cr_format,
      creative.size,
      creative.title AS cr_title,
      creative.id AS cr_id,
      SUM(impressions) AS impressions_L7D,
      ROUND(SUM(gross_spend_usd), 2) AS gross_spend_usd_L7D
    FROM `moloco-ae-view.athena.fact_dsp_creative`
    WHERE date_utc BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
      {_mmp_in_clause}
      {_kor_camp_in}
      AND campaign.country = 'KOR'
      AND LOWER(exchange) LIKE '%kakao%'
      AND creative.size = '1029x258'
    GROUP BY 1, 2, 3, 4, 5
    ORDER BY 1 DESC, 7 DESC
    """
    df_5 = run_query(q_5, '5. Kakao Bizboard 1029x258')
    if df_5.empty:
        print('  → No Bizboard (1029x258) impressions on Kakao.')
        print('    Check: creative uploaded? Under review? Wrong dimensions?')
    else:
        print(f'  → Bizboard serving: {df_5["impressions_L7D"].sum():,.0f} total imps, ${df_5["gross_spend_usd_L7D"].sum():,.2f} total spend')
    df_5

❌ 5. Kakao Bizboard 1029x258: Query failed — 400 Field name width does not exist in STRUCT<end_card_file_name STRING, end_card_type STRING, format STRING, ...>; Did you mean id? at [14:20]; reason: invalidQuery, location: query, message: Field name width does not exist in STRUCT<end_card_file_name STRING, end_card_type STRING, format STRING, ...>; Did you mean id? at [14:20]

Location: US
Job ID: 932259b0-5cad-4986-8e4c-1e615ed7634f

  → ⚠️ No Kakao Bizboard (1029x258) impressions found


### 6-A. Bid Filter — Pricing (post-pricing stage)

In [13]:
#@title 6-A. Bid Filter — Pricing (post-pricing stage)

if not resolved_campaign_ids:
    print('⏭️ No campaigns resolved — skipping')
    df_6a = pd.DataFrame()
else:
    _camp_in_pricing = sql_in_clause(resolved_campaign_ids, 'cand.campaign_id')
    q_6a = f"""
    WITH pricing_data AS (
      SELECT
        DATE(timestamp) AS date,
        cand.campaign_id,
        cand.candidate_result,
        cand.core.reason AS core_reason,
        COUNT(*) AS cnt
      FROM `focal-elf-631.prod_stream_view.pricing`,
        UNNEST(pricing.candidates) AS cand
      WHERE DATE(timestamp) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
        {_camp_in_pricing}
      GROUP BY 1, 2, 3, 4
    )
    SELECT *, ROUND(100.0 * cnt / SUM(cnt) OVER (PARTITION BY date, campaign_id), 2) AS pct_of_day
    FROM pricing_data
    ORDER BY date DESC, cnt DESC
    """
    df_6a = run_query(q_6a, '6-A Bid Filter (pricing)')
    if not df_6a.empty:
        PASSED_RESULTS = {'CommitBid', 'InternalAuctionWinner'}
        passed_mask = df_6a['candidate_result'].isin(PASSED_RESULTS)
        filtered_df = df_6a[~passed_mask]

        if not filtered_df.empty:
            chart_df = filtered_df.groupby(['date', 'candidate_result'])['cnt'].sum().reset_index()
            daily_total = chart_df.groupby('date')['cnt'].transform('sum')
            chart_df['pct'] = (chart_df['cnt'] / daily_total * 100).round(2)
            chart_df['date'] = chart_df['date'].astype(str)
            fig = px.bar(chart_df, x='date', y='pct', color='candidate_result',
                         title='6-A: Daily Filter Reason Breakdown (pricing stage)',
                         labels={'pct': '% of Filtered', 'date': '', 'candidate_result': 'Reason'},
                         hover_data=['cnt'])
            fig.update_layout(barmode='stack', yaxis_ticksuffix='%', height=450)
            fig.show()
    df_6a

✅ 6-A Bid Filter (pricing): 1036 rows


### 6-B. Bid Filter — Campaign Trace (pre-pricing stages)

In [14]:
#@title 6-B. Bid Filter — Campaign Trace (pre-pricing stages)

if not resolved_campaign_ids:
    print('⏭️ No campaigns resolved — skipping')
    df_6b = pd.DataFrame()
else:
    _camp_in_trace = sql_in_clause(resolved_campaign_ids, 'campaign')
    q_6b = f"""
    SELECT
      date,
      campaign,
      reason_block,
      reason,
      reason_raw,
      ROUND(SUM(1 / rate) / 1e6, 2) AS estimated_req_millions
    FROM `moloco-data-prod.younghan.campaign_trace_raw_prod`
    WHERE date BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
      {_camp_in_trace}
      AND reason_block IN ('Get candidate campaigns', 'Evaluate candidate campaigns', 'get candidate ad_groups')
    GROUP BY 1, 2, 3, 4, 5
    ORDER BY 1 DESC, 6 DESC
    """
    df_6b = run_query(q_6b, '6-B Bid Filter (trace)')
    if not df_6b.empty:
        DETAIL_REASONS = {'(campaign) Ctx', '(campaign) Req', '(ad_group) Ctx', '(ad_group) Req'}
        df_6b['reason_label'] = df_6b.apply(
            lambda r: f"{r['reason']}: {r['reason_raw']}" if r['reason'] in DETAIL_REASONS and pd.notna(r['reason_raw']) else r['reason'],
            axis=1)

        BLOCK_ORDER = ['Get candidate campaigns', 'Evaluate candidate campaigns', 'get candidate ad_groups']
        blocks = [b for b in BLOCK_ORDER if b in df_6b['reason_block'].values]
        fig = make_subplots(rows=len(blocks), cols=1, subplot_titles=blocks, vertical_spacing=0.08)

        legend_seen = set()
        for idx, block in enumerate(blocks):
            block_df = df_6b[df_6b['reason_block'] == block]
            chart_df = block_df.groupby(['date', 'reason_label'])['estimated_req_millions'].sum().reset_index()
            daily_total = chart_df.groupby('date')['estimated_req_millions'].transform('sum')
            chart_df['pct'] = (chart_df['estimated_req_millions'] / daily_total * 100).round(2)
            chart_df['date'] = chart_df['date'].astype(str)
            top_reasons = chart_df.groupby('reason_label')['estimated_req_millions'].sum().sort_values(ascending=False).head(10).index
            chart_df = chart_df[chart_df['reason_label'].isin(top_reasons)]

            for reason in top_reasons:
                r_df = chart_df[chart_df['reason_label'] == reason]
                show = reason not in legend_seen
                legend_seen.add(reason)
                fig.add_trace(go.Bar(
                    x=r_df['date'], y=r_df['pct'], name=reason,
                    hovertemplate=f'{reason}<br>%{{y:.1f}}%<br>%{{customdata[0]:.2f}}M req',
                    customdata=r_df[['estimated_req_millions']].values,
                    legendgroup=reason, showlegend=show,
                ), row=idx+1, col=1)

        fig.update_layout(barmode='stack', height=400 * len(blocks),
                          title_text='6-B: Daily Filter Reason Breakdown by Pipeline Stage',
                          legend_title_text='reason', margin=dict(r=250))
        for i in range(len(blocks)):
            fig.update_yaxes(ticksuffix='%', row=i+1, col=1)
        fig.show()
    df_6b

✅ 6-B Bid Filter (trace): 787948 rows


### 6-C. Bid Filter Anomaly Detection

In [15]:
#@title 6-C. Bid Filter Anomaly Detection

bid_filter_alerts = []

# ── Check 1: Funnel Pass-Through Rate (6-B) ──
if not df_6b.empty:
    BLOCK_ORDER = ['Get candidate campaigns', 'Evaluate candidate campaigns', 'get candidate ad_groups']
    funnel_rows = []
    for block in BLOCK_ORDER:
        block_df = df_6b[df_6b['reason_block'] == block]
        if block_df.empty:
            continue
        total_vol = block_df['estimated_req_millions'].sum()
        funnel_rows.append({'stage': block, 'volume_M': round(total_vol, 2)})

    if funnel_rows:
        print('── Funnel Pass-Through (6-B) ──')
        for i, row in enumerate(funnel_rows):
            if i == 0:
                row['pass_rate'] = None
                print(f"  {row['stage']:40s}  {row['volume_M']:>8.2f}M  (entry)")
            else:
                prev_vol = funnel_rows[i-1]['volume_M']
                survived = prev_vol - row['volume_M']
                rate = (survived / prev_vol * 100) if prev_vol > 0 else 0
                row['pass_rate'] = round(rate, 1)
                flag = ' ⚠️' if rate < 5 else ''
                print(f"  {row['stage']:40s}  {row['volume_M']:>8.2f}M  pass_rate={rate:.1f}%{flag}")
                if rate < 5:
                    bid_filter_alerts.append({
                        'type': 'low_passthrough', 'stage': row['stage'],
                        'reason': None, 'value': rate,
                        'message': f"Only {rate:.1f}% pass through [{row['stage']}]"
                    })

# ── Check 2: Single-Reason Dominance (6-A + 6-B) ──
DOMINANCE_THRESHOLD = 80

# 6-B dominance
if not df_6b.empty:
    print('\n── Dominance Check (6-B) ──')
    for block in df_6b['reason_block'].unique():
        block_df = df_6b[df_6b['reason_block'] == block]
        reason_vol = block_df.groupby('reason_label')['estimated_req_millions'].sum()
        total = reason_vol.sum()
        if total == 0:
            continue
        reason_pct = (reason_vol / total * 100).sort_values(ascending=False)
        top_reason = reason_pct.index[0]
        top_pct = reason_pct.iloc[0]
        if top_pct > DOMINANCE_THRESHOLD:
            print(f'  ⚠️ DOMINANCE: "{top_reason}" = {top_pct:.1f}% at [{block}]')
            bid_filter_alerts.append({
                'type': 'dominance', 'stage': block,
                'reason': top_reason, 'value': top_pct,
                'message': f'"{top_reason}" accounts for {top_pct:.1f}% of filtering at [{block}]'
            })
        else:
            print(f'  ✅ [{block}]: top reason "{top_reason}" = {top_pct:.1f}% (below {DOMINANCE_THRESHOLD}%)')

# 6-A dominance
if not df_6a.empty:
    print('\n── Dominance Check (6-A) ──')
    PASSED_RESULTS = {'CommitBid', 'InternalAuctionWinner'}
    filtered_6a = df_6a[~df_6a['candidate_result'].isin(PASSED_RESULTS)]
    if not filtered_6a.empty:
        reason_vol = filtered_6a.groupby('candidate_result')['cnt'].sum()
        total = reason_vol.sum()
        reason_pct = (reason_vol / total * 100).sort_values(ascending=False)
        top_reason = reason_pct.index[0]
        top_pct = reason_pct.iloc[0]
        if top_pct > DOMINANCE_THRESHOLD:
            print(f'  ⚠️ DOMINANCE: "{top_reason}" = {top_pct:.1f}% at [pricing]')
            bid_filter_alerts.append({
                'type': 'dominance', 'stage': 'pricing',
                'reason': top_reason, 'value': top_pct,
                'message': f'"{top_reason}" accounts for {top_pct:.1f}% of filtering at [pricing]'
            })
        else:
            print(f'  ✅ [pricing]: top reason "{top_reason}" = {top_pct:.1f}% (below {DOMINANCE_THRESHOLD}%)')

# ── Check 3: Day-over-Day Change Detection (6-B) ──
SPIKE_THRESHOLD_PP = 20

if not df_6b.empty and df_6b['date'].nunique() > 1:
    print('\n── Day-over-Day Changes (6-B) ──')
    latest_date = df_6b['date'].max()
    prior_df = df_6b[df_6b['date'] < latest_date]
    latest_df = df_6b[df_6b['date'] == latest_date]

    for block in df_6b['reason_block'].unique():
        block_prior = prior_df[prior_df['reason_block'] == block]
        block_latest = latest_df[latest_df['reason_block'] == block]
        if block_prior.empty or block_latest.empty:
            continue

        prior_total = block_prior.groupby('reason_label')['estimated_req_millions'].sum()
        prior_pct = (prior_total / prior_total.sum() * 100) if prior_total.sum() > 0 else prior_total
        latest_total = block_latest.groupby('reason_label')['estimated_req_millions'].sum()
        latest_pct = (latest_total / latest_total.sum() * 100) if latest_total.sum() > 0 else latest_total

        all_reasons = set(prior_pct.index) | set(latest_pct.index)
        for reason in all_reasons:
            prev = prior_pct.get(reason, 0)
            curr = latest_pct.get(reason, 0)
            delta = curr - prev

            if prev == 0 and curr > 5:
                msg = f'NEW: "{reason}" appeared at {curr:.1f}% on {latest_date} at [{block}]'
                print(f'  ⚠️ {msg}')
                bid_filter_alerts.append({'type': 'new', 'stage': block, 'reason': reason, 'value': curr, 'message': msg})
            elif prev > 10 and curr == 0:
                msg = f'GONE: "{reason}" dropped from {prev:.1f}% → 0% on {latest_date} at [{block}]'
                print(f'  ⚠️ {msg}')
                bid_filter_alerts.append({'type': 'gone', 'stage': block, 'reason': reason, 'value': prev, 'message': msg})
            elif delta > SPIKE_THRESHOLD_PP:
                msg = f'SPIKE: "{reason}" jumped {prev:.1f}% → {curr:.1f}% on {latest_date} at [{block}]'
                print(f'  ⚠️ {msg}')
                bid_filter_alerts.append({'type': 'spike', 'stage': block, 'reason': reason, 'value': delta, 'message': msg})

    if not any(a for a in bid_filter_alerts if a['type'] in ('new', 'gone', 'spike')):
        print('  ✅ No significant day-over-day changes detected')

# ── Summary ──
print(f'\n── Bid Filter Alerts: {len(bid_filter_alerts)} ──')
for a in bid_filter_alerts:
    print(f'  ⚠️ {a["message"]}')

── Funnel Pass-Through (6-B) ──
  Get candidate campaigns                   44088.28M  (entry)
  Evaluate candidate campaigns              261238.61M  pass_rate=-492.5% ⚠️
  get candidate ad_groups                   18825.90M  pass_rate=92.8%

── Dominance Check (6-B) ──
  ✅ [Get candidate campaigns]: top reason "capping stats capper" = 53.2% (below 80%)
  ⚠️ DOMINANCE: "campaign limiter" = 87.5% at [Evaluate candidate campaigns]
  ✅ [get candidate ad_groups]: top reason "(ad_group) Req: PublisherBundles" = 73.8% (below 80%)

── Dominance Check (6-A) ──
  ✅ [pricing]: top reason "FilterByBidfloor" = 65.0% (below 80%)

── Day-over-Day Changes (6-B) ──
  ✅ No significant day-over-day changes detected

── Bid Filter Alerts: 2 ──
  ⚠️ Only -492.5% pass through [Evaluate candidate campaigns]
  ⚠️ "campaign limiter" accounts for 87.5% of filtering at [Evaluate candidate campaigns]


---
## Install Quality Checks - Install Leakage / Rejected Install Rate

### 7-A. CT Install Leakage (1h window)

In [16]:
#@title 7-A. CT Install Leakage (1h window)

_tb_list_all = ', '.join(f"'{b['tracking_bundle']}'" for b in bundles)
_camp_in_click = sql_in_clause(resolved_campaign_ids, 'c.api.campaign.id')

q_7a = f"""
WITH
  install AS (
    SELECT device.ifa, MAX(timestamp) AS install_ts
    FROM `focal-elf-631.prod_stream_view.pb`
    WHERE DATE(timestamp) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 14 DAY) AND CURRENT_DATE()
      AND app.bundle IN ({_tb_list_all})
      AND LOWER(event.name) = 'install'
      AND moloco.attributed IS FALSE
      AND `moloco-ods.general_utils.is_userid_truly_available`(device.ifa)
    GROUP BY 1
  ),
  click_matched AS (
    SELECT c.req.device.ifa, MAX(c.timestamp) AS last_click
    FROM `focal-elf-631.prod_stream_view.click` c
    INNER JOIN install i ON i.ifa = c.req.device.ifa
    WHERE c.timestamp < i.install_ts
      AND DATE(c.timestamp) BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 15 DAY) AND CURRENT_DATE()
      AND c.api.product.app.tracking_bundle IN ({_tb_list_all})
      {_camp_in_click}
    GROUP BY 1
  ),
  leaked AS (
    SELECT DATE(i.install_ts) AS date, i.ifa
    FROM click_matched c
    INNER JOIN install i USING (ifa)
    WHERE i.install_ts > c.last_click
      AND TIMESTAMP_DIFF(i.install_ts, c.last_click, MINUTE) BETWEEN 0 AND 60
  ),
  leaked_daily AS (
    SELECT date, COUNT(DISTINCT ifa) AS leaked_installs FROM leaked GROUP BY 1
  ),
  moloco_attributed AS (
    SELECT date_utc AS date, SUM(installs) AS attributed_installs
    FROM `moloco-ae-view.athena.fact_dsp_core`
    WHERE date_utc BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 14 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
      AND advertiser.mmp_bundle_id IN ({', '.join(f"'{b['mmp_bundle_id']}'" for b in bundles)})
      {_campaign_in}
    GROUP BY 1
  )
SELECT
  COALESCE(m.date, l.date) AS date,
  COALESCE(m.attributed_installs, 0) AS attributed_installs,
  COALESCE(l.leaked_installs, 0) AS leaked_installs,
  ROUND(SAFE_DIVIDE(
    COALESCE(l.leaked_installs, 0),
    COALESCE(m.attributed_installs, 0) + COALESCE(l.leaked_installs, 0)
  ) * 100, 2) AS leakage_rate_pct
FROM moloco_attributed m
FULL OUTER JOIN leaked_daily l ON m.date = l.date
ORDER BY 1 DESC
"""

df_7a = run_query(q_7a, '7-A CT Install Leakage (1h)')
if not df_7a.empty:
    total_leaked = df_7a['leaked_installs'].sum()
    total_attr = df_7a['attributed_installs'].sum()
    overall_rate = total_leaked / (total_attr + total_leaked) * 100 if (total_attr + total_leaked) > 0 else 0
    print(f'  → Total: {total_leaked:,.0f} leaked / {total_attr + total_leaked:,.0f} total ({overall_rate:.2f}%)')
    print('  → Daily:')
    for _, row in df_7a.iterrows():
        flag = ' ⚠️' if row['leakage_rate_pct'] and row['leakage_rate_pct'] > 10 else ''
        print(f'     {row["date"]}:  attr={row["attributed_installs"]:,.0f}  leaked={row["leaked_installs"]:,.0f}  rate={row["leakage_rate_pct"]:.1f}%{flag}')
df_7a

✅ 7-A CT Install Leakage (1h): 15 rows
  → Total: 1,248 leaked / 24,277 total (5.14%)
  → Daily:
     2026-02-27:  attr=0  leaked=79  rate=100.0% ⚠️
     2026-02-26:  attr=1,445  leaked=183  rate=11.2% ⚠️
     2026-02-25:  attr=1,303  leaked=132  rate=9.2%
     2026-02-24:  attr=1,330  leaked=84  rate=5.9%
     2026-02-23:  attr=1,407  leaked=55  rate=3.8%
     2026-02-22:  attr=1,604  leaked=73  rate=4.3%
     2026-02-21:  attr=1,624  leaked=66  rate=3.9%
     2026-02-20:  attr=1,500  leaked=80  rate=5.1%
     2026-02-19:  attr=1,444  leaked=103  rate=6.7%
     2026-02-18:  attr=1,724  leaked=72  rate=4.0%
     2026-02-17:  attr=1,845  leaked=62  rate=3.2%
     2026-02-16:  attr=2,013  leaked=66  rate=3.2%
     2026-02-15:  attr=1,968  leaked=70  rate=3.4%
     2026-02-14:  attr=2,042  leaked=61  rate=2.9%
     2026-02-13:  attr=1,780  leaked=62  rate=3.4%


Unnamed: 0,date,attributed_installs,leaked_installs,leakage_rate_pct
0,2026-02-27,0,79,100.0
1,2026-02-26,1445,183,11.24
2,2026-02-25,1303,132,9.2
3,2026-02-24,1330,84,5.94
4,2026-02-23,1407,55,3.76
5,2026-02-22,1604,73,4.35
6,2026-02-21,1624,66,3.91
7,2026-02-20,1500,80,5.06
8,2026-02-19,1444,103,6.66
9,2026-02-18,1724,72,4.01


### 7-B. Rejected Install Rate

In [17]:
#@title 7-B. Rejected Install Rate

_mmp_ids = ', '.join(f"'{b['mmp_bundle_id']}'" for b in bundles)
q_7b = f"""
SELECT
  date_utc,
  campaign_id,
  SUM(installs) AS total_installs_L14D,
  SUM(installs_rejected) AS total_rejected_installs_L14D,
  ROUND(SAFE_DIVIDE(SUM(installs_rejected), SUM(installs) + SUM(installs_rejected)) * 100, 2) AS rejection_rate_pct
FROM `moloco-ae-view.athena.fact_dsp_core`
WHERE date_utc BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 14 DAY) AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
  AND advertiser.mmp_bundle_id IN ({_mmp_ids})
  {_campaign_in}
GROUP BY 1, 2
ORDER BY 1 DESC
"""

df_7b = run_query(q_7b, '7-B Rejected Install Rate')
if not df_7b.empty:
    total_rejected = df_7b['total_rejected_installs_L14D'].sum()
    total_installs = df_7b['total_installs_L14D'].sum()
    avg_rate = df_7b['rejection_rate_pct'].fillna(0).mean()
    high_days = df_7b[df_7b['rejection_rate_pct'] > 10]
    if total_rejected > 0:
        print(f'  → Total rejected: {total_rejected:,.0f} / {total_installs + total_rejected:,.0f} ({avg_rate:.1f}% avg daily)')
        if len(high_days) > 0:
            print(f'  → ⚠️ {len(high_days)} rows with >10% rejection:')
            for _, row in high_days.head(10).iterrows():
                print(f'     {row["date_utc"]} {row["campaign_id"]}: rejected={row["total_rejected_installs_L14D"]:,.0f}  rate={row["rejection_rate_pct"]:.1f}%')
    else:
        print(f'  → No rejected installs in the last 14 days ✅')
else:
    print('  → No install data found')
df_7b

✅ 7-B Rejected Install Rate: 364 rows
  → Total rejected: 1,633 / 24,662 (2.1% avg daily)
  → ⚠️ 29 rows with >10% rejection:
     2026-02-26 KlVPGVaDdQ5U39Xa: rejected=144  rate=33.0%
     2026-02-26 zNnzKM4Xlv4dz6wy: rejected=13  rate=13.8%
     2026-02-26 XHFS6peS1OW92vp2: rejected=56  rate=43.4%
     2026-02-25 XHFS6peS1OW92vp2: rejected=56  rate=45.2%
     2026-02-25 KlVPGVaDdQ5U39Xa: rejected=114  rate=30.6%
     2026-02-24 XHFS6peS1OW92vp2: rejected=30  rate=30.3%
     2026-02-24 HBjMe9zddG8DEkVS: rejected=1  rate=12.5%
     2026-02-24 KlVPGVaDdQ5U39Xa: rejected=81  rate=25.8%
     2026-02-23 XHFS6peS1OW92vp2: rejected=20  rate=21.1%
     2026-02-23 KlVPGVaDdQ5U39Xa: rejected=58  rate=18.8%


Unnamed: 0,date_utc,campaign_id,total_installs,total_rejected_installs,rejection_rate_pct
0,2026-02-26,d0NoYiZosEElvBJB,0,0,
1,2026-02-26,hApo1fkBVypgDBFO,0,0,
2,2026-02-26,gryP72PX90O9iHCQ,0,0,
3,2026-02-26,Z7w36dOg7rQqqYmH,19,0,0.0
4,2026-02-26,QjPlOeOkwOg44ArD,14,0,0.0
...,...,...,...,...,...
359,2026-02-13,hApo1fkBVypgDBFO,1,0,0.0
360,2026-02-13,KlYpzEzw8nWyFLyy,34,0,0.0
361,2026-02-13,etv2AXseG8zsDJMk,21,0,0.0
362,2026-02-13,qlsmwMKaW45pKtmu,12,0,0.0


---
## Diagnostic Summary

In [18]:
#@title Diagnostic Summary

print('=' * 50)
print('  Campaign Diagnostic Summary (v2)')
print(f'  Bundles: {[b["tracking_bundle"] for b in bundles]}')
print(f'  OS:      {[b["os"] for b in bundles]}')
print(f'  Scope:   {len(resolved_campaign_ids)} campaigns in {resolved_countries}')
print('=' * 50)

has_ios = any(b['os'] == 'IOS' for b in bundles)

checks = [
    # Bundle-level
    ('1-A. PA Status (iOS)',     'skipped' if not has_ios else (not df_1a.empty)),
    ('1-B. PA Attribution',      not df_1b.empty and df_1b['attribution_method'].str.contains('probabilistic|modeled', case=False, na=False).any() if not df_1b.empty else None),
    ('2.   Revenue Postback',    not df_rev.empty),
    # Campaign-level
    ('3.   VT Install',          not df_vt.empty and df_vt['is_view_through'].any() if not df_vt.empty else None),
    ('4-A. Creative Config',     not df_4a.empty and (df_4a['is_archived'] == False).any() if not df_4a.empty else None),
    ('4-B. Creative Impressions',not df_4b.empty),
    ('5.   Kakao Bizboard',      'skipped' if 'KOR' not in resolved_countries else (not df_5.empty)),
    ('6-A. Bid Filter (pricing)',not df_6a.empty if resolved_campaign_ids else 'skipped'),
    ('6-B. Bid Filter (trace)',  not df_6b.empty if resolved_campaign_ids else 'skipped'),
    ('6-C. Bid Filter Anomaly',  len(bid_filter_alerts) == 0 if resolved_campaign_ids else 'skipped'),
    ('7-A. CT Leakage (1h)',     not df_7a.empty and df_7a['leakage_rate_pct'].fillna(0).mean() < 10 if not df_7a.empty else None),
    ('7-B. Rejected Install',    df_7b.empty or df_7b['rejection_rate_pct'].fillna(0).mean() < 10),
]

for name, result in checks:
    if result == 'skipped':
        icon = '⏭️'
        status = 'N/A' if ('iOS' in name or 'Kakao' in name) else 'Skipped'
    elif result is None:
        icon = '❓'
        status = 'No data'
    elif bool(result):
        icon = '✅'
        status = 'OK'
    else:
        icon = '⚠️'
        status = 'Check needed'
        if 'Anomaly' in name:
            status += f' ({len(bid_filter_alerts)} alerts)'
    print(f'  {icon} {name}: {status}')

  Campaign Diagnostic Summary (v2)
  Bundles: ['6739616715']
  OS:      ['IOS']
  Scope:   26 campaigns in ['KOR', 'TWN', 'USA', 'THA', 'SGP', 'AUS', 'MYS', 'CAN', 'HKG', 'NLD', 'SWE', 'GBR', 'ARE', 'FRA']
  ✅ 1-A. PA Status (iOS): OK
  ✅ 1-B. PA Attribution: OK
  ⚠️ 2.   Revenue Postback: Check needed
  ✅ 3.   VT Install: OK
  ✅ 4-A. Creative Config: OK
  ✅ 4-B. Creative Impressions: OK
  ⚠️ 5.   Kakao Bizboard: Check needed
  ✅ 6-A. Bid Filter (pricing): OK
  ✅ 6-B. Bid Filter (trace): OK
  ⚠️ 6-C. Bid Filter Anomaly: Check needed (2 alerts)
  ⚠️ 7-A. CT Leakage (1h): Check needed
  ✅ 7-B. Rejected Install: OK
