In [12]:
#@title Environment Setup

from google.cloud import bigquery
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)

client = bigquery.Client(project='moloco-ods')

def process_query(input_query, job_config=None):
    """Run a SQL statement on BigQuery and return the result as a DataFrame.

    Args:
        input_query: SQL text executed through the notebook-level ``client``.
        job_config: Optional ``bigquery.QueryJobConfig`` (for example one that
            carries query parameters). When omitted, a default config is
            created, which is what this function always did before.

    Returns:
        The value of ``to_dataframe()`` on the finished job's result.
    """
    if job_config is None:
        job_config = bigquery.QueryJobConfig()
    query_job = client.query(input_query, job_config=job_config)
    return query_job.result().to_dataframe()

In [13]:
#@title [Alternative] Load from existing Excel file and map to Platform/Advertiser/App/GM
# Use this cell if you already have the .xlsx file and just want to add mapping info.
# It reloads the dataframes that the export cell wrote to the workbook, looks up
# platform / advertiser / app bundle / GM in fact_dsp_core, and builds the two
# enriched frames consumed by the Google Spreadsheet export cell.

import os

# === LOAD EXCEL FILE ===
excel_file = 'ODSB-16052_D28_eligibility_results.xlsx'

if os.path.exists(excel_file):
    # Read every required sheet in one pass; a list of sheet names returns a dict of frames
    excel_sheets = pd.read_excel(
        excel_file,
        sheet_name=[
            'All_Bundle_Country',
            'Bundle_Summary',
            'Eligible_Bundle_Country',
            'Not_Eligible_Bundle_Country',
            'KOR_Bundle_Country_7day',
        ],
    )
    df_d28_eligibility = excel_sheets['All_Bundle_Country']
    df_bundle_summary = excel_sheets['Bundle_Summary']
    df_eligible = excel_sheets['Eligible_Bundle_Country']
    df_not_eligible = excel_sheets['Not_Eligible_Bundle_Country']
    df_kor_bundle_country = excel_sheets['KOR_Bundle_Country_7day']

    print(f"Loaded data from: {excel_file}")
    print(f"  - All_Bundle_Country: {len(df_d28_eligibility)} rows")
    print(f"  - Bundle_Summary: {len(df_bundle_summary)} rows")
    print(f"  - Eligible_Bundle_Country: {len(df_eligible)} rows")
    print(f"  - Not_Eligible_Bundle_Country: {len(df_not_eligible)} rows")
    print(f"  - KOR_Bundle_Country_7day: {len(df_kor_bundle_country)} rows")

    # === MAP MMP BUNDLE ID TO PLATFORM, ADVERTISER, APP BUNDLE NAME, GM ===
    # Drop missing bundles so a NaN cell does not end up as the literal 'nan' in the IN list
    unique_bundles = df_d28_eligibility['bundle'].dropna().unique().tolist()
    # Quote each id for the SQL IN (...) list, backslash-escaping embedded single quotes
    bundle_list_str = "'" + "','".join([str(b).replace("'", "\\'") for b in unique_bundles]) + "'"

    query_mapping = f"""
    SELECT DISTINCT
        advertiser.mmp_bundle_id AS mmp_bundle_id,
        platform_id AS platform,
        advertiser.title AS advertiser,
        product.app_market_bundle AS app_bundle_name,
        advertiser.gm AS gm
    FROM `moloco-ae-view.athena.fact_dsp_core`
    WHERE date_utc >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)
        AND advertiser.mmp_bundle_id IN ({bundle_list_str})
        AND advertiser.mmp_bundle_id IS NOT NULL
        AND advertiser.mmp_bundle_id != ''
    """

    df_mapping = process_query(query_mapping)
    print(f"\nMapping query completed: {len(df_mapping)} unique mappings")

    # === MERGE MAPPING WITH DATA ===
    # NOTE(review): df_mapping may hold several rows per mmp_bundle_id, so these left
    # joins can repeat source rows (the enriched shapes printed below exceed the
    # loaded row counts) — confirm that one row per mapping is the intended output.

    # Enrich bundle x country data
    df_enriched = df_d28_eligibility.merge(
        df_mapping,
        left_on='bundle',
        right_on='mmp_bundle_id',
        how='left'
    )
    column_order = ['bundle', 'platform', 'advertiser', 'app_bundle_name', 'gm', 'country',
                    'spend_7d_usd', 'd7_payers', 'd28_payers', 'd8_to_d28_extra_payers',
                    'extra_payer_pct', 'd28_eligibility']
    # Keep only the listed columns that exist, in this order (this drops mmp_bundle_id)
    df_enriched = df_enriched[[c for c in column_order if c in df_enriched.columns]]

    # Enrich bundle summary
    df_bundle_summary_enriched = df_bundle_summary.merge(
        df_mapping,
        left_on='bundle',
        right_on='mmp_bundle_id',
        how='left'
    )
    summary_column_order = ['bundle', 'platform', 'advertiser', 'app_bundle_name', 'gm',
                            'spend_7d_usd_total', 'num_qualified_countries',
                            'd7_payers', 'd28_payers', 'd8_to_d28_extra_payers',
                            'extra_payer_pct', 'd28_eligibility']
    df_bundle_summary_enriched = df_bundle_summary_enriched[[c for c in summary_column_order if c in df_bundle_summary_enriched.columns]]

    print(f"\nEnriched data ready:")
    print(f"  - df_enriched: {df_enriched.shape}")
    print(f"  - df_bundle_summary_enriched: {df_bundle_summary_enriched.shape}")
    print(f"\nYou can now skip to the 'Export to Google Spreadsheet' cell (Cell 9)")

    # A bare expression inside an if-block is not rendered by the notebook, so display it explicitly
    display(df_bundle_summary_enriched.head(10))
else:
    print(f"Excel file not found: {excel_file}")
    print("Please run cells 1-4 first to generate the data, or check the file path.")

Loaded data from: ODSB-16052_D28_eligibility_results.xlsx
  - All_Bundle_Country: 96 rows
  - Bundle_Summary: 61 rows
  - Eligible_Bundle_Country: 34 rows
  - Not_Eligible_Bundle_Country: 62 rows
  - KOR_Bundle_Country_7day: 124 rows

Mapping query completed: 96 unique mappings

Enriched data ready:
  - df_enriched: (152, 12)
  - df_bundle_summary_enriched: (96, 12)

You can now skip to the 'Export to Google Spreadsheet' cell (Cell 9)


In [2]:
#@title Query 1: KOR Office Bundle x Country with Spend >= $3,500 (Last 7 Days)
# Aggregates spend, impressions and installs from fact_dsp_core for the KOR office
# over the 7 days before today (today itself is excluded) and keeps groups whose
# summed gross spend is at least 3,500 USD. The result is stored in
# df_kor_bundle_country, which the Excel export cell writes out as its own sheet.
#
# NOTE(review): the GROUP BY also includes advertiser title, advertiser_id and office,
# so the spend threshold is applied per (bundle, country, advertiser) group, and
# NULL/empty mmp_bundle_id values are not filtered out. Query 2's kor_bundle_country
# CTE groups by bundle and country only and excludes NULL/empty ids — confirm the two
# are meant to differ.

query1 = """
SELECT
    advertiser.mmp_bundle_id AS bundle_id,
    campaign.country AS country,
    advertiser.title AS advertiser_name,
    advertiser_id,
    advertiser.office AS office,
    SUM(gross_spend_usd) AS total_spend_usd,
    SUM(impressions) AS total_impressions,
    SUM(installs) AS total_installs,
    SAFE_DIVIDE(SUM(gross_spend_usd), SUM(installs)) AS cpi_usd
FROM `moloco-ae-view.athena.fact_dsp_core`
WHERE date_utc >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY)
    AND date_utc < CURRENT_DATE()
    AND advertiser.office = 'KOR'
GROUP BY 1, 2, 3, 4, 5
HAVING SUM(gross_spend_usd) >= 3500
ORDER BY total_spend_usd DESC
"""

# Run the query and report how many groups, distinct bundles and distinct countries came back
df_kor_bundle_country = process_query(query1)
print(f"Total KOR bundle x country combinations with spend >= $3,500: {len(df_kor_bundle_country)}")
print(f"Unique bundles: {df_kor_bundle_country['bundle_id'].nunique()}")
print(f"Unique countries: {df_kor_bundle_country['country'].nunique()}")
df_kor_bundle_country.head(20)

Total KOR bundles with spend >= $3,500: 100


Unnamed: 0,bundle_id,advertiser_name,advertiser_id,office,total_spend_usd,total_impressions,total_installs,cpi_usd
0,6739616715,nexon,h2ZXEkvik631BFdB,KOR,264012.172423427,409224896,17877,14.768259351
1,cvapi#com.coupang.mobile,쿠팡파트너스,Ppb7QnsCyXujMEhw,KOR,185184.639580224,31119630,0,
2,com.nexon.ma,nexon,h2ZXEkvik631BFdB,KOR,157006.932410771,116983776,10334,15.193239057
3,com.cpone.customer,(ECHO) 로켓나우,jp41i8qyihbZe4X5,KOR,90577.581423821,118569681,3506,25.835020372
4,com.percent.aos.luckydefense,Lucky Defense,t52aeGmi7ov3wppl,KOR,74296.354609826,80922645,2904,25.584144149
5,id894546091,Webtoon US,uDn6qhoy3zYOvTkc,KOR,54193.33205708,10192037,11766,4.605926573
6,com.towneers.www,당근마켓,Voql38wJkmDNzXbW,KOR,47919.25793453,30905623,5099,9.397775629
7,id6482291732,Lucky Defense,t52aeGmi7ov3wppl,KOR,42453.461023429,68410430,3784,11.219202173
8,com.netmarble.tskgb,netmarble,yfg0At8VksGnt6EO,KOR,37704.443224185,28995134,458,82.324111843
9,id6503702666,Coop TD,siscri0zaqilR4wg,KOR,33675.181896878,92310247,3666,9.185810665


In [None]:
#@title Query 2: D28 Eligibility Calculation (Bundle x Country Level)
# Updated Formula: (D28_payers - D7_payers) / D28_payers > 0.20
# Only for bundle x country combinations with 7-day spend >= $3,500
#
# Pipeline of the CTEs in the SQL below:
#   kor_bundle_country     - KOR-office bundle x country pairs whose gross spend over the
#                            7 days before today is >= 3,500 USD (NULL/empty bundle ids excluded).
#   installs               - 'install' events for those pairs with a log date between 90 and
#                            28 days ago (the most recent 28 days are excluded). user_id is the
#                            first of idfa, mmp.device_id, idfv accepted by
#                            is_userid_truly_available; install_at falls back to the log
#                            timestamp when event_at is the 1970-01-01 epoch value.
#   purchases              - events from the last 90 days for the same pairs whose name is in the
#                            listed purchase names or contains 'purchase' (case-insensitive).
#   payer_data             - installs joined to purchases on user, bundle and country, keeping
#                            purchases 0 to 28 whole days after the install.
#   first_purchase         - earliest purchase day per bundle, country and user.
#   bundle_country_metrics - distinct payers with first purchase day <= 7 and <= 28, plus spend.
# The final SELECT derives the extra-payer share and the eligibility label.
#
# NOTE(review): the final WHERE keeps only d28_payers > 0, so the 'NO_D28_PAYERS'
# label in the CASE expression cannot appear in the output.
# NOTE(review): "day <= 7" includes day 0, i.e. eight calendar-day buckets — confirm
# this matches the intended D7 definition.

query2 = """
-- D28 Test Eligibility Analysis for KOR Office Bundles (Bundle x Country)
-- Updated Formula: (D28_payers - D7_payers) / D28_payers > 0.20
-- Filter: Only bundle x country combinations with 7-day spend >= $3,500

WITH kor_bundle_country AS (
    -- Get bundle x country combinations with 7-day spend >= $3,500
    SELECT
        advertiser.mmp_bundle_id as bundle,
        campaign.country as country,
        SUM(gross_spend_usd) as spend_7d
    FROM `moloco-ae-view.athena.fact_dsp_core`
    WHERE date_utc >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY)
        AND date_utc < CURRENT_DATE()
        AND advertiser.office = 'KOR'
        AND advertiser.mmp_bundle_id IS NOT NULL
        AND advertiser.mmp_bundle_id != ''
    GROUP BY 1, 2
    HAVING SUM(gross_spend_usd) >= 3500
),

-- Get installs from df_accesslog.pb (only for qualified bundle x country)
installs AS (
    SELECT
        CASE
            WHEN `moloco-ml.lat_utils.is_userid_truly_available`(device.idfa) THEN device.idfa
            WHEN `moloco-ml.lat_utils.is_userid_truly_available`(mmp.device_id) THEN mmp.device_id
            WHEN `moloco-ml.lat_utils.is_userid_truly_available`(device.idfv) THEN device.idfv
        END AS user_id,
        app.bundle as bundle,
        device.country as country,
        IF(event.event_at <> '1970-01-01 00:00:00 UTC', event.event_at, timestamp) as install_at
    FROM `focal-elf-631.df_accesslog.pb`
    WHERE DATE(timestamp) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)
        AND DATE(timestamp) < DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY)
        AND event.name = 'install'
        -- Filter by qualified bundle x country pairs
        AND (app.bundle, device.country) IN (SELECT (bundle, country) FROM kor_bundle_country)
),

-- Get purchase events from df_accesslog.pb
purchases AS (
    SELECT
        CASE
            WHEN `moloco-ml.lat_utils.is_userid_truly_available`(device.idfa) THEN device.idfa
            WHEN `moloco-ml.lat_utils.is_userid_truly_available`(mmp.device_id) THEN mmp.device_id
            WHEN `moloco-ml.lat_utils.is_userid_truly_available`(device.idfv) THEN device.idfv
        END AS user_id,
        app.bundle as bundle,
        device.country as country,
        IF(event.event_at <> '1970-01-01 00:00:00 UTC', event.event_at, timestamp) as event_at
    FROM `focal-elf-631.df_accesslog.pb`
    WHERE DATE(timestamp) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)
        -- Filter by qualified bundle x country pairs
        AND (app.bundle, device.country) IN (SELECT (bundle, country) FROM kor_bundle_country)
        AND (
            event.name IN ('af_purchase', 'purchase', 'iap_purchase', 'first_purchase', 'revenue')
            OR LOWER(event.name) LIKE '%purchase%'
        )
),

-- Join installs with purchases to calculate days since install
payer_data AS (
    SELECT
        i.bundle,
        i.country,
        i.user_id,
        TIMESTAMP_DIFF(p.event_at, i.install_at, DAY) as days_since_install
    FROM installs i
    INNER JOIN purchases p 
        ON i.user_id = p.user_id 
        AND i.bundle = p.bundle
        AND i.country = p.country
    WHERE i.user_id IS NOT NULL
        AND i.install_at IS NOT NULL
        AND p.event_at IS NOT NULL
        AND TIMESTAMP_DIFF(p.event_at, i.install_at, DAY) BETWEEN 0 AND 28
),

-- Get first purchase day per user
first_purchase AS (
    SELECT
        bundle,
        country,
        user_id,
        MIN(days_since_install) as first_purchase_day
    FROM payer_data
    GROUP BY 1, 2, 3
),

-- Aggregate at bundle x country level
bundle_country_metrics AS (
    SELECT
        fp.bundle,
        fp.country,
        kbc.spend_7d,
        COUNT(DISTINCT CASE WHEN fp.first_purchase_day <= 7 THEN fp.user_id END) as d7_payers,
        COUNT(DISTINCT CASE WHEN fp.first_purchase_day <= 28 THEN fp.user_id END) as d28_payers
    FROM first_purchase fp
    JOIN kor_bundle_country kbc 
        ON fp.bundle = kbc.bundle 
        AND fp.country = kbc.country
    GROUP BY 1, 2, 3
)

SELECT
    bundle,
    country,
    ROUND(spend_7d, 2) as spend_7d_usd,
    d7_payers,
    d28_payers,
    d28_payers - d7_payers as d8_to_d28_extra_payers,
    ROUND(SAFE_DIVIDE(d28_payers - d7_payers, d28_payers) * 100, 2) as extra_payer_pct,
    CASE
        WHEN d28_payers = 0 THEN 'NO_D28_PAYERS'
        WHEN SAFE_DIVIDE(d28_payers - d7_payers, d28_payers) > 0.20 THEN 'ELIGIBLE'
        ELSE 'NOT_ELIGIBLE'
    END as d28_eligibility
FROM bundle_country_metrics
WHERE d28_payers > 0
ORDER BY extra_payer_pct DESC
"""

# Run the query, then report the row count and the split by eligibility label
df_d28_eligibility = process_query(query2)
print(f"Total bundle x country combinations with payer data: {len(df_d28_eligibility)}")
print(f"ELIGIBLE: {len(df_d28_eligibility[df_d28_eligibility['d28_eligibility'] == 'ELIGIBLE'])}")
print(f"NOT ELIGIBLE: {len(df_d28_eligibility[df_d28_eligibility['d28_eligibility'] == 'NOT_ELIGIBLE'])}")
df_d28_eligibility.head(20)

In [None]:
#@title Filter results by eligibility status

# Split the eligibility table into independent copies, one per status label.
df_eligible = df_d28_eligibility.loc[
    df_d28_eligibility['d28_eligibility'].eq('ELIGIBLE')
].copy()
df_not_eligible = df_d28_eligibility.loc[
    df_d28_eligibility['d28_eligibility'].eq('NOT_ELIGIBLE')
].copy()

# Row counts per status (one row = one bundle x country pair)
print(f"ELIGIBLE bundle x country combinations: {len(df_eligible)}")
print(f"NOT ELIGIBLE bundle x country combinations: {len(df_not_eligible)}")

# Distinct bundles per status; a bundle can show up under both labels via different countries
print(f"\nUnique ELIGIBLE bundles: {df_eligible['bundle'].nunique()}")
print(f"Unique NOT ELIGIBLE bundles: {df_not_eligible['bundle'].nunique()}")

In [None]:
#@title Export to Excel (xlsx)

output_file = 'ODSB-16052_D28_eligibility_results.xlsx'

# Roll the bundle x country rows up to one row per bundle: spend and payer counts are
# summed across the bundle's qualified countries, and the countries are counted.
df_bundle_summary = (
    df_d28_eligibility
    .groupby('bundle')
    .agg(
        spend_7d_usd_total=('spend_7d_usd', 'sum'),
        d7_payers=('d7_payers', 'sum'),
        d28_payers=('d28_payers', 'sum'),
        d8_to_d28_extra_payers=('d8_to_d28_extra_payers', 'sum'),
        num_qualified_countries=('country', 'count'),
    )
    .reset_index()
)

# Recompute the extra-payer share from the summed counts rather than averaging country shares
df_bundle_summary['extra_payer_pct'] = (
    df_bundle_summary['d28_payers']
    .sub(df_bundle_summary['d7_payers'])
    .div(df_bundle_summary['d28_payers'])
    .mul(100)
    .round(2)
)
df_bundle_summary['d28_eligibility'] = df_bundle_summary['extra_payer_pct'].apply(
    lambda pct: 'ELIGIBLE' if pct > 20 else 'NOT_ELIGIBLE'
)
df_bundle_summary = df_bundle_summary.sort_values('extra_payer_pct', ascending=False)

# Sheets are written in this order; each entry is (sheet name, frame)
sheets_to_write = [
    ('All_Bundle_Country', df_d28_eligibility),
    ('Eligible_Bundle_Country', df_eligible),
    ('Not_Eligible_Bundle_Country', df_not_eligible),
    ('Bundle_Summary', df_bundle_summary),
    ('KOR_Bundle_Country_7day', df_kor_bundle_country),
]
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    for sheet_title, sheet_frame in sheets_to_write:
        sheet_frame.to_excel(writer, sheet_name=sheet_title, index=False)

print(f"Results exported to: {output_file}")
print(f"\nSheets created:")
print(f"  1. All_Bundle_Country - All bundle x country combinations with D28 eligibility")
print(f"  2. Eligible_Bundle_Country - Bundle x country with >20% extra payers")
print(f"  3. Not_Eligible_Bundle_Country - Bundle x country with <=20% extra payers")
print(f"  4. Bundle_Summary - Aggregated bundle-level summary")
print(f"  5. KOR_Bundle_Country_7day - KOR bundle x country with spend >= $3,500 (7-day)")

In [None]:
#@title Map MMP Bundle ID to Platform, Advertiser, App Bundle Name, GM
# Looks up platform, advertiser title, app market bundle and GM in fact_dsp_core
# (last 90 days) for every bundle present in the eligibility results.

# Get unique bundle_ids from the eligibility results (missing values are skipped)
unique_bundles = df_d28_eligibility['bundle'].dropna().unique().tolist()

# Create a comma-separated, single-quoted string for the IN clause.
# str(b) keeps this working when a bundle id is not a Python string, matching how
# the other cells in this notebook build the same list.
bundle_list_str = "'" + "','".join([str(b).replace("'", "\\'") for b in unique_bundles]) + "'"

query_mapping = f"""
SELECT DISTINCT
    advertiser.mmp_bundle_id AS mmp_bundle_id,
    platform_id AS platform,
    advertiser.title AS advertiser,
    product.app_market_bundle AS app_bundle_name,
    advertiser.gm AS gm
FROM `moloco-ae-view.athena.fact_dsp_core`
WHERE date_utc >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)
    AND advertiser.mmp_bundle_id IN ({bundle_list_str})
    AND advertiser.mmp_bundle_id IS NOT NULL
    AND advertiser.mmp_bundle_id != ''
"""

df_mapping = process_query(query_mapping)
print(f"Total unique mmp_bundle_id mappings: {len(df_mapping)}")
df_mapping.head(20)

In [None]:
#@title Merge mapping with eligibility results

# Left-join the mapping onto the eligibility rows so rows without a mapping are kept.
# NOTE(review): df_mapping is a SELECT DISTINCT over platform/advertiser/app/gm and may
# hold several rows per mmp_bundle_id, in which case eligibility rows are repeated once
# per mapping — confirm that is intended.
df_enriched = df_d28_eligibility.merge(
    df_mapping,
    left_on='bundle',
    right_on='mmp_bundle_id',
    how='left'
)

# Reorder columns to put mapping info first (columns not listed, e.g. mmp_bundle_id, are dropped)
column_order = ['bundle', 'platform', 'advertiser', 'app_bundle_name', 'gm', 'country',
                'spend_7d_usd', 'd7_payers', 'd28_payers', 'd8_to_d28_extra_payers',
                'extra_payer_pct', 'd28_eligibility']
df_enriched = df_enriched[[c for c in column_order if c in df_enriched.columns]]

# Count distinct bundles lacking a platform value; counting rows would report one
# hit per country of the same bundle and overstate the number of bundles.
unmapped_bundle_count = df_enriched.loc[df_enriched['platform'].isna(), 'bundle'].nunique()

print(f"Enriched dataframe shape: {df_enriched.shape}")
print(f"Bundles without mapping: {unmapped_bundle_count}")
df_enriched.head(20)

In [None]:
#@title Create enriched bundle summary (with mapping)

# Merge bundle summary with mapping (left join keeps bundles that have no mapping).
# NOTE(review): when df_mapping has several rows for one mmp_bundle_id, the summary row
# for that bundle is repeated, so spend/payer totals must not be summed across rows of
# this frame — confirm whether one row per bundle is required downstream.
df_bundle_summary_enriched = df_bundle_summary.merge(
    df_mapping,
    left_on='bundle',
    right_on='mmp_bundle_id',
    how='left'
)

# Reorder columns (columns not listed, e.g. mmp_bundle_id, are dropped)
summary_column_order = ['bundle', 'platform', 'advertiser', 'app_bundle_name', 'gm',
                        'spend_7d_usd_total', 'num_qualified_countries',
                        'd7_payers', 'd28_payers', 'd8_to_d28_extra_payers',
                        'extra_payer_pct', 'd28_eligibility']
df_bundle_summary_enriched = df_bundle_summary_enriched[[c for c in summary_column_order if c in df_bundle_summary_enriched.columns]]

print(f"Enriched bundle summary shape: {df_bundle_summary_enriched.shape}")
print(f"\nEligible bundles:")
# Leave the preview as the cell's last expression so the notebook renders it as a table
# instead of the plain-text dump produced by print()
df_bundle_summary_enriched.loc[
    df_bundle_summary_enriched['d28_eligibility'] == 'ELIGIBLE',
    ['bundle', 'advertiser', 'gm', 'extra_payer_pct'],
].head(20)

In [31]:
#@title Export to Google Spreadsheet (Service Account)
# Make sure to install: pip install gspread gspread-dataframe google-auth

import gspread
from gspread_dataframe import set_with_dataframe
from google.oauth2 import service_account

# === CONFIGURATION ===
# Credentials source. Either:
#   1. set SERVICE_ACCOUNT_FILE to the path of a service account JSON file, or
#   2. leave it as None to use Application Default Credentials
#      (make sure you've run: gcloud auth application-default login).
SERVICE_ACCOUNT_FILE = None  # e.g., '/path/to/your-service-account.json'

# A newly created spreadsheet is shared with this address so it appears in that Drive
SHARE_WITH_EMAIL = 'haewon.yum@moloco.com'  # <-- Update this with your email

# Quota project for the API calls (moloco-ods has the APIs enabled)
GCP_PROJECT = 'moloco-ods'

# === AUTHENTICATION ===
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]

if not SERVICE_ACCOUNT_FILE:
    # No key file configured: fall back to Application Default Credentials
    import google.auth
    from google.auth import credentials
    creds, _ = google.auth.default(scopes=SCOPES, quota_project_id=GCP_PROJECT)
else:
    # Key file configured: build credentials from the service account JSON
    creds = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE,
        scopes=SCOPES,
    )

gc = gspread.authorize(creds)

# === CREATE/OPEN SPREADSHEET ===
spreadsheet_name = 'ODSB-16052_D28_Eligibility_KOR'

try:
    sh = gc.open(spreadsheet_name)
except gspread.SpreadsheetNotFound:
    # First run: create the spreadsheet, then share it (sharing happens only on creation)
    sh = gc.create(spreadsheet_name)
    print(f"Created new spreadsheet: {spreadsheet_name}")
    if SHARE_WITH_EMAIL:
        sh.share(SHARE_WITH_EMAIL, perm_type='user', role='writer')
        print(f"Shared with: {SHARE_WITH_EMAIL}")
else:
    print(f"Opened existing spreadsheet: {spreadsheet_name}")

# === HELPER FUNCTION ===
def update_or_create_worksheet(spreadsheet, sheet_name, df):
    """Write ``df`` into the tab ``sheet_name``, creating the tab when it is missing.

    An existing tab is cleared before writing. A missing tab is added with
    ``len(df) + 1`` rows and at least one column.

    Args:
        spreadsheet: Object exposing ``worksheet(name)`` and
            ``add_worksheet(title=..., rows=..., cols=...)``.
        sheet_name: Title of the tab to overwrite or create.
        df: Frame passed to ``set_with_dataframe``.
    """
    try:
        worksheet = spreadsheet.worksheet(sheet_name)
        worksheet.clear()
    except gspread.WorksheetNotFound:
        # A frame with no columns would otherwise request a zero-column grid
        worksheet = spreadsheet.add_worksheet(
            title=sheet_name,
            rows=len(df) + 1,
            cols=max(len(df.columns), 1),
        )
    set_with_dataframe(worksheet, df)
    print(f"  - Updated sheet: {sheet_name}")

# === EXPORT DATA ===
print("Exporting to Google Spreadsheet...")

# (tab name, frame) pairs, written in this order
tabs_to_export = [
    ('Bundle_Summary_Enriched', df_bundle_summary_enriched),
    ('Bundle_Country_Enriched', df_enriched),
    ('Eligible_Only', df_enriched[df_enriched['d28_eligibility'] == 'ELIGIBLE']),
    ('Mapping_Reference', df_mapping),
]
for tab_name, tab_frame in tabs_to_export:
    update_or_create_worksheet(sh, tab_name, tab_frame)

print(f"\nSpreadsheet URL: {sh.url}")
print("\nSheets created:")
print("  1. Bundle_Summary_Enriched - Aggregated bundle-level with mapping")
print("  2. Bundle_Country_Enriched - All bundle x country with mapping")
print("  3. Eligible_Only - Only eligible combinations")
print("  4. Mapping_Reference - Raw mapping table")

Created new spreadsheet: ODSB-16052_D28_Eligibility_KOR
Shared with: haewon.yum@moloco.com
Exporting to Google Spreadsheet...
  - Updated sheet: Bundle_Summary_Enriched
  - Updated sheet: Bundle_Country_Enriched
  - Updated sheet: Eligible_Only
  - Updated sheet: Mapping_Reference

Spreadsheet URL: https://docs.google.com/spreadsheets/d/1YPYL0i59AmW5JgJVhugtS7nXTqfAIiSODt8kO3CSM50

Sheets created:
  1. Bundle_Summary_Enriched - Aggregated bundle-level with mapping
  2. Bundle_Country_Enriched - All bundle x country with mapping
  3. Eligible_Only - Only eligible combinations
  4. Mapping_Reference - Raw mapping table


In [20]:
#@title Transform to GDS Uploader Format
# Converts eligible bundles to GDS Uploader CSV format
# Initiative Name: D28 Product Testing
# Tracking Entity: from campaign.tracking_entity in fact_dsp_core
# DRR: spend_7d_usd / 7

# === Step 1: Get eligible bundles from the enriched data ===
# df_enriched comes from either the Excel-load cell or the merge cell
df_eligible_bundles = df_enriched.loc[df_enriched['d28_eligibility'].eq('ELIGIBLE')].copy()
print(f"Eligible bundle x country combinations: {len(df_eligible_bundles)}")

# === Step 2: Query tracking_entity from fact_dsp_core ===
eligible_bundle_list = df_eligible_bundles['bundle'].unique().tolist()
# Single-quoted, comma-separated ids for the SQL IN (...) list
bundle_list_str = "'" + "','".join(str(b).replace("'", "\\'") for b in eligible_bundle_list) + "'"

query_tracking_entity = f"""
SELECT DISTINCT
    advertiser.mmp_bundle_id AS bundle,
    campaign.tracking_entity AS tracking_entity
FROM `moloco-ae-view.athena.fact_dsp_core`
WHERE date_utc >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)
    AND advertiser.mmp_bundle_id IN ({bundle_list_str})
    AND campaign.tracking_entity IS NOT NULL
    AND campaign.tracking_entity != ''
"""

df_tracking = process_query(query_tracking_entity)
print(f"Tracking entity mappings found: {len(df_tracking)}")

# === Step 3: Merge and calculate DRR ===
# NOTE(review): df_tracking can list several tracking entities for one bundle; the left
# join then yields one row per entity, each carrying the full spend of the
# bundle x country row — confirm the uploader expects that.
df_gds_upload = (
    df_eligible_bundles
    .merge(df_tracking, on='bundle', how='left')
    .assign(
        # Coerce to numeric first in case spend arrived as string/object
        spend_7d_usd=lambda frame: pd.to_numeric(frame['spend_7d_usd'], errors='coerce'),
        # DRR (Daily Run Rate) = spend_7d_usd / 7
        DRR=lambda frame: (frame['spend_7d_usd'] / 7).round(2),
    )
)

# === Step 4: Format for GDS Uploader (4 columns only) ===
df_gds_format = df_gds_upload[['tracking_entity', 'app_bundle_name', 'DRR']].rename(
    columns={'tracking_entity': 'Tracking Entity', 'app_bundle_name': 'Bundle ID'}
)
df_gds_format.insert(0, 'Initiative Name', 'D28 Product Testing')

# Drop rows without tracking entity, then remove duplicate rows.
# NOTE(review): rows with a missing 'Bundle ID' are kept — verify the uploader accepts them.
df_gds_format = df_gds_format.dropna(subset=['Tracking Entity']).drop_duplicates()

print(f"\nGDS Upload format ready: {len(df_gds_format)} distinct rows")
df_gds_format.head(20)

Eligible bundle x country combinations: 58
Tracking entity mappings found: 29

GDS Upload format ready: 41 distinct rows


Unnamed: 0,Initiative Name,Tracking Entity,Bundle ID,DRR
0,D28 Product Testing,INCROSS - NCSOFT,com.ncsoft.aion2,779.5
1,D28 Product Testing,MADUP - 매드업_삼성증권,1150231646,794.86
2,D28 Product Testing,KRAFTON - PUBG Mobile,1366526331,566.92
4,D28 Product Testing,MADUP - 매드업_삼성증권,com.samsungpop.android.mpop,2181.35
5,D28 Product Testing,SUNDAYTOZ,com.sundaytoz.mobile.anipang2.google.kakao.ser...,1435.92
7,D28 Product Testing,SPACEV,1491007143,516.57
8,D28 Product Testing,KRAFTON - PUBG Mobile,com.pubg.krmobile,1610.71
10,D28 Product Testing,MEZZOMEDIA - (광고3팀_3파트)넷마블_아스달연대기,com.netmarble.nanagb,1044.91
11,D28 Product Testing,MEZZOMEDIA - (광고3팀_3파트)넷마블_아스달연대기,com.netmarble.nanatsunotaizai,2194.6
12,D28 Product Testing,SUNDAYTOZ,com.sundaytoz.kakao.anipang4,1309.65


In [21]:
#@title Export GDS Upload Format to Google Spreadsheet

import gspread
from gspread_dataframe import set_with_dataframe
import google.auth

# === CONFIGURATION ===
# Address a newly created spreadsheet is shared with, and the quota project for the API calls
GCP_PROJECT = 'moloco-ods'
SHARE_WITH_EMAIL = 'haewon.yum@moloco.com'

# === AUTHENTICATION ===
# Application Default Credentials with Sheets + Drive scopes
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]
creds, _ = google.auth.default(
    scopes=SCOPES,
    quota_project_id=GCP_PROJECT,
)
gc = gspread.authorize(creds)

# === HELPER FUNCTION ===
def update_or_create_worksheet(spreadsheet, sheet_name, df):
    """Write ``df`` into the tab ``sheet_name``, creating the tab when it is missing.

    An existing tab is cleared before writing. A missing tab is added with
    ``len(df) + 1`` rows and at least one column.

    NOTE(review): this notebook defines a function with this name in two cells;
    whichever cell ran last wins, so keep the two definitions in agreement.

    Args:
        spreadsheet: Object exposing ``worksheet(name)`` and
            ``add_worksheet(title=..., rows=..., cols=...)``.
        sheet_name: Title of the tab to overwrite or create.
        df: Frame passed to ``set_with_dataframe``.
    """
    try:
        worksheet = spreadsheet.worksheet(sheet_name)
        worksheet.clear()
    except gspread.WorksheetNotFound:
        # A frame with no columns would otherwise request a zero-column grid
        worksheet = spreadsheet.add_worksheet(
            title=sheet_name,
            rows=len(df) + 1,
            cols=max(len(df.columns), 1),
        )
    set_with_dataframe(worksheet, df)
    print(f"  - Updated sheet: {sheet_name}")

# === CREATE NEW SPREADSHEET FOR GDS UPLOAD ===
gds_spreadsheet_name = 'D28_Product_Testing_GDS_Upload'

try:
    sh_gds = gc.open(gds_spreadsheet_name)
except gspread.SpreadsheetNotFound:
    # First run: create the spreadsheet, then share it (sharing happens only on creation)
    sh_gds = gc.create(gds_spreadsheet_name)
    print(f"Created new spreadsheet: {gds_spreadsheet_name}")
    if SHARE_WITH_EMAIL:
        sh_gds.share(SHARE_WITH_EMAIL, perm_type='user', role='writer')
        print(f"Shared with: {SHARE_WITH_EMAIL}")
else:
    print(f"Opened existing spreadsheet: {gds_spreadsheet_name}")

# === EXPORT GDS FORMAT ===
print("Exporting to Google Spreadsheet...")

# Single tab holding the 4 upload columns: Initiative Name, Tracking Entity, Bundle ID, DRR
update_or_create_worksheet(
    sh_gds,
    'GDS_Upload',
    df_gds_format,
)

print(f"\nSpreadsheet URL: {sh_gds.url}")
print(f"\nSheet created: GDS_Upload ({len(df_gds_format)} rows)")
print(f"Columns: {list(df_gds_format.columns)}")

Opened existing spreadsheet: D28_Product_Testing_GDS_Upload
Exporting to Google Spreadsheet...
  - Updated sheet: GDS_Upload

Spreadsheet URL: https://docs.google.com/spreadsheets/d/1HGW5uiONeLCF379kzXiXluxa6qk4TIONhxoUJ0QrA9U

Sheet created: GDS_Upload (41 rows)
Columns: ['Initiative Name', 'Tracking Entity', 'Bundle ID', 'DRR']
