In [11]:
#@title Environment Setup
# ODSB-16084: Rocket Now - Geo Performance Analysis (JPN)
# Request: Campaign performance by geo (prefecture level)
# Format: Month, OS, Campaign Goal, Region, Spend, Actions D7, CPA D7
#
# Tables used:
# - imp table: Spend with geo data (use api.product.app.store_id for bundle)
# - cv table: Conversion/action data (use api.product.app.tracking_bundle for bundle)

from google.cloud import bigquery
import pandas as pd
import warnings
# Suppress every Python warning so library notices do not clutter cell output.
warnings.filterwarnings(action="ignore")

# DataFrame display: never truncate columns, show at most 100 rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

# Shared BigQuery client used by process_query; created for project 'moloco-ods'.
client = bigquery.Client(project="moloco-ods")

def process_query(input_query):
    """Run a SQL string on the module-level BigQuery ``client``.

    Args:
        input_query: SQL text to execute.

    Returns:
        The query result converted to a pandas DataFrame. The call blocks
        until the job's result is available.
    """
    # A default (empty) job config is passed explicitly, as before.
    result_rows = client.query(
        input_query,
        job_config=bigquery.QueryJobConfig(),
    ).result()
    return result_rows.to_dataframe()

In [12]:
#@title Configuration
# Rocket Now bundle ID. Compared against api.product.app.store_id in the imp
# table and api.product.app.tracking_bundle in the cv table.
BUNDLE_ID = "com.cpone.customer"

# Target country (JPN based on fact_dsp_core data); compared against
# req.device.geo.country in every query.
COUNTRY = "JPN"

# Analysis period. Both bounds are inclusive: the queries filter with
# DATE(timestamp) >= START_DATE AND DATE(timestamp) <= END_DATE.
START_DATE, END_DATE = "2025-09-21", "2026-01-29"

In [14]:
#@title Step 1: Check sample geo data from imp table

# Spot check only: looks at a single day (END_DATE) and keeps the top
# region/city combinations by spend. The uniqueness counts printed below are
# therefore counts *within this truncated sample*, not totals for the period.
SAMPLE_ROW_LIMIT = 50

sample_query = f"""
SELECT
    req.device.geo.country AS country,
    req.device.geo.region AS region,
    req.device.geo.city AS city,
    COUNT(*) AS impressions,
    SUM(SAFE_DIVIDE(imp.cost.analysis.demand_charge_cost.usd.amount_micro, 1e6)) AS spend_usd
FROM `focal-elf-631.prod_stream_view.imp`
WHERE api.product.app.store_id = '{BUNDLE_ID}'
    AND DATE(timestamp) = DATE('{END_DATE}')
    AND req.device.geo.country = '{COUNTRY}'
GROUP BY 1, 2, 3
ORDER BY spend_usd DESC
LIMIT {SAMPLE_ROW_LIMIT}
"""

df_sample = process_query(sample_query)
print(f"Sample geo data from imp table ({END_DATE} only, top {SAMPLE_ROW_LIMIT} rows by spend):")
# nunique() ignores NULLs, and the result is capped by the LIMIT above, so
# label the numbers as sample-level to avoid reading them as period totals.
print(f"Unique regions in sample: {df_sample['region'].nunique()}")
print(f"Unique cities in sample: {df_sample['city'].nunique()}")
print("\nTop region/city combinations by spend:")
df_sample.head(30)

Sample geo data from imp table:
Unique regions: 20
Unique cities: 31

Top region/city combinations by spend:


Unnamed: 0,country,region,city,impressions,spend_usd
0,JPN,,,318775,365.62739
1,JPN,13,tokyo,167520,158.983902
2,JPN,27,osaka,122721,126.378018
3,JPN,14,yokohama,67722,61.333176
4,JPN,,tokyo,32989,37.738989
5,JPN,,osaka,35720,34.112867
6,JPN,23,nagoya,35627,33.928708
7,JPN,jp-27,osaka,20915,26.886882
8,JPN,26,kyoto,24553,26.305108
9,JPN,jp-13,,23276,25.473847


In [15]:
#@title Step 2: Check null/empty rate for region and city

# One-row data-quality summary for the whole analysis period: how many
# impressions (and how much spend) have a NULL or empty-string region / city.
# The percentage columns use SAFE_DIVIDE so that a filter matching zero rows
# yields NULL percentages instead of failing the query with a
# division-by-zero error (COUNT(*) is 0 in that case).
null_check_query = f"""
SELECT
    COUNT(*) AS total_impressions,
    SUM(SAFE_DIVIDE(imp.cost.analysis.demand_charge_cost.usd.amount_micro, 1e6)) AS total_spend,
    COUNTIF(req.device.geo.region IS NULL OR req.device.geo.region = '') AS null_region_cnt,
    COUNTIF(req.device.geo.city IS NULL OR req.device.geo.city = '') AS null_city_cnt,
    SUM(IF(req.device.geo.region IS NULL OR req.device.geo.region = '',
           SAFE_DIVIDE(imp.cost.analysis.demand_charge_cost.usd.amount_micro, 1e6), 0)) AS null_region_spend,
    SUM(IF(req.device.geo.city IS NULL OR req.device.geo.city = '',
           SAFE_DIVIDE(imp.cost.analysis.demand_charge_cost.usd.amount_micro, 1e6), 0)) AS null_city_spend,
    ROUND(SAFE_DIVIDE(COUNTIF(req.device.geo.region IS NULL OR req.device.geo.region = ''), COUNT(*)) * 100, 2) AS null_region_pct,
    ROUND(SAFE_DIVIDE(COUNTIF(req.device.geo.city IS NULL OR req.device.geo.city = ''), COUNT(*)) * 100, 2) AS null_city_pct
FROM `focal-elf-631.prod_stream_view.imp`
WHERE api.product.app.store_id = '{BUNDLE_ID}'
    AND DATE(timestamp) >= DATE('{START_DATE}')
    AND DATE(timestamp) <= DATE('{END_DATE}')
    AND req.device.geo.country = '{COUNTRY}'
"""

df_null_check = process_query(null_check_query)
print("=== Data Quality Check: Null/Empty Rate ===")
print(f"Analysis Period: {START_DATE} ~ {END_DATE}")
df_null_check

=== Data Quality Check: Null/Empty Rate ===
Analysis Period: 2025-09-21 ~ 2026-01-29


Unnamed: 0,total_impressions,total_spend,null_region_cnt,null_city_cnt,null_region_spend,null_city_spend,null_region_pct,null_city_pct
0,354710954,564885.913682,82805996,65759469,179441.225316,125419.100758,23.34,18.54


In [None]:
#@title Step 3: Get Accurate Spend by Region from imp table
# Spend is imp.cost.analysis.demand_charge_cost.usd.amount_micro / 1e6, summed
# per (month, os, campaign, region).
#
# NOTE(review): the Step 1 sample output shows region codes in mixed formats
# (e.g. '13' and 'jp-13') for the same country; raw values are kept here, so
# such variants remain separate rows. Confirm whether they should be unified.
# NOTE(review): DATE(timestamp) is called without a time zone, which BigQuery
# evaluates in UTC -- confirm UTC month boundaries are intended for JPN.

spend_query = f"""
SELECT
    FORMAT_DATE('%Y-%m', DATE(timestamp)) AS month,
    req.device.os AS os,
    api.campaign.id AS campaign_id,
    -- Aggregate the title instead of grouping by it: if a campaign's title
    -- differs across rows, grouping by title would emit several rows with the
    -- same (month, os, campaign_id, region) key, and the Step 5 join on that
    -- key would then repeat installs/actions on each of them.
    MAX(api.campaign.title) AS campaign_title,
    -- Use raw region value (Japan prefectures); NULL and '' become 'Unknown'
    COALESCE(NULLIF(req.device.geo.region, ''), 'Unknown') AS region,
    COUNT(*) AS impressions,
    ROUND(SUM(SAFE_DIVIDE(imp.cost.analysis.demand_charge_cost.usd.amount_micro, 1e6)), 2) AS spend_usd
FROM `focal-elf-631.prod_stream_view.imp`
WHERE api.product.app.store_id = '{BUNDLE_ID}'
    AND DATE(timestamp) >= DATE('{START_DATE}')
    AND DATE(timestamp) <= DATE('{END_DATE}')
    AND req.device.geo.country = '{COUNTRY}'
GROUP BY 1, 2, 3, 5
ORDER BY month DESC, spend_usd DESC
"""

df_spend = process_query(spend_query)
print(f"=== Spend Data from imp table (Accurate) ===")
print(f"Analysis Period: {START_DATE} ~ {END_DATE}")
print(f"Total Spend: ${df_spend['spend_usd'].sum():,.2f}")
print(f"Total Impressions: {df_spend['impressions'].sum():,}")
df_spend.head(30)

In [None]:
#@title Step 4: Get D7 Actions by Region from cv table

# For every install row in the period, count distinct users (installs) and the
# distinct users with at least one non-INSTALL event inside the 7 days that
# follow the install (actions_d7). Note that actions_d7 is a count of users,
# not of events.
#
# The D7 window is expressed as install_at <= action_at < install_at + 7 days.
# The earlier TIMESTAMP_DIFF(..., DAY) BETWEEN 0 AND 7 form also accepted
# events on the 8th day, which is beyond the END_DATE + 7 day range scanned by
# the `actions` CTE. NOTE(review): it may also have accepted events shortly
# before the install, depending on how TIMESTAMP_DIFF handles negative
# partial days -- confirm.
actions_query = f"""
WITH installs AS (
    SELECT
        COALESCE(req.device.ifa, cv.pb.device.ifa, cv.pb.device.ifv) AS user_id,
        FORMAT_DATE('%Y-%m', DATE(install.happened_at)) AS install_month,
        req.device.os AS os,
        api.campaign.id AS campaign_id,
        -- Use raw region value (Japan prefectures); NULL and '' become 'Unknown'
        COALESCE(NULLIF(req.device.geo.region, ''), 'Unknown') AS region,
        install.happened_at AS install_at
    FROM `focal-elf-631.prod_stream_view.cv`
    WHERE api.product.app.tracking_bundle = '{BUNDLE_ID}'
        AND DATE(timestamp) >= DATE('{START_DATE}')
        AND DATE(timestamp) <= DATE('{END_DATE}')
        AND req.device.geo.country = '{COUNTRY}'
        AND install.happened_at IS NOT NULL
),

actions AS (
    SELECT
        COALESCE(req.device.ifa, cv.pb.device.ifa, cv.pb.device.ifv) AS user_id,
        cv.happened_at AS action_at
    FROM `focal-elf-631.prod_stream_view.cv`
    WHERE api.product.app.tracking_bundle = '{BUNDLE_ID}'
        AND DATE(timestamp) >= DATE('{START_DATE}')
        -- Scan 7 extra days so installs on END_DATE get a full D7 window
        AND DATE(timestamp) <= DATE_ADD(DATE('{END_DATE}'), INTERVAL 7 DAY)
        AND req.device.geo.country = '{COUNTRY}'
        AND cv.event IS NOT NULL
        AND cv.event != 'INSTALL'
)

SELECT
    i.install_month AS month,
    i.os,
    i.campaign_id,
    i.region,
    COUNT(DISTINCT i.user_id) AS installs,
    -- a.user_id is non-NULL only when an in-window action matched the install
    COUNT(DISTINCT IF(a.user_id IS NOT NULL, i.user_id, NULL)) AS actions_d7
FROM installs i
LEFT JOIN actions a
    ON i.user_id = a.user_id
    -- Window filter lives in the join so out-of-window events are never
    -- paired with the install (also keeps the joined row count down).
    AND a.action_at >= i.install_at
    AND a.action_at < TIMESTAMP_ADD(i.install_at, INTERVAL 7 DAY)
WHERE i.user_id IS NOT NULL
GROUP BY 1, 2, 3, 4
ORDER BY month DESC, installs DESC
"""

df_actions = process_query(actions_query)
print(f"=== Actions D7 from cv table ===")
print(f"Analysis Period: {START_DATE} ~ {END_DATE}")
df_actions.head(30)

In [None]:
#@title Step 5: Join Spend and Actions

# Columns that identify one output row; spend and actions are joined on these.
MERGE_KEYS = ['month', 'os', 'campaign_id', 'region']

# Collapse spend to exactly one row per key before joining. If df_spend holds
# several rows for the same key (it is also grouped by campaign_title), a
# direct left merge would copy the same installs/actions onto each of them and
# inflate the totals. sort=False keeps the incoming row order; dropna=False
# keeps rows whose key contains a missing value.
df_spend_by_key = (
    df_spend
    .groupby(MERGE_KEYS, as_index=False, dropna=False, sort=False)
    .agg(
        campaign_title=('campaign_title', 'first'),
        impressions=('impressions', 'sum'),
        spend_usd=('spend_usd', 'sum'),
    )
)
df_spend_by_key['spend_usd'] = df_spend_by_key['spend_usd'].round(2)

df_actions_by_key = df_actions[MERGE_KEYS + ['installs', 'actions_d7']]

# The left merge below keeps only keys that have spend. Identify conversion
# rows with no matching spend key so the exclusion is visible, not silent.
actions_match = df_actions_by_key.merge(
    df_spend_by_key[MERGE_KEYS], on=MERGE_KEYS, how='left', indicator=True
)
df_actions_unmatched = actions_match[actions_match['_merge'] == 'left_only']

# Merge spend and actions; validate= raises if either side still has
# duplicate keys instead of silently multiplying rows.
df_final = df_spend_by_key.merge(
    df_actions_by_key,
    on=MERGE_KEYS,
    how='left',
    validate='one_to_one',
)

# Keys with spend but no conversions: fill NaN with 0
df_final['installs'] = df_final['installs'].fillna(0).astype(int)
df_final['actions_d7'] = df_final['actions_d7'].fillna(0).astype(int)

# Calculate CPA D7; zero actions gives NaN rather than inf / ZeroDivisionError
df_final['cpa_d7'] = (
    df_final['spend_usd'] / df_final['actions_d7'].replace(0, float('nan'))
).round(2)

# Reorder columns
df_final = df_final[['month', 'os', 'campaign_id', 'campaign_title', 'region',
                     'spend_usd', 'impressions', 'installs', 'actions_d7', 'cpa_d7']]

print("=== Final Result: Geo Performance ===")
print(f"Analysis Period: {START_DATE} ~ {END_DATE}")
print(f"Total Spend: ${df_final['spend_usd'].sum():,.2f}")
print(f"Total Actions D7: {df_final['actions_d7'].sum():,}")
print(
    "Excluded (no matching spend row): "
    f"{len(df_actions_unmatched):,} action rows, "
    f"{int(df_actions_unmatched['installs'].sum()):,} installs, "
    f"{int(df_actions_unmatched['actions_d7'].sum()):,} actions D7"
)
df_final.head(30)

In [None]:
#@title Step 6: Summary by Region (Prefecture)

# Sum the additive metrics per region; ratios are recomputed from the sums
# below rather than averaged from the detailed rows.
metric_cols = ['spend_usd', 'impressions', 'installs', 'actions_d7']
df_region_summary = df_final.groupby('region')[metric_cols].sum().reset_index()

# CPA D7 per region; regions with zero actions get NaN.
nonzero_actions = df_region_summary['actions_d7'].replace(0, float('nan'))
df_region_summary['cpa_d7'] = (df_region_summary['spend_usd'] / nonzero_actions).round(2)

# Each region's share of total spend, in percent.
total_spend = df_region_summary['spend_usd'].sum()
df_region_summary['spend_pct'] = (df_region_summary['spend_usd'] / total_spend * 100).round(2)

# Highest-spend regions first.
df_region_summary = df_region_summary.sort_values('spend_usd', ascending=False)

print("=== Region Summary (시/도 Level) ===")
df_region_summary

In [None]:
#@title Step 7: City-level Analysis

# Top 100 (month, os, region, city) combinations by spend over the period.
# City is normalised the same way as region: NULL and '' both become
# 'Unknown'. Grouping on the raw value would keep NULL and '' as two separate
# groups that both display as blank (Step 2 counts both as missing).
city_query = f"""
SELECT
    FORMAT_DATE('%Y-%m', DATE(timestamp)) AS month,
    req.device.os AS os,
    COALESCE(NULLIF(req.device.geo.region, ''), 'Unknown') AS region,
    COALESCE(NULLIF(req.device.geo.city, ''), 'Unknown') AS city,
    COUNT(*) AS impressions,
    ROUND(SUM(SAFE_DIVIDE(imp.cost.analysis.demand_charge_cost.usd.amount_micro, 1e6)), 2) AS spend_usd
FROM `focal-elf-631.prod_stream_view.imp`
WHERE api.product.app.store_id = '{BUNDLE_ID}'
    AND DATE(timestamp) >= DATE('{START_DATE}')
    AND DATE(timestamp) <= DATE('{END_DATE}')
    AND req.device.geo.country = '{COUNTRY}'
GROUP BY 1, 2, 3, 4
ORDER BY spend_usd DESC
LIMIT 100
"""

df_city = process_query(city_query)
print("=== City-level Data ===")
# NOTE(review): these accuracy figures are hard-coded and not derived from any
# query in this notebook -- confirm their source before sharing.
print(f"Note: City accuracy ~80%, Region accuracy ~98%")
df_city.head(50)

In [None]:
#@title Step 8: Export to Excel (xlsx)

output_file = 'ODSB-16084_RocketNow_Geo_Performance.xlsx'

# Sheet name -> DataFrame, in the order the sheets are written.
sheets = {
    'Detailed_Data': df_final,            # Month, OS, Campaign, Region detail
    'Region_Summary': df_region_summary,  # Aggregated per region (province/prefecture level)
    'City_Data': df_city,                 # City-level data
    'Data_Quality': df_null_check,        # Null/empty-rate check
}

# The context manager saves and closes the workbook on exit.
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Results exported to: {output_file}")
print(f"Analysis Period: {START_DATE} ~ {END_DATE}")
print("\nSheets created:")
for summary_line in (
    "  1. Detailed_Data - Month, OS, Campaign, Region, Spend, Actions D7, CPA D7",
    "  2. Region_Summary - Aggregated by region (Prefecture)",
    "  3. City_Data - City-level breakdown",
    "  4. Data_Quality - Null/empty rate for region and city",
):
    print(summary_line)