# Ingest Beeswax & Ipon Reports (API)

## Imports (& Load ENV variables)

In [9]:
import requests
import time
import tempfile
from os import environ as ENV
from dotenv import load_dotenv
from pyspark.sql import SparkSession
from io import StringIO

load_dotenv()

True

## Initialise Spark Session

In [2]:
spark = SparkSession.builder \
    .appName("Reporting_Test") \
    .getOrCreate()

Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/09/29 11:35:14 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


## Beeswax API Authenticate

In [3]:
auth_url = ENV['BEESWAX_BASE_URL']+'/authenticate'
auth_payload = {'email': ENV['BEESWAX_EMAIL'], 'password': ENV['BEESWAX_PASSWORD']}
auth_headers = {'Content-Type': 'application/json'}

# authenticate request

response = requests.post(auth_url, json=auth_payload, headers=auth_headers)
response.raise_for_status()

print(response.json())

# store auth cookie in session

session = requests.Session()

auth_response = session.post(auth_url, json=auth_payload, headers=auth_headers)
auth_response.raise_for_status()


{'success': True, 'message': 'Cookie set successfully'}


## Beeswax API Call (Report Dump)

In [18]:
def bw_api_get_report_fields(report_type):
    """retrieve all fields for given report"""
    report_url = ENV['BEESWAX_BASE_URL']+'/v2/reporting/reports/'+report_type

    report_response = session.get(report_url)
    report_response.raise_for_status()
    data = report_response.json()

    return [val['name'] for val in data['fields']]


    

## Beeswax API Call (Get Report)

In [13]:
def bw_api_get_report(report_type, field_list, filter_dict):
    """retrieve beeswax report"""
    report_url = ENV['BEESWAX_BASE_URL']+'/v2/reporting/run-query'
    report_payload={
        'view': report_type,
        'fields': field_list,
        'filters': filter_dict,
        'result_format': 'csv'
    }
    print('generate report')
    report_response = session.post(report_url, json=report_payload)
    report_response.raise_for_status()
    data = report_response.json()

    task_id = data['task_id']
    async_url = ENV['BEESWAX_BASE_URL']+'/v2/reporting/async-results/'+task_id

    start_time = time.time()
    max_wait_seconds = 60

    while True:
        result_response = session.get(async_url)
        if result_response.status_code == 200:
            print('report received')
            print('spark initialise')
            # write to temp file
            with tempfile.NamedTemporaryFile(mode="w+", suffix=".csv", delete=False) as temp:
                temp.write(result_response.text)
                temp_path = temp.name
            report_df = spark.read.option('header', True).option('inferSchema', True).csv(temp_path)
            #rdd = spark.sparkContext.parallelize(result_response.text.splitlines())
            #report_df = spark.read.option('header', True).option('inferSchema', True).csv(rdd)
            break
        elif result_response.status_code == 202:
            elapsed_time = time.time() - start_time
            if elapsed_time > max_wait_seconds:
                raise TimeoutError (f'Report not ready after {max_wait_seconds} seconds')
            time.sleep(5)
        else:
            result_response.raise_for_status()

    return report_df




## Get Performance and Bid Performance Report Data

In [None]:

performance_fields = bw_api_get_report_fields('performance_agg')
performance_report_df = bw_api_get_report('performance_agg', performance_fields, {'bid_day': '2025-09-26'})

bid_performance_fields = bw_api_get_report_fields('bid_performance_agg')
bid_performance_report_df = bw_api_get_report('bid_performance_agg', bid_performance_fields, {'bid_day': '2025-09-26'})

"""
performance_report_df = bw_api_get_report(
    'performance_agg',
    [
        "bid_day", 
        "advertiser_id", 
        "advertiser_name", 
        "campaign_id", 
        "campaign_name", 
        "line_item_id", 
        "line_item_name", 
        "creative_id", 
        "creative_name", 
        "creative_size",
        "impression", 
        "clicks", 
        "media_spend", 
        "postback_conversions", 
        "viewthrough_conversions", 
        "clickthrough_conversions", 
        "video_plays", 
        "video_q1s", 
        "video_midpoints", 
        "video_q3s", 
        "video_completes", 
        "cpm", 
        "cpc", 
        "spend_per_conversion", 
        "ctr"
    ],
    {
        'bid_day': '2025-09-26'
    }
)

bid_performance_report_df = bw_api_get_report(
    'bid_performance_agg',
    [
        "bid_day", 
        "advertiser_id", 
        "campaign_id", 
        "line_item_id",  
        "creative_id", 
        "bid",
        "win_rate"
    ],
    {
        'bid_day': '2025-09-26'
    }
)
"""


generate report
report received
spark initialise


'\nperformance_report_df = bw_api_get_report(\n    \'performance_agg\',\n    [\n        "bid_day", \n        "advertiser_id", \n        "advertiser_name", \n        "campaign_id", \n        "campaign_name", \n        "line_item_id", \n        "line_item_name", \n        "creative_id", \n        "creative_name", \n        "creative_size",\n        "impression", \n        "clicks", \n        "media_spend", \n        "postback_conversions", \n        "viewthrough_conversions", \n        "clickthrough_conversions", \n        "video_plays", \n        "video_q1s", \n        "video_midpoints", \n        "video_q3s", \n        "video_completes", \n        "cpm", \n        "cpc", \n        "spend_per_conversion", \n        "ctr"\n    ],\n    {\n        \'bid_day\': \'2025-09-26\'\n    }\n)\n\nbid_performance_report_df = bw_api_get_report(\n    \'bid_performance_agg\',\n    [\n        "bid_day", \n        "advertiser_id", \n        "campaign_id", \n        "line_item_id",  \n        "creative_id

## Beeswax Join Performance & Bid Performance Reports

In [None]:
# NEXT STEPS: WRITE EACH REPORT TO SEPARATE TABLES & JOIN LATER IN SNOWFLAKE
# OR: CHANGE 'SHARED_COLS' COLUMN NAMES TO MATCH DF, JOIN, THEN WRITE TO SINGULAR TABLE

shared_cols = ["bid_day", "advertiser_id", "campaign_id", "line_item_id", "creative_id"]

performance_report_df.show()
bid_performance_report_df.show()

#merged_df = performance_report_df.join(bid_performance_report_df, on = shared_cols, how = 'left')

#merged_df.show()

## Beeswax Write to Staging Table

## Ipon API Call

## Ipon Write to Staging Table