In [0]:
import json
import logging
import os
import time
from datetime import datetime, timedelta, timezone

import requests

In [0]:
# --- Notebook parameters (Databricks widgets) ---
# Target environment; "prd" is currently the only selectable value.
dbutils.widgets.dropdown("environment", "prd", ["prd"], "1-Environment")
env = dbutils.widgets.get("environment")

# Day to ingest, expected as YYYYMMDD (it is parsed with "%Y%m%d" further down).
dbutils.widgets.text("target_date", "", "2-Target Date")
TARGET_DATE = dbutils.widgets.get("target_date")

# An empty widget value falls back to today's date in UTC.
if not TARGET_DATE:
    TARGET_DATE = datetime.now(timezone.utc).strftime("%Y%m%d")

# --- Ticketmaster Discovery API settings ---
BASE_URL = 'https://app.ticketmaster.com/discovery/v2/events.json'
# The API key is read from a secret scope so it never appears in the notebook source.
API_KEY = dbutils.secrets.get(scope="jeanarch", key="tkapikey")
REQUEST_SLEEP = 0.2  # seconds slept after each API request
PAGE_SIZE = 200  # sent as the 'size' query parameter (events per page)
# When an hourly window reports more total elements than this, the hour is
# re-queried in 10-minute windows instead.
# NOTE(review): presumably chosen to stay within the API's deep-paging limit - confirm.
MAX_EVENTS_THRESHOLD = 1000
COUNTRY = "US"  # default 'countryCode' used by process_events

In [0]:
def get_content(start: datetime, end: datetime, country: str, page: int = 0, timeout: float = 30.0):
    """Fetch one page of events from the Ticketmaster Discovery API.

    Args:
        start: Lower bound of the event start-time window. It is formatted
            as-is with a literal ``Z`` suffix (no timezone conversion is
            performed), so callers should pass UTC values.
        end: Upper bound of the window, formatted the same way as ``start``.
        country: Value sent as the ``countryCode`` query parameter.
        page: Zero-based page index to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection problems, timeouts, or any
            HTTP error status (raised as ``requests.HTTPError``).
    """
    params = {
        'page': page,
        'startDateTime': start.strftime('%Y-%m-%dT%H:%M:%SZ'),
        'endDateTime': end.strftime('%Y-%m-%dT%H:%M:%SZ'),
        'size': PAGE_SIZE,
        'countryCode': country
    }
    # Log before the API key is added so the secret never reaches the logs.
    logging.info(f"Request Params: {params}")

    params['apikey'] = API_KEY

    response = requests.get(BASE_URL, params=params, timeout=timeout)
    # Throttle after every request, successful or not, to stay under the rate limit.
    time.sleep(REQUEST_SLEEP)

    if not response.ok:
        # Build the error by hand instead of calling raise_for_status(): its
        # message embeds the full URL, which contains the API key.
        raise requests.HTTPError(
            f"HTTP {response.status_code} for page {page} of window "
            f"{params['startDateTime']} - {params['endDateTime']}",
            response=response,
        )

    return response.json()

In [0]:
def deep_generate_file(data: dict, start: datetime, end: datetime, country: str):
    """Persist an already-fetched page and every remaining page of the same window.

    Args:
        data: Decoded API response for the first page to store. It must
            contain ``data["page"]["number"]`` and ``data["page"]["totalPages"]``.
        start: Start of the queried time window.
        end: End of the queried time window.
        country: Country code used for the query and the output path.

    Raises:
        KeyError: If ``data`` has no ``page`` section (e.g. an error payload).
    """
    page_info = data["page"]
    first_page = int(page_info["number"])
    total_pages = int(page_info["totalPages"])

    # Label the file with the page number the payload actually reports rather
    # than assuming it is page 0.
    generate_file(data, start, end, country, first_page)

    # Pages are zero-based, so the last valid index is total_pages - 1.
    for page in range(first_page + 1, total_pages):
        page_data = get_content(start, end, country, page)
        generate_file(page_data, start, end, country, page)

def generate_file(data: dict, start: datetime, end: datetime, country: str, page: int = 0):
    """Write one API response page as a JSON file in the landing volume.

    The file name encodes the country, the current UTC timestamp, the queried
    window and the page index.

    Args:
        data: JSON-serialisable API response to store.
        start: Start of the queried time window.
        end: End of the queried time window.
        country: Country code; used both as sub-directory and in the file name.
        page: Page index of ``data`` within the window.

    Returns:
        The full path of the file that was written.
    """
    current_time_str = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    start_str = start.strftime("%Y%m%d%H%M%S")
    end_str = end.strftime("%Y%m%d%H%M%S")
    target_dir = f"/Volumes/{env}_bronze/landing/ticketmaster/events/{country}"
    file_name = f"{target_dir}/ticketmaster-events-{country}-{current_time_str}-{start_str}_{end_str}-{page}.json"

    # Serialise first so a serialisation failure does not leave an empty file behind.
    content = json.dumps(data)

    # The per-country directory may not exist yet (e.g. first run for a country).
    os.makedirs(target_dir, exist_ok=True)
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(content)
    print(file_name)
    return file_name


def generate_time_window(input_day: datetime, hour: int, minute: int, window_minutes: int = 60):
    """Build an inclusive time window that starts at ``hour:minute`` of ``input_day``.

    Args:
        input_day: Day the window belongs to; only its date part (and tzinfo,
            if any) is kept.
        hour: Hour of the window start (0-23).
        minute: Minute of the window start (0-59).
        window_minutes: Length of the window in minutes.

    Returns:
        A ``(start, end)`` tuple where ``end`` is one second before the start
        of the next window, so consecutive windows never overlap.
    """
    # Reset microseconds as well: replace() keeps any field that is not
    # overridden, which would otherwise shift both bounds off whole seconds.
    start = input_day.replace(hour=hour, minute=minute, second=0, microsecond=0)
    end = start + timedelta(minutes=window_minutes) - timedelta(seconds=1)
    return start, end

def process_events(input_day: datetime, country: str = COUNTRY):
    """Download and store all events of one day, hour by hour.

    Each hour is queried once. If the reported total exceeds
    ``MAX_EVENTS_THRESHOLD`` the hour is re-queried as six 10-minute windows;
    otherwise the hourly response is stored directly. Failures are logged and
    only the affected window is skipped, so one bad request does not abort
    the rest of the day.

    Args:
        input_day: Day to ingest; its time-of-day fields are overridden.
        country: Country code passed to the API and used in the output path.
    """
    for hour in range(24):
        start, end = generate_time_window(input_day, hour, 0)
        try:
            data = get_content(start, end, country)
            total_elements = int(data["page"]["totalElements"])
        except requests.RequestException as e:
            logging.error(f"Request error: {e}")
            continue
        except (KeyError, TypeError, ValueError) as e:
            # An error payload has no usable "page" section; skip the hour
            # instead of aborting the whole day.
            logging.error(f"Unexpected response for {start} - {end}: {e!r}")
            continue

        if total_elements > MAX_EVENTS_THRESHOLD:
            logging.info("Request interval: each 10 minutes")
            # None marks windows whose first page still has to be fetched.
            windows = [
                (*generate_time_window(input_day, hour, minute, 10), None)
                for minute in range(0, 60, 10)
            ]
        else:
            logging.info("Request interval: hourly")
            # Reuse the response already fetched for the whole hour.
            windows = [(start, end, data)]

        for window_start, window_end, window_data in windows:
            # Each window has its own try so one failure does not drop the
            # remaining windows of the hour.
            try:
                if window_data is None:
                    window_data = get_content(window_start, window_end, country)
                deep_generate_file(window_data, window_start, window_end, country)
            except requests.RequestException as e:
                logging.error(f"Request error: {e}")
            except (KeyError, TypeError, ValueError) as e:
                logging.error(
                    f"Unexpected response for {window_start} - {window_end}: {e!r}"
                )

# Parse the YYYYMMDD target date into a naive datetime at midnight and ingest
# that whole day with the default country.
input_day = datetime.strptime(TARGET_DATE, "%Y%m%d")
process_events(input_day)