In [27]:
import json
import os
import re
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

BASE_URL = "https://www.brookeandjeffrey.com"

In [29]:
driver = webdriver.Chrome()  # or Firefox()
driver.get("https://www.brookeandjeffrey.com/featured/second-date-bjitm/")

temp = set()

# Keep clicking "Load More" until it's gone
while True:
    try:
        # Wait for Load More button
        load_more = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Load More')]"))
        )
        
        # Click it
        load_more.click()
        
        # Wait for content to load
        time.sleep(1)
        
    except:
        # No more Load More button
        break

# Now get all the links
soup = BeautifulSoup(driver.page_source, "html.parser")
for a in soup.select("a"):
    if a.get("href") and "second-date" in a["href"]:
        temp.add(BASE_URL + a["href"])

driver.quit()

episode_links = list()
for episode_link in temp:
    episode_links.append(episode_link)
    
print(episode_links)

['https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-09-12-second-date-mark-and-chloe-factory-fail/', 'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-02-14-second-date-kate-and-matt-haters-of-love/', 'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-02-02-second-date-emily-jason-too-hot-to-be-homeless/', 'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-10-24-second-date-henry-and-kylie-its-a-fall-world-after-all/', 'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2023-09-27-second-date-pet-store-probation-lilly-and-ethan/', 'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-01-24-second-date-vicky-adam-double-date-double-ghost/', 'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-10-03-second-date-julian-and-abby-wax-on-date-off/', 'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2021-09-01-second-date-jos

In [None]:
"""
Complete pipeline to download all Second Date Update episodes
"""
# Configuration
DOWNLOAD_FOLDER = "second_date_episodes"
SLEEP_AFTER_LOAD = 10
SLEEP_AFTER_CLICK = 18

def get_omny_embed_url_from_episode(episode_url):
    """Extract the Omny.fm embed URL from episode page"""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    
    response = requests.get(episode_url, headers=headers)
    html = response.text
    
    # Extract from __PRELOADED_STATE__
    start_marker = 'window.__PRELOADED_STATE__ = '
    start_idx = html.find(start_marker)
    
    if start_idx == -1:
        return None
    
    start_idx += len(start_marker)
    end_idx = html.find('</script>', start_idx)
    json_str = html[start_idx:end_idx].strip().rstrip(';')
    
    try:
        data = json.loads(json_str)
        
        # Search for omny.fm embed URL
        def find_omny_url(obj):
            if isinstance(obj, dict):
                for key, value in obj.items():
                    if isinstance(value, str) and 'omny.fm/shows/' in value and '/embed' in value:
                        if not value.startswith('<iframe'):
                            return value
                    result = find_omny_url(value)
                    if result:
                        return result
            elif isinstance(obj, list):
                for item in obj:
                    result = find_omny_url(item)
                    if result:
                        return result
            return None
        
        return find_omny_url(data)
    except:
        return None

def get_mp3_from_omny_embed(omny_embed_url):
    """Go to Omny embed page, click play, capture MP3 URL"""
    chrome_options = Options()
    chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
    chrome_options.add_argument('--headless')  # Run in background
    
    driver = webdriver.Chrome(options=chrome_options)
    
    try:
        driver.get(omny_embed_url)
        time.sleep(SLEEP_AFTER_LOAD)
        
        # Try to click play button
        try:
            selectors = [
                'button[title="Play"]',
                'button[aria-label="Play"]',
                'button.play',
                '.play-button',
                'button[class*="play"]'
            ]
            
            for selector in selectors:
                try:
                    play_button = driver.find_element(By.CSS_SELECTOR, selector)
                    play_button.click()
                    break
                except:
                    continue
        except:
            pass
        
        time.sleep(SLEEP_AFTER_CLICK)
        
        # Check network logs
        logs = driver.get_log('performance')
        
        for log in logs:
            try:
                message = json.loads(log['message'])
                method = message.get('message', {}).get('method', '')
                
                if method == 'Network.responseReceived':
                    response = message['message']['params']['response']
                    url = response.get('url', '')
                    
                    if 'tritondigital.com' in url and '.mp3' in url:
                        driver.quit()
                        return url
            except:
                continue
        
        driver.quit()
        return None
        
    except Exception as e:
        driver.quit()
        return None

def download_mp3(mp3_url, filename):
    """Download MP3 file"""
    response = requests.get(mp3_url, stream=True)
    
    with open(filename, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

def sanitize_filename(url):
    """Extract a clean filename from the episode URL"""
    # Get the last part of the URL
    match = re.search(r'/content/(.+?)/?$', url)
    if match:
        name = match.group(1)
        # Clean up the name
        name = name.replace('second-date-bjitm-content-', '')
        name = name.replace('second-date-update-', '')
        name = name.replace('second-date-', '')
        return name + '.mp3'
    return 'unknown.mp3'

def main(episode_links):
    """Main function to download all episodes"""
    
    # Create download folder
    if not os.path.exists(DOWNLOAD_FOLDER):
        os.makedirs(DOWNLOAD_FOLDER)
        print(f"Created folder: {DOWNLOAD_FOLDER}/")
    
    total = len(episode_links)
    successful = 0
    failed = 0
    
    print(f"\n{'='*70}")
    print(f"Starting download of {total} episodes")
    print(f"{'='*70}\n")
    
    for i, episode_url in enumerate(episode_links, 1):
        print(f"[{i}/{total}] Processing: {episode_url}")
        
        # Step 1: Get Omny embed URL
        omny_url = get_omny_embed_url_from_episode(episode_url)
        if not omny_url:
            print(f"  ✗ Failed to get Omny URL")
            failed += 1
            continue
        
        print(f"  ✓ Found Omny URL")
        
        # Step 2: Get MP3 URL from Omny
        mp3_url = get_mp3_from_omny_embed(omny_url)
        if not mp3_url:
            print(f"  ✗ Failed to get MP3 URL")
            failed += 1
            continue
        
        print(f"  ✓ Found MP3 URL")
        
        # Step 3: Download
        filename = sanitize_filename(episode_url)
        filepath = os.path.join(DOWNLOAD_FOLDER, filename)
        
        # Skip if already downloaded
        if os.path.exists(filepath):
            print(f"  ⊙ Already exists, skipping")
            successful += 1
            continue
        
        try:
            download_mp3(mp3_url, filepath)
            print(f"  ✓ Downloaded: {filename}")
            successful += 1
        except Exception as e:
            print(f"  ✗ Download failed: {e}")
            failed += 1
        
        print()  # Blank line for readability
    
    print(f"\n{'='*70}")
    print(f"COMPLETE!")
    print(f"{'='*70}")
    print(f"Successful: {successful}/{total}")
    print(f"Failed: {failed}/{total}")
    print(f"Files saved to: {DOWNLOAD_FOLDER}/")

if __name__ == "__main__":
    # Start the pipeline only when at least one link has been collected.
    if episode_links:
        main(episode_links)
    else:
        print("ERROR: No episode links provided!")
        print("Please add your episode_links list to the script")

Created folder: second_date_episodes/

Starting download of 650 episodes

[1/650] Processing: https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-09-12-second-date-mark-and-chloe-factory-fail/
  ✓ Found Omny URL
  ✓ Found MP3 URL
  ✓ Downloaded: 2022-09-12-mark-and-chloe-factory-fail.mp3

[2/650] Processing: https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-02-14-second-date-kate-and-matt-haters-of-love/
  ✓ Found Omny URL
  ✓ Found MP3 URL
  ✓ Downloaded: 2022-02-14-kate-and-matt-haters-of-love.mp3

[3/650] Processing: https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-02-02-second-date-emily-jason-too-hot-to-be-homeless/
  ✓ Found Omny URL
  ✓ Found MP3 URL
  ✓ Downloaded: 2022-02-02-emily-jason-too-hot-to-be-homeless.mp3

[4/650] Processing: https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2022-10-24-second-date-henry-and-kylie-its-a-fall-world-after-all/
  ✓ Found Omny URL
  ✓ Found MP3 URL
  ✓ Dow

In [33]:
# Hand-maintained list of episode URLs to run through main() again.
# NOTE(review): presumably these are the episodes that failed in the full run
# above — confirm against that run's output before editing the list.
failed_episode_links = [
    'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2025-03-27-second-date-update-update-mike-catherine-carjitsu-crazy/',
    'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2025-10-10-second-date-update-blaine-delilah-hot-date-hack/',
    'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2023-03-24-second-date-aiden-and-kim-how-dare-you-heart-me/',
    'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2021-02-25-second-date-check-in-shark-boy/',
    'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2024-12-02-second-date-update-sarah-and-steve-blurry-in-a-hurry/',
    'https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2021-06-16-second-date-sydney-and-kurt-the-long-game/'
]

# main() skips any episode whose target file already exists in DOWNLOAD_FOLDER.
main(failed_episode_links)


Starting download of 6 episodes

[1/6] Processing: https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2025-03-27-second-date-update-update-mike-catherine-carjitsu-crazy/
  ✓ Found Omny URL
  ✓ Found MP3 URL
  ✓ Downloaded: 2025-03-27-update-mike-catherine-carjitsu-crazy.mp3

[2/6] Processing: https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2025-10-10-second-date-update-blaine-delilah-hot-date-hack/
  ✓ Found Omny URL
  ✓ Found MP3 URL
  ✓ Downloaded: 2025-10-10-blaine-delilah-hot-date-hack.mp3

[3/6] Processing: https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2023-03-24-second-date-aiden-and-kim-how-dare-you-heart-me/
  ✓ Found Omny URL
  ✓ Found MP3 URL
  ✓ Downloaded: 2023-03-24-aiden-and-kim-how-dare-you-heart-me.mp3

[4/6] Processing: https://www.brookeandjeffrey.com/featured/second-date-bjitm/content/2021-02-25-second-date-check-in-shark-boy/
  ✓ Found Omny URL
  ✓ Found MP3 URL
  ✓ Downloaded: 2021-02-25-check-in-shark-boy