# In [1]:
from datetime import datetime
from pathlib import Path
import re
import sqlite3


def extract_show_id(url):
    """Return the digits following the first ``/id`` in *url*, or ``None``.

    The result is the matched digit run as a string (not converted to int).
    ``None`` is returned when *url* contains no ``/id<digits>`` segment.
    """
    found = re.search(r"/id(\d+)", url)
    if found is None:
        return None
    return found.group(1)


def scrape_shows(db_path: Path, retry=False) -> None:
    """Count the podcasts that are due for scraping and print that count.

    A row is selected when its ``download.status`` equals the target status
    (``"error"`` when *retry* is true, otherwise ``"pending"``), or when the
    status is ``'active'`` and the joined ``frequency.next_scrape`` is either
    NULL or not later than the current local time.

    Args:
        db_path: Path to the SQLite database file.
        retry: Select rows in the ``"error"`` state instead of ``"pending"``.

    Raises:
        sqlite3.Error: Propagated from the database layer (for example when
            an expected table is missing). The connection is closed first.
    """
    status = "error" if retry else "pending"
    # Naive local timestamp in ISO-8601 text form; it is compared as a string
    # against frequency.next_scrape inside the query.
    scraped_at = datetime.now().isoformat()

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(
            """
        SELECT podcast.id, podcast.name, podcast.category, podcast.url, download.status
        FROM podcast
        JOIN download ON podcast.id = download.id
        LEFT JOIN frequency ON podcast.id = frequency.podcast_id
        WHERE download.status = ?
        OR (download.status = 'active' AND (frequency.next_scrape IS NULL OR frequency.next_scrape <= ?))
        """,
            (status, scraped_at),
        )
        results = cursor.fetchall()
    finally:
        # Always release the database handle, even when the query fails.
        conn.close()

    total_podcasts = len(results)
    print(total_podcasts)


if __name__ == "__main__":
    # Script entry point: run a normal (non-retry) pass against the project
    # database, located relative to the current working directory.
    database_file = Path("../data/podcasts.db")
    scrape_shows(db_path=database_file, retry=False)

# Output: 18211
