In [4]:
from yt_dlp import YoutubeDL

# --- STEP 1: CONFIGURATION ---
# The core topics you want to forecast demand for.
# Each keyword is paired with every modifier below to form one search query.
base_keywords = ["Pulsar N160", "Yamaha R15"]

# Expansion terms to catch different "Demand Signals"
# "review" = Interest | "problems" = Negative Sentiment
# NOTE(review): there is no "vs" modifier in this list, so competitor-comparison
# videos are not searched for explicitly; add one if that signal is wanted.
modifiers = ["review", "problems", "mileage test", "ownership experience"]

# How many videos to grab per search query (the N in yt-dlp's "ytsearchN:" prefix).
VIDEOS_PER_SEARCH = 5

def get_video_urls(base_keywords, modifiers, videos_per_search=None):
    """Search YouTube for every "<keyword> <modifier>" pair and collect video URLs.

    Args:
        base_keywords: Sequence of core topics (e.g. product names).
        modifiers: Sequence of expansion terms appended to each topic.
        videos_per_search: Maximum number of results requested per query.
            When None, the module-level ``VIDEOS_PER_SEARCH`` is used.

    Returns:
        A list of unique ``https://www.youtube.com/watch?v=<id>`` URLs, in the
        order in which they were first discovered.

    A failing query is reported on stdout and skipped; it never aborts the
    remaining searches.
    """
    if videos_per_search is None:
        videos_per_search = VIDEOS_PER_SEARCH

    # 1. Generate Expanded Queries (every topic combined with every modifier)
    search_queries = [
        f"{topic} {mod}" for topic in base_keywords for mod in modifiers
    ]

    print(f"Generated {len(search_queries)} search queries: {search_queries}")

    # 2. Search YouTube using yt-dlp (No API Key needed)
    # A dict keyed by URL removes duplicates while keeping first-seen order, so
    # the returned list is reproducible (iteration order of a set of strings
    # can differ from one interpreter run to the next).
    found_urls = {}

    ydl_opts = {
        'quiet': True,
        'extract_flat': True,  # FAST mode: Only grabs metadata, doesn't download video
        'ignoreerrors': True,
    }

    with YoutubeDL(ydl_opts) as ydl:
        for query in search_queries:
            print(f"Searching for: '{query}'...")
            try:
                # "ytsearchN:" tells yt-dlp to search and return N results
                search_term = f"ytsearch{videos_per_search}:{query}"
                result = ydl.extract_info(search_term, download=False)
            except Exception as e:
                # Best-effort: report the failure and move on to the next query.
                print(f"Error searching {query}: {e}")
                continue

            # With 'ignoreerrors' enabled yt-dlp can hand back None instead of
            # raising, so a missing result must be checked for explicitly.
            if not result:
                print(f"Error searching {query}: no results returned")
                continue

            for video in result.get('entries') or []:
                # Individual entries can also be None (or lack an id) when
                # yt-dlp skipped them; drop those without losing the rest.
                video_id = video.get('id') if video else None
                if not video_id:
                    continue
                url = f"https://www.youtube.com/watch?v={video_id}"
                found_urls.setdefault(url, None)

    # 3. Output Results
    url_list = list(found_urls)
    print(f"\nSUCCESS! Found {len(url_list)} unique video URLs.")
    return url_list

# --- RUN THE SEARCH ---
# Build the queue of video URLs from the configured keywords and modifiers.
video_queue = get_video_urls(base_keywords=base_keywords, modifiers=modifiers)

# Print the banner and the URL list in one call; sep="\n" keeps each on its own line.
print("\n--- Copy these URLs to your Downloader ---", video_queue, sep="\n")

Generated 8 search queries: ['Pulsar N160 review', 'Pulsar N160 problems', 'Pulsar N160 mileage test', 'Pulsar N160 ownership experience', 'Yamaha R15 review', 'Yamaha R15 problems', 'Yamaha R15 mileage test', 'Yamaha R15 ownership experience']
Searching for: 'Pulsar N160 review'...
Searching for: 'Pulsar N160 problems'...
Searching for: 'Pulsar N160 mileage test'...
Searching for: 'Pulsar N160 ownership experience'...
Searching for: 'Yamaha R15 review'...
Searching for: 'Yamaha R15 problems'...
Searching for: 'Yamaha R15 mileage test'...
Searching for: 'Yamaha R15 ownership experience'...

SUCCESS! Found 40 unique video URLs.

--- Copy these URLs to your Downloader ---
['https://www.youtube.com/watch?v=MQ9oMgx75wk', 'https://www.youtube.com/watch?v=Ym-w6OUyvGk', 'https://www.youtube.com/watch?v=Sz0m_KH6zL8', 'https://www.youtube.com/watch?v=BFdD5JD-064', 'https://www.youtube.com/watch?v=UY6dnOAl1P4', 'https://www.youtube.com/watch?v=HfzzMfSYO48', 'https://www.youtube.com/watch?v=_0mnS