In [None]:
import subprocess
import json
import pandas as pd
import time
import os

# Fetch the most recent uploads for each creator with yt-dlp and append any
# video IDs not already recorded to a CSV, so repeated runs only add new rows.

num_urls = 2  # Maximum number of playlist entries requested per channel.
creators = [
    '@smiletojannah',
    '@thedeshbhakt',
    '@MuslimSkeptic',
]

output_path = 'URLs/url_channel_id.csv'
os.makedirs('URLs', exist_ok=True)

# Load existing data if the file exists.
if os.path.exists(output_path):
    # Keep IDs as text so an all-digit ID is not re-parsed as a number
    # (which would break the duplicate check below).
    existing_df = pd.read_csv(output_path, dtype={'id': str})
    existing_ids = set(existing_df['id'].astype(str))
else:
    existing_df = pd.DataFrame(columns=['creator_id', 'id', 'url', 'timestamp'])
    existing_ids = set()

# Column-oriented accumulator for rows discovered during this run.
new_data = {
    'creator_id': [],
    'id': [],
    'url': [],
    'timestamp': [],
}

for creator in creators:
    channel_url = f"https://www.youtube.com/{creator}"  # or channel ID/URL

    # Run yt-dlp to fetch JSON metadata (one JSON object per output line).
    result = subprocess.run(
        ["yt-dlp", "--dump-json", "--flat-playlist", "--playlist-end", str(num_urls), channel_url],
        capture_output=True, text=True
    )

    if result.returncode != 0:
        # Report the failure but keep going: other creators may still succeed,
        # and any entries printed before the failure are still parsed below.
        print(
            f"Warning: yt-dlp exited with code {result.returncode} "
            f"for {creator}: {result.stderr.strip()}"
        )

    # Parse each non-blank line; empty stdout would otherwise yield a single
    # '' element and make json.loads raise.
    video_entries = [
        json.loads(line)
        for line in result.stdout.splitlines()
        if line.strip()
    ]

    # All rows found for this creator share one fetch timestamp.
    now = str(time.time())
    for entry in video_entries:
        video_id = entry.get('id')
        if not video_id:
            # Entry carries no usable ID, so no watch URL can be built.
            continue
        video_id = str(video_id)
        if video_id not in existing_ids:
            new_data['creator_id'].append(creator)
            new_data['id'].append(video_id)
            new_data['url'].append(f"https://www.youtube.com/watch?v={video_id}")
            new_data['timestamp'].append(now)
            existing_ids.add(video_id)  # Mark as seen

# Save only if new data exists, so the file is not rewritten needlessly.
if new_data['id']:
    new_df = pd.DataFrame(new_data)
    combined_df = pd.concat([existing_df, new_df], ignore_index=True)
    combined_df.to_csv(output_path, index=False)
    print(f"✅ Added {len(new_df)} new videos.")
else:
    print("🔁 No new videos to add.")