# Selenium Web Scraping

In [2]:
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def get_snopes_fact_check_urls(search_term, page_num):
    """Collect Snopes fact-check links from one page of site-search results.

    Opens the Snopes search page for ``search_term`` in headless Chrome,
    waits up to 7 seconds for an element with class ``gs-title`` to be
    present, then gathers the ``href`` of every ``gs-title`` element that
    contains ``https://www.snopes.com/fact-check``.

    Args:
        search_term: Text to search for. It is percent-encoded before being
            placed in the URL.
        page_num: Results page number, passed via the ``gsc.page`` fragment
            parameter.

    Returns:
        A list of matching URLs in the order the elements were found
        (duplicates are not removed), or an empty list if any error occurs
        (the error is printed).
    """
    from urllib.parse import quote

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")

    driver = webdriver.Chrome(options=chrome_options)

    # Encode the term so that spaces, '&' or '#' in it cannot terminate the
    # query string or corrupt the fragment parameters.
    encoded_term = quote(str(search_term), safe="")
    url = f"https://www.snopes.com/search/?q={encoded_term}#gsc.tab=0&gsc.q={encoded_term}&gsc.page={page_num}"

    try:
        driver.get(url)
        WebDriverWait(driver, 7).until(
            EC.presence_of_element_located((By.CLASS_NAME, "gs-title"))
        )

        fact_check_urls = []
        for link in driver.find_elements(By.CLASS_NAME, "gs-title"):
            href = link.get_attribute("href")
            # Elements without an href yield None and are skipped.
            if href and "https://www.snopes.com/fact-check" in href:
                fact_check_urls.append(href)

        return fact_check_urls

    except Exception as e:
        print(f"An error occurred: {e}")
        return []
    finally:
        # Always release the browser process, even on failure.
        driver.quit()

if __name__ == '__main__':
    # Scrape search-result pages 1-9 for the term "video" and write the
    # collected fact-check URLs to ./input.txt, one per line.
    all_urls = []
    # Track URLs already kept. The previous version kept every other URL,
    # presumably because each result link appears twice in the page; an
    # explicit seen-set removes duplicates without relying on that ordering.
    seen_urls = set()
    for page_num in range(1, 10):
        urls = get_snopes_fact_check_urls("video", page_num)
        if urls:
            print(f"Found the following fact-check URLs on page {page_num}:")
            for url in urls:
                if url in seen_urls:
                    continue
                seen_urls.add(url)
                print(url)
                all_urls.append(url)
        else:
            print(f"No fact-check URLs found on page {page_num}.")

    print("\nAll collected URLs:")
    print(all_urls)

    filepath = os.path.join(".", "input.txt")
    try:
        with open(filepath, "w", encoding="utf-8") as f:
            f.writelines(url + "\n" for url in all_urls)
        print(f"URLs saved to {filepath}")
    except OSError as e:
        # Report why the write failed instead of discarding the exception.
        print(f"Could not save to file: {e}")

Found the following fact-check URLs on page 1:
https://www.snopes.com/fact-check/trump-musk-nyc-protest/
https://www.snopes.com/fact-check/video-bugs-pringles-chip/
https://www.snopes.com/fact-check/dog-train-tracks-tiktok-video/
https://www.snopes.com/fact-check/video-crowd-celebrating-trump-mugshot/
https://www.snopes.com/fact-check/voter-fraud-in-2016-primary/
https://www.snopes.com/fact-check/europa-io-jupiter-video/
https://www.snopes.com/fact-check/1908-olympics-video/
https://www.snopes.com/fact-check/us-cargo-ship-houthi-missile/
https://www.snopes.com/fact-check/trump-project-2025-roberts/
https://www.snopes.com/fact-check/polo-blow/
https://www.snopes.com/fact-check/video-zelenskyy-dancing-high-heels/
https://www.snopes.com/fact-check/elephant-seal-chilean-town/
https://www.snopes.com/fact-check/video-snowy-owl-carrying-chicks/
https://www.snopes.com/fact-check/officer-nakia-jones-fired-after-viral-police-brutality-video/
Found the following fact-check URLs on page 2:
https:/

# BS4 Scraping + JSON Builder

In [None]:
from bs4 import BeautifulSoup
import requests
import yt_dlp
import os
import subprocess
import json
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time


def get_headline(url):
    """Return the article headline from a Snopes page.

    Fetches ``url`` and returns the stripped text of the first ``<h1>`` inside
    ``<section class="title-container">``.

    Returns:
        The headline string, or None if the section or heading is missing or
        the request fails (the error is printed).
    """
    try:
        # Bound the request so one unresponsive server cannot stall a batch run.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        title_container = soup.find('section', class_='title-container')
        if title_container:
            headline_tag = title_container.find('h1')
            return headline_tag.text.strip() if headline_tag else None
        return None

    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

def get_subheadline(url):
    """Return the article sub-headline from a Snopes page.

    Fetches ``url`` and returns the stripped text of the first ``<h2>`` inside
    ``<section class="title-container">``.

    Returns:
        The sub-headline string, or None if the section or heading is missing
        or the request fails (the error is printed).
    """
    try:
        # Bound the request so one unresponsive server cannot stall a batch run.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        title_container = soup.find('section', class_='title-container')
        if title_container:
            subheadline_tag = title_container.find('h2')
            return subheadline_tag.text.strip() if subheadline_tag else None
        return None

    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

def _get_social_media_platform(link):
    if not link:
        return None

    social_media_domains = {
        "tiktok.com": "tiktok",
        "youtube.com": "youtube",
        "twitter.com": "twitter",
        "x.com": "twitter",
        "facebook.com": "facebook",
        "reddit.com": "reddit",
        "instagram.com": "instagram",
    }

    for domain, platform_name in social_media_domains.items():
        if domain in link:
            return platform_name
    return None

def get_rating(url):
    """Return the rating label shown on a Snopes fact-check page.

    Fetches ``url`` and returns the first non-blank string found among the
    direct children of ``<div class="rating_title_wrap">``.

    Returns:
        The stripped rating text, or None if the div is absent, has no such
        child, or any error occurs (the error is printed).
    """
    try:
        # Bound the request so one unresponsive server cannot stall a batch run.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        rating_div = soup.find('div', class_='rating_title_wrap')
        if rating_div:
            for child in rating_div.children:
                if child.string and child.string.strip():
                    return child.string.strip()
        return None

    # RequestException is a subclass of Exception, so one clause covers both.
    except Exception as e:
        print(f"Error getting rating: {e}")
        return None
    
def get_rating_context(url):
    """Return the stripped text of the ``claim_cont`` div on a Snopes page.

    Returns:
        The div's text, or None if the div is absent or the request fails
        (the error is printed).
    """
    try:
        # Bound the request so one unresponsive server cannot stall a batch run.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        claim_div = soup.find('div', class_='claim_cont')
        if claim_div:
            return claim_div.text.strip()
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL for rating context: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred while getting rating context: {e}")
        return None


def get_links_from_article_rail(url):
    """Return candidate source links found on a Snopes article page.

    If the page has a ``div`` with class ``embed_container youtube_container``
    holding an iframe whose ``data-src`` (checked first) or ``src`` contains
    ``youtube.com/embed``, that embed URL is returned as a one-element list.
    Otherwise every ``href`` inside ``<div class="article_rail_wrapper">`` is
    returned in document order.

    Returns:
        A list of URL strings; empty if neither container yields a link or an
        error occurs (the error is printed).
    """
    try:
        # Bound the request so one unresponsive server cannot stall a batch run.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        youtube_container = soup.find('div', class_='embed_container youtube_container')
        if youtube_container:
            # The embed URL may be stored in data-src rather than src, so
            # check data-src first and fall back to src.
            for attr_name in ('data-src', 'src'):
                iframe = youtube_container.find('iframe', attrs={attr_name: True})
                if iframe and 'youtube.com/embed' in iframe[attr_name]:
                    return [iframe[attr_name]]

        article_rail_wrapper = soup.find('div', class_='article_rail_wrapper')
        if article_rail_wrapper:
            return [a_tag['href'] for a_tag in article_rail_wrapper.find_all('a', href=True)]
        return []

    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return []
    except Exception as e:
        print(f"An error occurred: {e}")
        return []

def get_first_social_link(url):
    """Pick the first social-media link among an article's candidate links.

    Candidates come from ``get_links_from_article_rail(url)`` and are checked
    in order; a candidate qualifies when it starts with an http(s) URL for
    TikTok, YouTube, Twitter/X, Facebook, Reddit or Instagram (optionally
    prefixed by ``www.``).

    Returns:
        The first qualifying link; None when there are no candidates at all;
        the string "No link associated with article found." when candidates
        exist but none qualifies.
    """
    candidates = get_links_from_article_rail(url)
    if not candidates:
        return None

    recognised_prefixes = (
        r'https?://(?:www\.)?tiktok\.com',
        r'https?://(?:www\.)?youtube\.com',
        r'https?://(?:www\.)?(?:twitter\.com|x\.com)',
        r'https?://(?:www\.)?facebook\.com',
        r'https?://(?:www\.)?reddit\.com',
        r'https?://(?:www\.)?instagram\.com',
    )

    first_hit = next(
        (
            candidate
            for candidate in candidates
            if any(re.match(prefix, candidate) for prefix in recognised_prefixes)
        ),
        None,
    )
    if first_hit is not None:
        return first_hit

    return "No link associated with article found."


def download_video(url, output_dir, output_filename="downloaded_video.mp4", verbose=False):
    """Download a video with yt-dlp and store it as an MP4 in ``output_dir``.

    The video's metadata is fetched first; videos with unknown duration or
    longer than 600 seconds are rejected without downloading. The download is
    written to ``temp_download.<ext>`` in ``output_dir``. An ``.mp4`` download
    is moved to the target name; anything else is re-encoded with ffmpeg
    (libx264 / aac) into the target file.

    Args:
        url: Address of the video to download.
        output_dir: Directory that receives the temporary and final files.
        output_filename: Name of the final MP4 file inside ``output_dir``.
        verbose: When True, yt-dlp output and ffmpeg diagnostics are printed.

    Returns:
        A ``(success, message, duration)`` tuple. ``duration`` is the value
        reported by yt-dlp on success or when the length limit is exceeded,
        and None on every other failure.
    """
    # Path of the temporary download, set once it may exist; removed in
    # ``finally`` so no failure path leaves it behind.
    downloaded_filename = None
    try:
        output_path = os.path.join(output_dir, output_filename)
        ydl_opts = {
            'format': 'best',
            'outtmpl': os.path.join(output_dir, 'temp_download.%(ext)s'),
            'quiet': not verbose,
            'no_warnings': True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # Metadata-only pass: lets us enforce the length limit before
            # spending bandwidth on the download.
            info_dict = ydl.extract_info(url, download=False)
            duration = info_dict.get('duration')

            if duration is None:
                return False, "Could not determine video duration.", None

            if duration > 600:
                return False, "Video exceeds 10-minute limit.", duration

            info_dict = ydl.extract_info(url, download=True)
            downloaded_filename = ydl.prepare_filename(info_dict)

        _, ext = os.path.splitext(downloaded_filename)
        if ext.lower() == '.mp4':
            # os.replace overwrites an existing target, matching the
            # overwrite behaviour of the ffmpeg path ('-y').
            os.replace(downloaded_filename, output_path)
            downloaded_filename = None  # moved; nothing left to clean up
            return True, "Successfully downloaded and renamed to MP4.", duration

        command = [
            'ffmpeg',
            '-i', downloaded_filename,
            '-c:v', 'libx264',
            '-c:a', 'aac',
            '-strict', 'experimental',
            '-y',
            output_path
        ]

        if verbose:
            print(f"Running ffmpeg command: {' '.join(command)}")

        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except FileNotFoundError:
            # Only a missing ffmpeg executable is reported this way; a missing
            # download file elsewhere falls through to the generic handler.
            return False, "ffmpeg not found.  Please make sure ffmpeg is installed and in your system's PATH.", None

        if result.returncode != 0:
            error_message = f"FFmpeg conversion failed:\nReturn code: {result.returncode}\nStdout: {result.stdout}\nStderr: {result.stderr}"
            if verbose:
                print(error_message)
            return False, error_message, None

        if verbose:
            print("FFmpeg conversion successful.")
        return True, "Successfully downloaded and converted to MP4.", duration

    except yt_dlp.utils.DownloadError as e:
        return False, f"yt-dlp download error: {e}", None
    except Exception as e:
        return False, f"An unexpected error occurred: {e}", None
    finally:
        if downloaded_filename and os.path.exists(downloaded_filename):
            try:
                os.remove(downloaded_filename)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # result being returned.
                pass


def download_progress_hook(d):
    """Print download progress for a status dictionary ``d``.

    While ``d['status']`` is ``'downloading'`` a single line with percentage,
    speed and ETA is printed, terminated by a carriage return so the next
    update overwrites it. When the status is ``'finished'`` the downloaded
    file name is printed on a new line. Any other status prints nothing.
    """
    status = d['status']
    if status == 'finished':
        print(f"\nDownloaded: {d['filename']}")
        return
    if status == 'downloading':
        progress_line = f"Downloading: {d['_percent_str']} {d['_speed_str']} ETA: {d['_eta_str']}"
        print(progress_line, end='\r')


def get_youtube_description(url):
    """Return the description text of a YouTube video.

    The video id is taken from the part of ``url`` following ``embed/``,
    ``v/``, ``?v=``, ``&v=`` or ``.be/``. The corresponding watch page is
    fetched and the ``shortDescription`` value embedded in the page is
    decoded; if that is not present, the ``<meta name="description">``
    content is used instead.

    Returns:
        The description string, or None if the URL has no recognisable video
        id, no description is found, or the request fails (a message is
        printed in each case).
    """
    try:
        # A former fallback pattern used a greedy ".*" before its capture
        # group and so could only ever capture a single trailing character;
        # URLs without a recognisable id are now rejected instead.
        match = re.search(r"(?:embed/|v/|\?v=|&v=|\.be/)([\w-]+)", url)
        if not match:
            print("Invalid YouTube URL.")
            return None
        video_id = match.group(1)
        watch_url = f"https://www.youtube.com/watch?v={video_id}"

        # Bound the request so one unresponsive server cannot stall a batch run.
        response = requests.get(watch_url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        pattern = re.compile(r'(?<=shortDescription":").*?(?=","isCrawlable)')
        match = pattern.search(str(soup))

        if match:
            raw_description = match.group(0)
            try:
                # The captured text is the body of a JSON string literal, so
                # let the JSON decoder handle every escape (\n, \", \\, \uXXXX).
                return json.loads(f'"{raw_description}"')
            except json.JSONDecodeError:
                # Fall back to the basic manual unescaping.
                return raw_description.replace('\\n', '\n').replace('\\\\', '\\').replace('\\"', '"')

        desc_tag = soup.find('meta', attrs={'name': 'description'})
        if desc_tag and desc_tag.get('content'):
            return desc_tag.get('content')
        print("Description not found using primary or secondary methods.")
        return None

    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None


def get_tweet_text(url):
    """Return the text of a tweet by rendering its page in headless Chrome.

    After loading ``url`` and pausing 5 seconds, waits up to 20 seconds for
    ``div[data-testid="tweetText"]`` and concatenates the text of every
    ``<span>`` inside it.

    Returns:
        The concatenated text, or None if the element does not appear in time
        or any other error occurs (a message is printed).
    """
    browser_options = Options()
    for flag in ("--headless", "--disable-gpu", "--no-sandbox"):
        browser_options.add_argument(flag)

    browser = webdriver.Chrome(options=browser_options)
    text = None

    try:
        browser.get(url)
        time.sleep(5)
        try:
            locator = (By.CSS_SELECTOR, 'div[data-testid="tweetText"]')
            container = WebDriverWait(browser, 20).until(
                EC.presence_of_element_located(locator)
            )
            pieces = container.find_elements(By.TAG_NAME, 'span')
            text = "".join(piece.text for piece in pieces)
        except TimeoutException:
            print(f"Timeout: Tweet text not found within the time limit for URL: {url}")
        except NoSuchElementException:
            print(f"Error: Tweet element not found for URL: {url}")

    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        text = None
    finally:
        # Shut the browser down regardless of how the lookup ended.
        browser.quit()

    return text


def get_tiktok_description(tiktok_url):
    """Return the caption (``desc``) of a TikTok video page.

    The page is fetched with browser-like headers and a 10-second timeout;
    the JSON inside the ``__UNIVERSAL_DATA_FOR_REHYDRATION__`` script tag is
    parsed and the value at
    ``__DEFAULT_SCOPE__ / webapp.video-detail / itemInfo / itemStruct / desc``
    is returned.

    Returns:
        The description string, or None if the script tag or any key on the
        path is missing, the JSON is invalid, or the request fails (a message
        is printed).
    """
    desc_path = ('__DEFAULT_SCOPE__', 'webapp.video-detail', 'itemInfo', 'itemStruct', 'desc')
    try:
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Referer': 'https://www.tiktok.com/',
        }
        page = requests.get(tiktok_url, headers=request_headers, timeout=10)
        page.raise_for_status()

        document = BeautifulSoup(page.text, 'html.parser')
        script_tag = document.find('script', {'id': '__UNIVERSAL_DATA_FOR_REHYDRATION__'})

        if not script_tag:
            print("Error: Could not find the '__UNIVERSAL_DATA_FOR_REHYDRATION__' script tag.")
            return None
        payload = json.loads(script_tag.string)

        try:
            # Walk the nested dictionaries one key at a time; a missing key
            # raises KeyError naming exactly that key.
            node = payload
            for key in desc_path:
                node = node[key]
            return node
        except KeyError as e:
            print(f"Error: Could not find the description in the JSON data.  KeyError: {e}")
            print("JSON structure may have changed. Check the path to the 'desc' field.")
            return None

    except requests.exceptions.RequestException as e:
        print(f"Error: Request failed: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Error: JSON decoding failed: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None


def get_tiktok_video_id(url):
    """Extract the numeric video id from a TikTok URL.

    The id is the run of digits following ``/video/`` anywhere in ``url``.

    Args:
        url: TikTok URL string; may be None or empty.

    Returns:
        The id as a string, or None if ``url`` is falsy or has no
        ``/video/<digits>`` segment.
    """
    # Callers may pass None when no canonical URL could be determined.
    if not url:
        return None
    match = re.search(r'/video/(\d+)', url)
    return match.group(1) if match else None


def get_seo_canonical_url(tiktok_url: str) -> tuple:
    """Resolve a TikTok URL to its canonical URL and video id.

    The page is fetched (following redirects, 10-second timeout) and the
    canonical URL is read from ``__DEFAULT_SCOPE__ / seo.abtest / canonical``
    in the ``__UNIVERSAL_DATA_FOR_REHYDRATION__`` script tag. The video id is
    extracted from the canonical URL, falling back to ``tiktok_url`` itself.

    Returns:
        A ``(canonical_url, video_id)`` tuple; either element may be None.
        ``(None, None)`` is returned if the script tag is missing or an error
        occurs (a message is printed).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    try:
        response = requests.get(tiktok_url, headers=headers, allow_redirects=True, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        script_tag = soup.find('script', {'id': '__UNIVERSAL_DATA_FOR_REHYDRATION__'})
        if not script_tag:
            print("Error: Could not find the '__UNIVERSAL_DATA_FOR_REHYDRATION__' script tag.")
            return None, None

        json_data = json.loads(script_tag.string)
        try:
            canonical_url = json_data['__DEFAULT_SCOPE__']['seo.abtest']['canonical']
        except (KeyError, IndexError, TypeError):
            # The canonical entry is optional; absence is not an error.
            canonical_url = None

        # Only parse the canonical URL when it is a string; otherwise the id
        # lookup would raise and skip the fallback to the original URL below.
        video_id = None
        if isinstance(canonical_url, str):
            video_id = get_tiktok_video_id(canonical_url)
        if video_id is None:
            video_id = get_tiktok_video_id(tiktok_url)

        return canonical_url, video_id

    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None, None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None, None


def get_reddit_post_title(url):
    """Return the title of a Reddit post.

    Fetches ``url`` with a browser-like User-Agent and returns the stripped
    text of the first element whose ``id`` starts with ``post-title-``.

    Returns:
        The title string, or None if no such element exists or the request
        fails (the error is printed).
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Bound the request so one unresponsive server cannot stall a batch run.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        title_element = soup.select_one('[id^="post-title-"]')
        if title_element:
            return title_element.text.strip()
        return None

    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


def _url_has_domain(url, *domains):
    """Return True if the hostname of ``url`` is one of ``domains`` or a subdomain of one."""
    from urllib.parse import urlparse

    candidate = url.strip()
    try:
        host = urlparse(candidate).hostname
        if host is None and "://" not in candidate:
            # Scheme-less input ("www.youtube.com/...") only exposes its
            # hostname when parsed as a network location.
            host = urlparse("//" + candidate).hostname
    except ValueError:
        return False
    if not host:
        return False
    return any(host == domain or host.endswith("." + domain) for domain in domains)


def get_content_from_url(url):
    """Fetch the text that accompanies a social-media post.

    Dispatches on the hostname of ``url`` (exact domain or subdomain, so that
    e.g. ``netflix.com`` is not treated as ``x.com``):

    * YouTube (``youtube.com`` / ``youtu.be``): video description.
    * Twitter (``twitter.com`` / ``x.com``): tweet text.
    * TikTok: video description, looked up via the canonical URL when one is
      available and via ``url`` otherwise.
    * Reddit: post title.

    Returns:
        ``{"site_type": <platform>, "content": <text>}``, or None if the
        platform is unsupported, no text could be retrieved, or an error
        occurs (a message is printed for unsupported URLs and errors).
    """
    try:
        if _url_has_domain(url, "youtube.com", "youtu.be"):
            description = get_youtube_description(url)
            if description:
                return {"site_type": "youtube", "content": description}
            return None

        if _url_has_domain(url, "twitter.com", "x.com"):
            tweet_text = get_tweet_text(url)
            if tweet_text:
                return {"site_type": "twitter", "content": tweet_text}
            return None

        if _url_has_domain(url, "tiktok.com"):
            canonical_url, _ = get_seo_canonical_url(url)
            description = get_tiktok_description(canonical_url or url)
            # An empty caption is still a valid result, so test against None.
            if description is not None:
                return {"site_type": "tiktok", "content": description}
            return None

        if _url_has_domain(url, "reddit.com"):
            title = get_reddit_post_title(url)
            if title:
                return {"site_type": "reddit", "content": title}
            return None

        print(f"Unsupported URL: {url}")
        return None
    except Exception as e:
        print(f"An unexpected error in get_content_from_url: {e}")
        return None

import requests
from bs4 import BeautifulSoup

def extract_sentences_with_links(url):
    """Map each link-bearing paragraph of a page to the links it contains.

    Every ``<p>`` element, then every ``<blockquote>`` element, that holds at
    least one ``<a href=...>`` contributes an entry: the key is the element's
    whitespace-normalised text, the value a dict of
    ``{link text: href}`` for its links. Elements with identical text
    overwrite earlier entries, as do links with identical text.

    Returns:
        The mapping described above; an empty dict if the request fails or
        any other error occurs (the error is printed).
    """
    def normalised_text(node):
        # Join text fragments with single spaces and collapse runs of
        # whitespace.
        return ' '.join(node.get_text(separator=" ", strip=True).split())

    try:
        # Bound the request so one unresponsive server cannot stall a batch run.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        results = {}

        # Process all paragraphs before all blockquotes so that the insertion
        # order (and which duplicate key wins) is predictable.
        for tag_name in ('p', 'blockquote'):
            for container in soup.find_all(tag_name):
                links = container.find_all('a', href=True)
                if not links:
                    continue
                results[normalised_text(container)] = {
                    normalised_text(link): link['href'] for link in links
                }

        return results

    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return {}
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}



if __name__ == '__main__':
    # For every Snopes URL listed in input.txt: scrape the article metadata,
    # try to download the referenced social-media video, and append a record
    # to the JSON results file.
    video_count = 1  # DO NOT MODIFY USED FOR LABELING
    results = []

    input_file = "input.txt"
    download_dir = "/fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/snopes"
    output_json_path = os.path.join("/fs/clip-projects/rlab/atrey/ooc-misinformation/scraping", "video_data_snopes.json")

    os.makedirs(download_dir, exist_ok=True)

    try:
        with open(input_file, "r") as f:
            # Skip blank lines so they are not processed as empty URLs.
            all_urls = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found.")
        raise SystemExit(1)

    print(f"There are {len(all_urls)} videos to be processed.")

    for URL in all_urls:
        headline = get_headline(URL)
        subheadline = get_subheadline(URL)
        # Resolve the social link once and derive the platform from it,
        # instead of fetching and parsing the article twice.
        social_link = get_first_social_link(URL)
        social_platform = _get_social_media_platform(social_link)
        rating = get_rating(URL)
        rating_context = get_rating_context(URL)
        destination_path = None
        download_success = False
        social_text = None
        video_duration = None
        external = extract_sentences_with_links(URL)

        if social_link and social_link != "No link associated with article found.":
            download_success, download_message, duration = download_video(social_link, download_dir, output_filename=f"video_{video_count}.mp4")
            if download_success:
                destination_path = os.path.join(download_dir, f"video_{video_count}.mp4")
                print(f"Video downloaded to {destination_path}")
            video_duration = duration
            content_result = get_content_from_url(social_link)
            if content_result:
                social_text = content_result['content']
        else:
            download_message = "No social media link found, skipping download."

        video_data = {
            'snope_url': URL,
            'snope_headline': headline,
            'snope_subheadline': subheadline,
            'social_platform': social_platform,
            'rating': rating,
            'rating_context': rating_context,
            'social_link': social_link,
            'social_duration': video_duration,
            'social_text': social_text,
            'external': external,
            'download_success': download_success,
            'download_message': download_message,
            'drive_path': destination_path,
        }
        results.append(video_data)

        # Rewrite the full results file after every article so that progress
        # survives an interruption of the run.
        try:
            with open(output_json_path, "w") as f:
                json.dump(results, f, indent=4)
            print(f"Video data saved to {output_json_path} (after processing video {video_count})")
        except Exception as e:
            print(f"Error writing to JSON file: {e}")

        video_count += 1
        print("Next video processing\n\n")

There are 95 videos to be processed.
Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_1.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 1)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_2.mp4
Error: Could not find the description in the JSON data.  KeyError: 'itemInfo'
JSON structure may have changed. Check the path to the 'desc' field.
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 2)
Next video processing




ERROR: [TikTok] 7012463416216489218: Video not available, status code 10222; please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Error: Could not find the description in the JSON data.  KeyError: 'itemInfo'
JSON structure may have changed. Check the path to the 'desc' field.
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 3)
Next video processing




ERROR: Unsupported URL: https://x.com/realDonaldTrump?ref_src=twsrc%5Etfw


Timeout: Tweet text not found within the time limit for URL: https://twitter.com/realDonaldTrump?ref_src=twsrc%5Etfw
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 4)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_5.mp4
Unsupported URL: https://www.facebook.com/ionflix/videos/1829274207291885/
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 5)
Next video processing




ERROR: [Reddit] tsw5nk8s6fe81: Requested format is not available. Use --list-formats for a list of available formats


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 6)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_7.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 7)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_8.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 8)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 9)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 10)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformati

ERROR: [twitter] 1265855459719892993: No video could be found in this tweet


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 15)
Next video processing




ERROR: [facebook] 2548841225330935: No video formats found!; please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Unsupported URL: https://www.facebook.com/CambioDigital/videos/2548841225330935/
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 16)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_17.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 17)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_18.mp4
Unsupported URL: https://www.facebook.com/groups/441227163449213/permalink/802269580678301/
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 18)
Next video processing




ERROR: [Imgur] 1uZWelJ: Unable to download JSON metadata: HTTP Error 404: Not Found (caused by <HTTPError 404: Not Found>)


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 19)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_20.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 20)
Next video processing




ERROR: [facebook] 10159034024061460: This video is only available for registered users. Use --cookies, --cookies-from-browser, --username and --password, --netrc-cmd, or --netrc (facebook) to provide account credentials. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies


Unsupported URL: https://www.facebook.com/35499336459/posts/10159034024061460
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 21)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 22)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_23.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 23)
Next video processing




ERROR: [twitter] 1650863463634448388: NSFW tweet requires authentication. Use --cookies, --cookies-from-browser, --username and --password, --netrc-cmd, or --netrc (twitter) to provide account credentials. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies


Timeout: Tweet text not found within the time limit for URL: https://twitter.com/reserch_human/status/1650863463634448388
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 24)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_25.mp4
Unsupported URL: https://www.facebook.com/melaniemellymel13/posts/pfbid021YAzNH51e4j8UzDgYwmBuPWnyoLqBUXtwPPUEt4VsQFPXAFXBrgMDMsuAmepJU2Ul?__cft__[0]=AZVe1HoJ5XAINEAHZlsBkSlGDgjVuldDcQaz7sebin3ovH0rdoa9JZiLmOLngUeJh8xbhjWNgbkuh0_MrMXtxYqfP1gC6Z4XZuH6E6Qe28adwLEtkPMpH_sHSA-AhL-xQlU-Xrg2c33wFN_ogJRBpnPTEmjRK75CwvrwaBBcVr8AUQ&__tn__=%2CO%2CP-R
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 25)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_26.mp4
Video data saved to /fs/clip-

ERROR: [youtube:tab] ObamaSnippetsDotCom": Unable to download API page: HTTP Error 404: Not Found (caused by <HTTPError 404: Not Found>)


Invalid YouTube URL.
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 32)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 33)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_34.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 34)
Next video processing




ERROR: [twitter] 1227570288071716864: No video could be found in this tweet


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 35)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 36)
Next video processing




ERROR: [Reddit] 1654hjw: HTTP Error 403 Forbidden; reason given: None; please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 37)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_38.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 38)
Next video processing




ERROR: [twitter] 1457804525427908608: No video could be found in this tweet


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 39)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_40.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 40)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_41.mp4
Unsupported URL: https://www.facebook.com/MasterBert195/posts/393037023107806/
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 41)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_42.mp4
Unsupported URL: https://www.facebook.com/1787534361480226/videos/pcb.2141356472764678/2141354776098181/
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misi

ERROR: [youtube] Oww5zb0XvHg: Video unavailable. This video is no longer available because the YouTube account associated with this video has been terminated.


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 43)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_44.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 44)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_45.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 45)
Next video processing




ERROR: [twitter] 1521701921563815937: Suspended


Timeout: Tweet text not found within the time limit for URL: https://twitter.com/labian1807/status/1521701921563815937
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 46)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_47.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 47)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_48.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 48)
Next video processing




ERROR: [twitter] 1867317812450246759: No video could be found in this tweet


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 49)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_50.mp4
Unsupported URL: https://www.facebook.com/AiydaNurhidayah/videos/327356836054673/
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 50)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_51.mp4
Unsupported URL: https://www.facebook.com/158383721771326/videos/468183177282071/
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 51)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_52.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/

ERROR: Unsupported URL: https://www.facebook.com/photo?fbid=10158003446139620&set=pcb.3242033472584438


Unsupported URL: https://www.facebook.com/photo?fbid=10158003446139620&set=pcb.3242033472584438
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 57)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_58.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 58)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_59.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 59)
Next video processing




ERROR: Unsupported URL: https://www.facebook.com/YodaBBYABY


Unsupported URL: https://www.facebook.com/YodaBBYABY
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 60)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_61.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 61)
Next video processing




ERROR: [facebook] 512515981158720: No video formats found!; please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Unsupported URL: https://www.facebook.com/reel/512515981158720
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 62)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 63)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 64)
Next video processing




ERROR: [youtube] tWVQFtYpGnY: Video unavailable. This video is no longer available due to a copyright claim by Rick Lax Entertainment


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 65)
Next video processing




ERROR: Unsupported URL: https://x.com/adubb1022


Timeout: Tweet text not found within the time limit for URL: https://twitter.com/adubb1022
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 66)
Next video processing




ERROR: [instagram:user] warmnfuzzy.tv: Unable to extract data; please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Unsupported URL: https://www.instagram.com/warmnfuzzy.tv/
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 67)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 68)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_69.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 69)
Next video processing




ERROR: [TikTok] 7434566887276662049: Video not available, status code 100004; please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Error: Could not find the description in the JSON data.  KeyError: 'webapp.video-detail'
JSON structure may have changed. Check the path to the 'desc' field.
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 70)
Next video processing




ERROR: [facebook] 2731565820205672: Cannot parse data; please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Unsupported URL: https://www.facebook.com/xyxmfn/videos/vb.238815406778496/2731565820205672/?type=2&theater
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 71)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 72)
Next video processing


Invalid YouTube URL.
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 73)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_74.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 74)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_75.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scrapin

ERROR: [youtube] fQupQPCEw4U: Video unavailable


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 82)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_83.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 83)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_84.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 84)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 85)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 86)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinf

ERROR: [youtube] BuO6yJrRAYw: Video unavailable


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 88)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 89)
Next video processing




ERROR: [facebook] 448116063831287: No video formats found!; please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Unsupported URL: https://www.facebook.com/watch/?v=448116063831287
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 90)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_91.mp4
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 91)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 92)
Next video processing


Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/video_data.json (after processing video 93)
Next video processing


Video downloaded to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/downloaded_videos/video_94.mp4
Unsupported URL: https://www.instagram.com/p/BiMxAXalARt/
Video data saved to /fs/clip-projects/rlab/atrey/ooc-misinformation/scraping/vi