### Code Scraping : Facebook Group

In [2]:
import os
import time
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')
GROUP_URL = 'https://www.facebook.com/groups/247152564671716'
SCROLL_COUNT = 5
OUTPUT_FILENAME = 'test-scraping.txt'
PROFILE_PATH = r'C:\chrome-profiles\fb-scraper'

def scrape_post_details(driver, post_element):
    details = {}
    
    see_more = post_element.find_elements(By.XPATH, ".//div[text()='See more' or text()='ดูเพิ่มเติม']")
    if see_more:
        driver.execute_script("arguments[0].click();", see_more[0])
        time.sleep(0.3)

    author_link = post_element.find_elements(By.CSS_SELECTOR, "h3 a[role='link']")
    if author_link:
        details["author_name"] = author_link[0].text
        details["author_url"] = author_link[0].get_attribute('href')

    content_divs = post_element.find_elements(By.CSS_SELECTOR, "div[data-ad-preview='message'], div[dir='auto']")
    if content_divs:
        details["post_content"] = "\n".join(div.text for div in content_divs if div.text.strip())

    timestamp_link = post_element.find_elements(By.CSS_SELECTOR, "span > a[role='link'][href*='/posts/'], span > a[role='link'][href*='?post_id=']")
    if timestamp_link:
        details["post_timestamp"] = timestamp_link[0].text
        details["post_url"] = timestamp_link[0].get_attribute('href')

    footer = post_element.find_elements(By.CSS_SELECTOR, "div[role='toolbar']")
    if footer:
        reactions = footer[0].find_elements(By.CSS_SELECTOR, "span[aria-label*='reaction']")
        details["reactions"] = reactions[0].get_attribute('aria-label') if reactions else "0"
        
        comments = footer[0].find_elements(By.XPATH, ".//div[contains(text(), 'comment') or contains(text(), 'ความคิดเห็น')]")
        details["comments"] = comments[0].text if comments else "0 comments"
    else:
        details["reactions"] = "0"
        details["comments"] = "0 comments"

    if details.get("author_name") and details.get("post_content"):
        return details
    return None

def main():
    options = uc.ChromeOptions()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-notifications")
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")
    
    with uc.Chrome(options=options, use_subprocess=True, version_main=137) as driver:
        driver.get(GROUP_URL)

        try:
            email_input = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.NAME, "email"))
            )
            email_input.send_keys(FACEBOOK_EMAIL)
            driver.find_element(By.NAME, "pass").send_keys(FACEBOOK_PASSWORD, Keys.RETURN)
        except TimeoutException:
            pass

        WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='feed']")))
        
        for _ in range(SCROLL_COUNT):
            last_height = driver.execute_script("return document.body.scrollHeight")
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            try:
                WebDriverWait(driver, 5, 0.5).until(
                    lambda d: d.execute_script("return document.body.scrollHeight") > last_height
                )
            except TimeoutException:
                break
        
        posts_data = []
        posts = driver.find_elements(By.CSS_SELECTOR, "div[role='article']")
        for post in posts:
            details = scrape_post_details(driver, post)
            if details:
                posts_data.append(details)

        if posts_data:
            with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as f:
                for i, post in enumerate(posts_data, 1):
                    f.write(f"=============== POST #{i} ===============\n")
                    f.write(f"Author: {post.get('author_name', 'N/A')}\n")
                    f.write(f"Author URL: {post.get('author_url', 'N/A')}\n")
                    f.write(f"Timestamp: {post.get('post_timestamp', 'N/A')}\n")
                    f.write(f"Post URL: {post.get('post_url', 'N/A')}\n")
                    f.write(f"Reactions: {post.get('reactions', 'N/A')}\n")
                    f.write(f"Comments: {post.get('comments', 'N/A')}\n")
                    f.write("-" * 20 + " CONTENT " + "-" * 20 + "\n")
                    f.write(f"{post.get('post_content', 'No content found.')}\n\n\n")

if __name__ == "__main__":
    main()

### Code Scraping : Facebook Page

In [None]:
import time
import os
from dotenv import load_dotenv
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

load_dotenv()

FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')    
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')
PAGE_URL = 'https://www.facebook.com/luminarijewelry'
SCROLL_COUNT = 5
OUTPUT_FILENAME = 'facebook_page_posts.txt'
PROFILE_PATH = r'C:\chrome-profiles\fb-scraper'

def scrape_page_post_details(driver, post_element):
    details = {}
    
    see_more = post_element.find_elements(By.XPATH, ".//div[text()='See more' or text()='ดูเพิ่มเติม']")
    if see_more:
        driver.execute_script("arguments[0].click();", see_more[0])
        time.sleep(0.5)

    author_link = post_element.find_elements(By.CSS_SELECTOR, "h2 a[role='link']")
    details["author_name"] = author_link[0].text if author_link else "Page"
    details["author_url"] = author_link[0].get_attribute('href') if author_link else None

    content_divs = post_element.find_elements(By.CSS_SELECTOR, "div[data-ad-preview='message'], div[style='text-align: start;']")
    details["post_content"] = "\n".join(div.text for div in content_divs if div.text.strip()) if content_divs else None

    timestamp_link = post_element.find_elements(By.CSS_SELECTOR, "span > a[role='link'][href*='story_fbid='], span > a[role='link'][href*='/posts/']")
    if timestamp_link:
        details["post_timestamp"] = timestamp_link[0].text
        details["post_url"] = timestamp_link[0].get_attribute('href')

    feedback_container = post_element.find_elements(By.CSS_SELECTOR, "div[aria-label*='reactions'], div[role='toolbar']")
    if feedback_container:
        reactions = feedback_container[0].find_elements(By.CSS_SELECTOR, "span[aria-label]")
        details["reactions"] = reactions[0].get_attribute('aria-label') if reactions else "0"
        comments = feedback_container[0].find_elements(By.XPATH, ".//div[contains(text(), 'comment') or contains(text(), 'ความคิดเห็น')]")
        details["comments"] = comments[0].text if comments else "0 comments"
    else:
        details["reactions"] = "0"
        details["comments"] = "0 comments"

    return details if details.get("author_name") and details.get("post_content") else None

def main():
    options = uc.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")
    
    with uc.Chrome(options=options, use_subprocess=True, version_main=137) as driver:
        driver.get(PAGE_URL)

        email_input = driver.find_elements(By.NAME, "email")
        if email_input:
            email_input[0].send_keys(FACEBOOK_EMAIL)
            driver.find_element(By.NAME, "pass").send_keys(FACEBOOK_PASSWORD, Keys.RETURN)

        WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='main']")))

        last_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(SCROLL_COUNT):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        posts_data = []
        posts = driver.find_elements(By.CSS_SELECTOR, "div[role='article']")
        for post in posts:
            details = scrape_page_post_details(driver, post)
            if details:
                posts_data.append(details)

        if posts_data:
            with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as f:
                for i, post in enumerate(posts_data, 1):
                    f.write(f"=============== POST #{i} ===============\n"
                           f"Author: {post.get('author_name', 'N/A')}\n"
                           f"Author URL: {post.get('author_url', 'N/A')}\n"
                           f"Timestamp: {post.get('post_timestamp', 'N/A')}\n"
                           f"Post URL: {post.get('post_url', 'N/A')}\n"
                           f"Reactions: {post.get('reactions', 'N/A')}\n"
                           f"Comments: {post.get('comments', 'N/A')}\n"
                           "-" * 20 + " CONTENT " + "-" * 20 + "\n"
                           f"{post.get('post_content', 'No content found.')}\n\n\n")

if __name__ == "__main__":
    if not FACEBOOK_EMAIL or not FACEBOOK_PASSWORD:
        raise ValueError("Missing Facebook credentials in .env file")
    main()

### Scraping : post, Share, Comment

In [1]:
import time
import os
import csv
from dotenv import load_dotenv
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException

load_dotenv()
FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')
PAGE_URL = 'https://www.facebook.com/kosjewelry.co'
TARGET_POST_COUNT = 100
OUTPUT_CSV_FILE = 'Facebook_post_urls.csv'
PROFILE_PATH = r'C:\chrome-profiles\fb-pipeline-stage1-persistent'

def login_to_facebook(driver):
    driver.get("https://www.facebook.com")
    time.sleep(3)
    cookie_selectors = [
        "button[data-cookiebanner='accept_button_dialog']",
        "button[title='Allow all cookies']",
        "button[title='Accept All']",
    ]
    for selector in cookie_selectors:
        buttons = driver.find_elements(By.CSS_SELECTOR, selector)
        if buttons and buttons[0].is_displayed():
            buttons[0].click()
            time.sleep(2)
            break

    email_input = driver.find_elements(By.ID, "email")
    pass_input = driver.find_elements(By.ID, "pass")
    if email_input and pass_input:
        email_input[0].send_keys(FACEBOOK_EMAIL)
        pass_input[0].send_keys(FACEBOOK_PASSWORD)
        pass_input[0].send_keys(Keys.RETURN)
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "a[aria-label='Home']"))
        )

def collect_post_urls(driver, page_url):
    driver.get(page_url)
    WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='main']")))
    time.sleep(3)
    urls_in_this_session = set()
    last_height = driver.execute_script("return document.body.scrollHeight")
    scroll_attempts = 0
    while scroll_attempts < 50:
        js_script = """
            var links = document.querySelectorAll("a[href*='/posts/'], a[href*='/videos/'], a[href*='/reels/']");
            var hrefs = [];
            for (var i = 0; i < links.length; i++) {
                hrefs.push(links[i].getAttribute('href'));
            }
            return hrefs;
        """
        hrefs_list = driver.execute_script(js_script)
        for url in hrefs_list:
            if url:
                clean_url = url.split('?')[0]
                urls_in_this_session.add(clean_url)
        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(4)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
        scroll_attempts += 1
    return urls_in_this_session

def main():
    all_found_urls = set()
    while len(all_found_urls) < TARGET_POST_COUNT:
        previous_count = len(all_found_urls)
        options = uc.ChromeOptions()
        options.add_argument("--disable-notifications")
        options.add_argument("--lang=en-US")
        options.add_argument(f"--user-data-dir={PROFILE_PATH}")
        with uc.Chrome(options=options, use_subprocess=True) as driver:
            login_to_facebook(driver)
            newly_scraped_urls = collect_post_urls(driver, PAGE_URL)
        all_found_urls.update(newly_scraped_urls)
        if len(all_found_urls) == previous_count and previous_count > 0:
            break
    if all_found_urls:
        final_urls = list(all_found_urls)[:TARGET_POST_COUNT]
        with open(OUTPUT_CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['PostURL'])
            for url in final_urls:
                writer.writerow([url])

if __name__ == "__main__":
    main()

## Instagram Scraping Post

In [3]:
import time
import csv
import os
from dotenv import load_dotenv
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

load_dotenv()

INSTAGRAM_USERNAME = os.getenv('INSTAGRAM_USERNAME')
INSTAGRAM_PASSWORD = os.getenv('INSTAGRAM_PASSWORD')
PAGE_URL = 'https://www.instagram.com/kosjewelry.co'
TARGET_POST_COUNT = 100
OUTPUT_CSV_FILE = 'post_urls_optimized.csv'
PROFILE_PATH = r'C:\chrome-profiles\ig-pipeline-stage1-persistent'
WAIT_TIMEOUT = 15

def login_to_instagram(driver: uc.Chrome):
    driver.get("https://www.instagram.com/accounts/login/")
    wait = WebDriverWait(driver, WAIT_TIMEOUT)

    username_input = wait.until(EC.visibility_of_element_located((By.NAME, "username")))
    password_input = driver.find_element(By.NAME, "password")

    username_input.send_keys(INSTAGRAM_USERNAME)
    password_input.send_keys(INSTAGRAM_PASSWORD)
    password_input.send_keys(Keys.RETURN)

    wait.until(EC.presence_of_element_located((By.XPATH, "//*[@aria-label='Home' or @aria-label='หน้าหลัก']")))
    
    not_now_btn = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, "//div[@role='button' and (text()='Not Now' or text()='ไว้ทีหลัง')]"))
    )
    not_now_btn.click()

    turn_off_btn = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, "//button[text()='Turn Off' or text()='ปิด']"))
    )
    turn_off_btn.click()

def collect_post_urls(driver: uc.Chrome, page_url: str, target_count: int) -> list[str]:
    driver.get(page_url)
    wait = WebDriverWait(driver, WAIT_TIMEOUT)
    
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "main[role='main']")))

    post_urls = set()
    
    js_get_links = "return Array.from(document.querySelectorAll(\"a[href^='/p/'], a[href*='/reel/']\")).map(a => a.href);"
    
    last_height = driver.execute_script("return document.body.scrollHeight")

    while len(post_urls) < target_count:
        hrefs = driver.execute_script(js_get_links)
        for url in hrefs:
            clean_url = url.split('?')[0]
            if "/p/" in clean_url or "/reel/" in clean_url:
                post_urls.add(clean_url)

        if len(post_urls) >= target_count:
            break

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        
        time.sleep(3)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
            
    return list(post_urls)[:target_count]

def main():
    options = uc.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument("--lang=en-US")
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")

    with uc.Chrome(options=options) as driver:
        driver.get("https://www.instagram.com")
        time.sleep(2)
        if "login" in driver.current_url:
            login_to_instagram(driver)

        all_found_urls = collect_post_urls(driver, PAGE_URL, TARGET_POST_COUNT)

    if all_found_urls:
        print(f"Collected {len(all_found_urls)} URLs. Saving to {OUTPUT_CSV_FILE}...")
        with open(OUTPUT_CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['PostURL'])
            for url in all_found_urls:
                writer.writerow([url])
        print("Successfully saved to CSV.")
    else:
        print("No post URLs were found.")

if __name__ == "__main__":
    main()

Collected 100 URLs. Saving to post_urls_optimized.csv...
Successfully saved to CSV.


In [None]:

import time
import csv
import os
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

INSTAGRAM_USERNAME = "YOUR_USERNAME_HERE"
INSTAGRAM_PASSWORD = "YOUR_PASSWORD_HERE"
POST_URL = 'https://www.instagram.com/kosjewelry.co/reel/DEgv21dTZyD/'
OUTPUT_CSV_FILE = 'instagram_likers_data.csv'
PROFILE_PATH = r'C:\chrome-profiles\ig-pipeline-stage-final'
WAIT_TIMEOUT = 25

def login_to_instagram(driver: uc.Chrome):
    driver.get("https://www.instagram.com/accounts/login/")
    wait = WebDriverWait(driver, WAIT_TIMEOUT)
    
    username_field = wait.until(EC.visibility_of_element_located((By.NAME, "username")))
    password_field = driver.find_element(By.NAME, "password")
    
    username_field.send_keys(INSTAGRAM_USERNAME)
    password_field.send_keys(INSTAGRAM_PASSWORD)
    password_field.submit()

    wait.until(lambda d: "instagram.com" in d.current_url and "/login/" not in d.current_url)

    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Not Now']"))).click()
    except TimeoutException:
        pass 

    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Turn Off']"))).click()
    except TimeoutException:
        pass

def scrape_likers_data(driver: uc.Chrome, post_url: str) -> list[dict]:
    driver.get(post_url)
    wait = WebDriverWait(driver, WAIT_TIMEOUT)

    likes_link_xpath = "//a[contains(@href, '/liked_by/')]"
    likes_link = wait.until(EC.element_to_be_clickable((By.XPATH, likes_link_xpath)))
    driver.execute_script("arguments[0].click();", likes_link)

    scrollable_div_xpath = "//div[@role='dialog']//div[contains(@class, 'x1n2onr6')]/div"
    scrollable_div = wait.until(EC.presence_of_element_located((By.XPATH, scrollable_div_xpath)))

    scraped_profiles = {}
    last_height = 0
    
    while True:
        user_rows_xpath = "//div[@role='dialog']//a[contains(@class, 'x1i10hfl') and @role='link']"
        
        try:
            wait.until(EC.presence_of_all_elements_located((By.XPATH, user_rows_xpath)))
        except TimeoutException:
            break

        user_links = driver.find_elements(By.XPATH, user_rows_xpath)
        
        for link in user_links:
            try:
                href = link.get_attribute('href')
                username = link.find_element(By.XPATH, ".//span[contains(@class, '_ap3a')]").text
                
                if username and href and href not in scraped_profiles:
                    scraped_profiles[href] = {
                        "username": username,
                        "profile_url": href
                    }
            except NoSuchElementException:
                continue

        driver.execute_script("arguments[0].scrollTo(0, arguments[0].scrollHeight);", scrollable_div)
        time.sleep(3)
        
        new_height = driver.execute_script("return arguments[0].scrollHeight", scrollable_div)
        if new_height == last_height:
            break
        last_height = new_height

    return list(scraped_profiles.values())

def main():
    options = uc.ChromeOptions()
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")
    options.add_argument("--disable-notifications")
    options.add_argument("--lang=en")
    
    with uc.Chrome(options=options) as driver:
        driver.get("https://www.instagram.com")
        time.sleep(4)
        if "/login/" in driver.current_url:
            login_to_instagram(driver)
            
        likers_data = scrape_likers_data(driver, POST_URL)
        
        if likers_data:
            print(f"Scraped data for {len(likers_data)} unique profiles. Saving to {OUTPUT_CSV_FILE}.")
            with open(OUTPUT_CSV_FILE, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=["username", "profile_url"])
                writer.writeheader()
                writer.writerows(likers_data)
            print("Data saved successfully.")
        else:
            print("No profile data was scraped.")

if __name__ == "__main__":
    main()

Scraped data for 11 unique profiles. Saving to instagram_likers_data.csv.
Data saved successfully.


facebook scraping Using selenium

In [2]:
import time
import os
import csv
from dotenv import load_dotenv
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

load_dotenv()
FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')

PAGE_URL = 'https://www.facebook.com/photo/?fbid=1302039414623773&set=a.325524738941917'
OUTPUT_CSV_FILE = 'facebook_post_data.csv'
PROFILE_PATH = r'C:\chrome-profiles\fb-pipeline-stage1-persistent'

def login_to_facebook(driver):
    driver.get("https://www.facebook.com")
    time.sleep(3)
    
    cookie_selectors = [
        "button[data-cookiebanner='accept_button_dialog']",
        "button[title='Allow all cookies']",
        "button[title='Accept All']",
    ]
    for selector in cookie_selectors:
        buttons = driver.find_elements(By.CSS_SELECTOR, selector)
        if buttons and buttons[0].is_displayed():
            buttons[0].click()
            time.sleep(2)
            break
            
    email_input_list = driver.find_elements(By.ID, "email")
    if email_input_list:
        pass_input = driver.find_element(By.ID, "pass")
        email_input_list[0].send_keys(FACEBOOK_EMAIL)
        pass_input.send_keys(FACEBOOK_PASSWORD)
        pass_input.submit()
        time.sleep(5)

def scrape_post_details(driver, post_url):
    driver.get(post_url)
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='main']"))
    )
    time.sleep(3)

    likes_element = driver.find_element(By.CSS_SELECTOR, "span.xt0b8zv.x1jx94hy")
    likes = ''.join(filter(str.isdigit, likes_element.text)) or '0'

    count_elements = driver.find_elements(By.CSS_SELECTOR, "span.xkrqix3.x1sur9pj")
    comments = count_elements[0].text.strip()
    shares = count_elements[1].text.strip()

    return {
        'url': post_url,
        'likes': likes,
        'comments': comments,
        'shares': shares
    }

def main():
    options = uc.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument("--lang=en-US")
    options.add_experimental_option('prefs', {'intl.accept_languages': 'en-US,en'})
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")

    post_data = []

    with uc.Chrome(options=options, use_subprocess=True) as driver:
        login_to_facebook(driver)
        
        details = scrape_post_details(driver, PAGE_URL)
        if details:
            post_data.append(details)

    if post_data:
        with open(OUTPUT_CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=['url', 'likes', 'comments', 'shares'])
            writer.writeheader()
            writer.writerows(post_data)
        print(f"Scraped data for {len(post_data)} post(s) and saved to {OUTPUT_CSV_FILE}")

if __name__ == "__main__":
    main()

Scraped data for 1 post(s) and saved to facebook_post_data.csv


In [1]:
import time
import os
import csv
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')

POST_URL = 'https://www.facebook.com/kosjewelry.co'
OUTPUT_CSV_FILE = 'facebook_reactors.csv'
PROFILE_PATH = r'C:\chrome-profiles\fb-scraper-profile'

def login_to_facebook(driver):
    driver.get("https://www.facebook.com")
    time.sleep(3)
    
    cookie_buttons = driver.find_elements(By.CSS_SELECTOR, "button[data-cookiebanner='accept_button_dialog']")
    if cookie_buttons and cookie_buttons[0].is_displayed():
        cookie_buttons[0].click()
        time.sleep(2)
        
    email_input_list = driver.find_elements(By.ID, "email")
    if email_input_list:
        pass_input = driver.find_element(By.ID, "pass")
        email_input_list[0].send_keys(FACEBOOK_EMAIL)
        pass_input.send_keys(FACEBOOK_PASSWORD)
        pass_input.submit()
        time.sleep(5)

def scrape_post_reactors(driver, post_url):
    driver.get(post_url)

    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='main']"))
    )
    time.sleep(3)

    reactors_button_selector = "div.x78zum5.xdt5ytf span.xt0b8zv.x1jx94hy"
    reactors_button = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, reactors_button_selector))
    )
    
    driver.execute_script("arguments[0].scrollIntoView(true);", reactors_button)
    time.sleep(1)
    driver.execute_script("arguments[0].click();", reactors_button)

    dialog_selector = "div[role='dialog']"
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, dialog_selector))
    )

    scraped_profiles = {}
    last_count = -1
    no_change_count = 0
    
    while no_change_count < 3:
        dialog = driver.find_element(By.CSS_SELECTOR, dialog_selector)
        
        profile_elements = dialog.find_elements(By.CSS_SELECTOR, "span.xjp7ctv > a")
        
        for element in profile_elements:
            name = element.text
            url = element.get_attribute('href')
            if name and url and url not in scraped_profiles:
                clean_url = url.split('?')[0]
                scraped_profiles[url] = {'profile_name': name, 'profile_url': clean_url}

        driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', dialog)
        time.sleep(2.5)
        
        current_count = len(scraped_profiles)
        if current_count == last_count:
            no_change_count += 1
        else:
            last_count = current_count
            no_change_count = 0

    return list(scraped_profiles.values())

def main():
    os.makedirs(PROFILE_PATH, exist_ok=True)
    
    options = uc.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument("--lang=en-US")
    options.add_experimental_option('prefs', {'intl.accept_languages': 'en-US,en'})
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")

    with uc.Chrome(options=options, use_subprocess=True) as driver:
        login_to_facebook(driver)
        reactors_data = scrape_post_reactors(driver, POST_URL)

    if reactors_data:
        with open(OUTPUT_CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=['profile_name', 'profile_url'])
            writer.writeheader()
            writer.writerows(reactors_data)
        print(f"Scraping complete. Saved {len(reactors_data)} profiles to {OUTPUT_CSV_FILE}.")

if __name__ == "__main__":
    main()

StaleElementReferenceException: Message: stale element reference: stale element not found in the current frame
  (Session info: chrome=138.0.7204.97); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#staleelementreferenceexception
Stacktrace:
	GetHandleVerifier [0x0x5c44a3+62419]
	GetHandleVerifier [0x0x5c44e4+62484]
	(No symbol) [0x0x402133]
	(No symbol) [0x0x408909]
	(No symbol) [0x0x40aec3]
	(No symbol) [0x0x491874]
	(No symbol) [0x0x46f46c]
	(No symbol) [0x0x49087a]
	(No symbol) [0x0x46f266]
	(No symbol) [0x0x43e852]
	(No symbol) [0x0x43f6f4]
	GetHandleVerifier [0x0x834793+2619075]
	GetHandleVerifier [0x0x82fbaa+2599642]
	GetHandleVerifier [0x0x5eb04a+221050]
	GetHandleVerifier [0x0x5db2c8+156152]
	GetHandleVerifier [0x0x5e1c7d+183213]
	GetHandleVerifier [0x0x5cc388+94904]
	GetHandleVerifier [0x0x5cc512+95298]
	GetHandleVerifier [0x0x5b766a+9626]
	BaseThreadInitThunk [0x0x75e85d49+25]
	RtlInitializeExceptionChain [0x0x77b8d1ab+107]
	RtlGetAppContainerNamedObjectPath [0x0x77b8d131+561]


In [4]:
import time
import os
import csv
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import TimeoutException

FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')

PAGE_URL = 'https://www.facebook.com/kosjewelry.co'
OUTPUT_CSV_FILE = 'facebook_page_reactors.csv'
PROFILE_PATH = r'C:\chrome-profiles\fb-scraper-profile'

def login_to_facebook(driver):
    driver.get("https://www.facebook.com")
    time.sleep(3)
    
    cookie_buttons = driver.find_elements(By.CSS_SELECTOR, "button[data-cookiebanner='accept_button_dialog']")
    if cookie_buttons and cookie_buttons[0].is_displayed():
        cookie_buttons[0].click()
        time.sleep(2)
        
    email_input_list = driver.find_elements(By.ID, "email")
    if email_input_list:
        pass_input = driver.find_element(By.ID, "pass")
        email_input_list[0].send_keys(FACEBOOK_EMAIL)
        pass_input.send_keys(FACEBOOK_PASSWORD)
        pass_input.submit()
        time.sleep(5)

def scrape_page_feed(driver, page_url):
    driver.get(page_url)
    WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='main']")))
    time.sleep(3)

    print("Scrolling the page feed to load posts...")
    for _ in range(5):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)

    all_scraped_profiles = {}
    reactors_button_selector = "div.x78zum5.xdt5ytf span.xt0b8zv.x1jx94hy"
    
    post_reaction_buttons = driver.find_elements(By.CSS_SELECTOR, reactors_button_selector)
    print(f"Found {len(post_reaction_buttons)} posts with reaction buttons to process.")

    for i in range(len(post_reaction_buttons)):
        buttons = driver.find_elements(By.CSS_SELECTOR, reactors_button_selector)
        if i >= len(buttons):
            break
        button = buttons[i]
        
        try:
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
            time.sleep(1)
            driver.execute_script("arguments[0].click();", button)
            print(f"\nProcessing post {i + 1}...")

            dialog_selector = "div[role='dialog']"
            WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.CSS_SELECTOR, dialog_selector)))

            no_change_count = 0
            while no_change_count < 3:
                profiles_before_scrape = len(all_scraped_profiles)
                
                profile_elements = driver.find_elements(By.CSS_SELECTOR, "div[role='dialog'] span.xjp7ctv > a")

                for element in profile_elements:
                    name = element.text
                    url = element.get_attribute('href')
                    if name and url:
                        clean_url = url.split('?')[0]
                        all_scraped_profiles[clean_url] = {'profile_name': name, 'profile_url': clean_url}
                
                dialog = driver.find_element(By.CSS_SELECTOR, dialog_selector)
                driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', dialog)
                time.sleep(2.5)
                
                profiles_after_scrape = len(all_scraped_profiles)
                if profiles_after_scrape == profiles_before_scrape:
                    no_change_count += 1
                else:
                    no_change_count = 0
            
            print(f"Scraped from post {i + 1}. Total unique profiles so far: {len(all_scraped_profiles)}")
            
            ActionChains(driver).send_keys(Keys.ESCAPE).perform()
            time.sleep(2)
        except TimeoutException:
            print(f"Skipping post {i + 1} as no reaction dialog appeared.")
            ActionChains(driver).send_keys(Keys.ESCAPE).perform()
            time.sleep(1)
            continue

    return list(all_scraped_profiles.values())

def main():
    os.makedirs(PROFILE_PATH, exist_ok=True)
    
    options = uc.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument("--lang=en-US")
    options.add_experimental_option('prefs', {'intl.accept_languages': 'en-US,en'})
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")

    with uc.Chrome(options=options, use_subprocess=True) as driver:
        login_to_facebook(driver)
        reactors_data = scrape_page_feed(driver, PAGE_URL)

    if reactors_data:
        with open(OUTPUT_CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=['profile_name', 'profile_url'])
            writer.writeheader()
            writer.writerows(reactors_data)
        print(f"\nScraping complete. Saved {len(reactors_data)} unique profiles to {OUTPUT_CSV_FILE}.")

if __name__ == "__main__":
    main()

Scrolling the page feed to load posts...
Found 17 posts with reaction buttons to process.

Processing post 1...
Scraped from post 1. Total unique profiles so far: 3

Processing post 2...
Scraped from post 2. Total unique profiles so far: 3

Processing post 3...
Scraped from post 3. Total unique profiles so far: 7

Processing post 4...
Scraped from post 4. Total unique profiles so far: 7

Processing post 5...
Scraped from post 5. Total unique profiles so far: 8

Processing post 6...
Scraped from post 6. Total unique profiles so far: 10

Processing post 7...
Scraped from post 7. Total unique profiles so far: 12

Processing post 8...
Scraped from post 8. Total unique profiles so far: 12

Processing post 9...
Scraped from post 9. Total unique profiles so far: 12

Processing post 10...
Scraped from post 10. Total unique profiles so far: 12

Processing post 11...
Scraped from post 11. Total unique profiles so far: 12

Processing post 12...
Scraped from post 12. Total unique profiles so far: 