### Scraping Code: Facebook Group

In [2]:
import os
import time
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

# Login credentials come from the environment; each is None when its variable is unset.
FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')
# Facebook group whose feed is scraped.
GROUP_URL = 'https://www.facebook.com/groups/247152564671716'
# Upper bound on scroll-to-bottom passes made before posts are collected.
SCROLL_COUNT = 5
# Text file the scraped posts are written to.
OUTPUT_FILENAME = 'test-scraping.txt'
# Chrome user-data directory handed to the browser via --user-data-dir.
PROFILE_PATH = r'C:\chrome-profiles\fb-scraper'

def scrape_post_details(driver, post_element):
    """Extract author, content, timestamp and engagement info from one group post.

    Args:
        driver: WebDriver used to click the post's "See more" expander via JavaScript.
        post_element: WebElement of the post; every lookup is scoped to it.

    Returns:
        A dict that may contain "author_name", "author_url", "post_content",
        "post_timestamp" and "post_url", and always contains "reactions" and
        "comments" -- or None when the author name or the post content is
        missing or empty.
    """
    record = {}

    # Expand truncated text first so the content lookup below sees the full body.
    expanders = post_element.find_elements(
        By.XPATH, ".//div[text()='See more' or text()='ดูเพิ่มเติม']"
    )
    if expanders:
        driver.execute_script("arguments[0].click();", expanders[0])
        time.sleep(0.3)

    authors = post_element.find_elements(By.CSS_SELECTOR, "h3 a[role='link']")
    if authors:
        author = authors[0]
        record["author_name"] = author.text
        record["author_url"] = author.get_attribute('href')

    body_nodes = post_element.find_elements(
        By.CSS_SELECTOR, "div[data-ad-preview='message'], div[dir='auto']"
    )
    if body_nodes:
        non_blank = [node.text for node in body_nodes if node.text.strip()]
        record["post_content"] = "\n".join(non_blank)

    permalinks = post_element.find_elements(
        By.CSS_SELECTOR,
        "span > a[role='link'][href*='/posts/'], span > a[role='link'][href*='?post_id=']",
    )
    if permalinks:
        permalink = permalinks[0]
        record["post_timestamp"] = permalink.text
        record["post_url"] = permalink.get_attribute('href')

    # Engagement figures fall back to zero-valued placeholders when nothing is found.
    reaction_summary = "0"
    comment_summary = "0 comments"
    toolbars = post_element.find_elements(By.CSS_SELECTOR, "div[role='toolbar']")
    if toolbars:
        toolbar = toolbars[0]
        reaction_spans = toolbar.find_elements(By.CSS_SELECTOR, "span[aria-label*='reaction']")
        if reaction_spans:
            reaction_summary = reaction_spans[0].get_attribute('aria-label')
        comment_nodes = toolbar.find_elements(
            By.XPATH, ".//div[contains(text(), 'comment') or contains(text(), 'ความคิดเห็น')]"
        )
        if comment_nodes:
            comment_summary = comment_nodes[0].text
    record["reactions"] = reaction_summary
    record["comments"] = comment_summary

    # A post without both an author name and some content is discarded.
    is_complete = record.get("author_name") and record.get("post_content")
    return record if is_complete else None

def main():
    """Scrape posts from the group at GROUP_URL and write them to OUTPUT_FILENAME.

    Opens Chrome with the profile at PROFILE_PATH, signs in if a login form
    appears, scrolls the feed up to SCROLL_COUNT times, extracts every
    ``div[role='article']`` with scrape_post_details and writes the kept
    posts to a UTF-8 text file. Nothing is written when no post is kept.

    Raises:
        ValueError: the login form is shown but FACEBOOK_EMAIL or
            FACEBOOK_PASSWORD is not set.
        TimeoutException: the feed container does not appear within 30 seconds.
    """
    options = uc.ChromeOptions()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-notifications")
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")

    with uc.Chrome(options=options, use_subprocess=True, version_main=137) as driver:
        driver.get(GROUP_URL)

        try:
            email_input = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.NAME, "email"))
            )
        except TimeoutException:
            # No login form within 10 s; carry on and let the feed wait below decide.
            email_input = None

        if email_input is not None:
            # Fail with a clear message instead of the TypeError send_keys(None) produces.
            if not FACEBOOK_EMAIL or not FACEBOOK_PASSWORD:
                raise ValueError(
                    "Missing Facebook credentials: set FACEBOOK_EMAIL and FACEBOOK_PASSWORD"
                )
            email_input.send_keys(FACEBOOK_EMAIL)
            driver.find_element(By.NAME, "pass").send_keys(FACEBOOK_PASSWORD, Keys.RETURN)

        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='feed']"))
        )

        for _ in range(SCROLL_COUNT):
            last_height = driver.execute_script("return document.body.scrollHeight")
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            try:
                # Poll every 0.5 s, for at most 5 s, until the page has grown.
                WebDriverWait(driver, 5, 0.5).until(
                    lambda d: d.execute_script("return document.body.scrollHeight") > last_height
                )
            except TimeoutException:
                # Page height stopped changing: stop scrolling early.
                break

        posts_data = []
        for post in driver.find_elements(By.CSS_SELECTOR, "div[role='article']"):
            details = scrape_post_details(driver, post)
            if details:
                posts_data.append(details)

        if posts_data:
            separator = "-" * 20 + " CONTENT " + "-" * 20
            with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as f:
                for i, post in enumerate(posts_data, 1):
                    f.write(
                        f"=============== POST #{i} ===============\n"
                        f"Author: {post.get('author_name', 'N/A')}\n"
                        f"Author URL: {post.get('author_url', 'N/A')}\n"
                        f"Timestamp: {post.get('post_timestamp', 'N/A')}\n"
                        f"Post URL: {post.get('post_url', 'N/A')}\n"
                        f"Reactions: {post.get('reactions', 'N/A')}\n"
                        f"Comments: {post.get('comments', 'N/A')}\n"
                        f"{separator}\n"
                        f"{post.get('post_content', 'No content found.')}\n\n\n"
                    )

# Run the group scraper only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

### Scraping Code: Facebook Page

In [None]:
import time
import os
from dotenv import load_dotenv
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Load variables from a .env file into the environment before they are read below.
load_dotenv()

# Login credentials; each is None when its variable is unset.
FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')
# Facebook page whose posts are scraped.
PAGE_URL = 'https://www.facebook.com/luminarijewelry'
# Upper bound on scroll-to-bottom passes made before posts are collected.
SCROLL_COUNT = 5
# Text file the scraped posts are written to.
OUTPUT_FILENAME = 'facebook_page_posts.txt'
# Chrome user-data directory handed to the browser via --user-data-dir.
PROFILE_PATH = r'C:\chrome-profiles\fb-scraper'

def scrape_page_post_details(driver, post_element):
    """Extract author, content, timestamp and engagement info from one page post.

    Args:
        driver: WebDriver used to click the post's "See more" expander via JavaScript.
        post_element: WebElement of the post; every lookup is scoped to it.

    Returns:
        A dict with "author_name" ("Page" when no author link is found),
        "author_url" (None when no author link is found), "post_content",
        "reactions" and "comments", plus "post_timestamp" and "post_url" when a
        permalink is found -- or None when the author name or the post content
        is missing or empty.
    """
    record = {}

    # Expand truncated text first so the content lookup below sees the full body.
    expanders = post_element.find_elements(
        By.XPATH, ".//div[text()='See more' or text()='ดูเพิ่มเติม']"
    )
    if expanders:
        driver.execute_script("arguments[0].click();", expanders[0])
        time.sleep(0.5)

    headings = post_element.find_elements(By.CSS_SELECTOR, "h2 a[role='link']")
    if headings:
        record["author_name"] = headings[0].text
        record["author_url"] = headings[0].get_attribute('href')
    else:
        record["author_name"] = "Page"
        record["author_url"] = None

    body_nodes = post_element.find_elements(
        By.CSS_SELECTOR, "div[data-ad-preview='message'], div[style='text-align: start;']"
    )
    if body_nodes:
        non_blank = [node.text for node in body_nodes if node.text.strip()]
        record["post_content"] = "\n".join(non_blank)
    else:
        record["post_content"] = None

    permalinks = post_element.find_elements(
        By.CSS_SELECTOR,
        "span > a[role='link'][href*='story_fbid='], span > a[role='link'][href*='/posts/']",
    )
    if permalinks:
        permalink = permalinks[0]
        record["post_timestamp"] = permalink.text
        record["post_url"] = permalink.get_attribute('href')

    # Engagement figures fall back to zero-valued placeholders when nothing is found.
    reaction_summary = "0"
    comment_summary = "0 comments"
    containers = post_element.find_elements(
        By.CSS_SELECTOR, "div[aria-label*='reactions'], div[role='toolbar']"
    )
    if containers:
        container = containers[0]
        labelled_spans = container.find_elements(By.CSS_SELECTOR, "span[aria-label]")
        if labelled_spans:
            reaction_summary = labelled_spans[0].get_attribute('aria-label')
        comment_nodes = container.find_elements(
            By.XPATH, ".//div[contains(text(), 'comment') or contains(text(), 'ความคิดเห็น')]"
        )
        if comment_nodes:
            comment_summary = comment_nodes[0].text
    record["reactions"] = reaction_summary
    record["comments"] = comment_summary

    # A post without both an author name and some content is discarded.
    if record.get("author_name") and record.get("post_content"):
        return record
    return None

def main():
    """Scrape posts from the page at PAGE_URL and write them to OUTPUT_FILENAME.

    Opens Chrome with the profile at PROFILE_PATH, signs in if a login form is
    present, scrolls the page up to SCROLL_COUNT times, extracts every
    ``div[role='article']`` with scrape_page_post_details and writes the kept
    posts to a UTF-8 text file. Nothing is written when no post is kept.

    Raises:
        TimeoutException: ``div[role='main']`` does not appear within 30 seconds.
    """
    options = uc.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")

    with uc.Chrome(options=options, use_subprocess=True, version_main=137) as driver:
        driver.get(PAGE_URL)

        # The login form is only filled in when it is present right after page load.
        email_input = driver.find_elements(By.NAME, "email")
        if email_input:
            email_input[0].send_keys(FACEBOOK_EMAIL)
            driver.find_element(By.NAME, "pass").send_keys(FACEBOOK_PASSWORD, Keys.RETURN)

        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='main']"))
        )

        last_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(SCROLL_COUNT):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                # Page height stopped changing: stop scrolling early.
                break
            last_height = new_height

        posts_data = []
        for post in driver.find_elements(By.CSS_SELECTOR, "div[role='article']"):
            details = scrape_page_post_details(driver, post)
            if details:
                posts_data.append(details)

        if posts_data:
            # Build the separator on its own. Written inline between adjacent
            # string literals, the literals are concatenated first and `* 20`
            # then repeats the entire header block twenty times.
            separator = "-" * 20 + " CONTENT " + "-" * 20
            with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as f:
                for i, post in enumerate(posts_data, 1):
                    # author_url is stored as None when no author link exists, so a
                    # .get() default would never apply; use `or` to print N/A instead.
                    author_url = post.get('author_url') or 'N/A'
                    f.write(
                        f"=============== POST #{i} ===============\n"
                        f"Author: {post.get('author_name', 'N/A')}\n"
                        f"Author URL: {author_url}\n"
                        f"Timestamp: {post.get('post_timestamp', 'N/A')}\n"
                        f"Post URL: {post.get('post_url', 'N/A')}\n"
                        f"Reactions: {post.get('reactions', 'N/A')}\n"
                        f"Comments: {post.get('comments', 'N/A')}\n"
                        f"{separator}\n"
                        f"{post.get('post_content', 'No content found.')}\n\n\n"
                    )

# Script entry point: refuse to start unless both credentials were loaded.
if __name__ == "__main__":
    # FACEBOOK_EMAIL / FACEBOOK_PASSWORD are None when their variables are unset.
    if not FACEBOOK_EMAIL or not FACEBOOK_PASSWORD:
        raise ValueError("Missing Facebook credentials in .env file")
    main()

Logging in...
Navigating to post: https://www.facebook.com/luminarijewelry
An error occurred while scraping likers: Message: 
Stacktrace:
	GetHandleVerifier [0x0xd03b03+62899]
	GetHandleVerifier [0x0xd03b44+62964]
	(No symbol) [0x0xb310f3]
	(No symbol) [0x0xb7980e]
	(No symbol) [0x0xb79bab]
	(No symbol) [0x0xbc25c2]
	(No symbol) [0x0xb9e554]
	(No symbol) [0x0xbbfd81]
	(No symbol) [0x0xb9e306]
	(No symbol) [0x0xb6d670]
	(No symbol) [0x0xb6e4e4]
	GetHandleVerifier [0x0xf64793+2556483]
	GetHandleVerifier [0x0xf5fd02+2537394]
	GetHandleVerifier [0x0xd2a2fa+220586]
	GetHandleVerifier [0x0xd1aae8+157080]
	GetHandleVerifier [0x0xd2141d+184013]
	GetHandleVerifier [0x0xd0ba68+95512]
	GetHandleVerifier [0x0xd0bc10+95936]
	GetHandleVerifier [0x0xcf6b5a+9738]
	BaseThreadInitThunk [0x0x75b05d49+25]
	RtlInitializeExceptionChain [0x0x7706d1ab+107]
	RtlGetAppContainerNamedObjectPath [0x0x7706d131+561]

No profiles were scraped. The post might have no likes or the selectors might need an update.


### Scraping: Post, Share, Comment

In [2]:
import time
import os
import csv
from dotenv import load_dotenv
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException

# Load variables from a .env file into the environment before they are read below.
load_dotenv()
# Login credentials; each is None when its variable is unset.
FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')
# Facebook page whose post URLs are collected.
PAGE_URL = 'https://www.facebook.com/kosjewelry.co'
# Number of distinct URLs main() tries to reach, and the cap on rows written.
TARGET_POST_COUNT = 100
# CSV file the collected URLs are written to.
OUTPUT_CSV_FILE = 'post_urls.csv'
# Chrome user-data directory handed to the browser via --user-data-dir.
PROFILE_PATH = r'C:\chrome-profiles\fb-pipeline-stage1-persistent'

def login_to_facebook(driver):
    """Open facebook.com, dismiss a cookie banner if shown, and sign in if asked.

    Login is attempted only when both the ``email`` and ``pass`` inputs are
    present on the page; otherwise the function returns without typing anything.

    Args:
        driver: WebDriver to navigate and interact with.

    Raises:
        TimeoutException: credentials were submitted but the "Home" link did
            not appear within 30 seconds.
    """
    driver.get("https://www.facebook.com")
    time.sleep(3)

    # Try each known consent-button selector; click the first visible match only.
    for css in (
        "button[data-cookiebanner='accept_button_dialog']",
        "button[title='Allow all cookies']",
        "button[title='Accept All']",
    ):
        matches = driver.find_elements(By.CSS_SELECTOR, css)
        if not matches:
            continue
        consent_button = matches[0]
        if not consent_button.is_displayed():
            continue
        consent_button.click()
        time.sleep(2)
        break

    email_fields = driver.find_elements(By.ID, "email")
    password_fields = driver.find_elements(By.ID, "pass")
    if not (email_fields and password_fields):
        # No login form on the page: nothing to submit.
        return

    password_box = password_fields[0]
    email_fields[0].send_keys(FACEBOOK_EMAIL)
    password_box.send_keys(FACEBOOK_PASSWORD)
    password_box.send_keys(Keys.RETURN)

    # Block until the "Home" link is present after submitting the form.
    home_link = (By.CSS_SELECTOR, "a[aria-label='Home']")
    WebDriverWait(driver, 30).until(EC.presence_of_element_located(home_link))

def collect_post_urls(driver, page_url):
    """Scroll through a page and gather the hrefs of its post, video and reel links.

    After each harvest the page is scrolled to the bottom; collection stops
    when the page height no longer changes or after 50 scrolls.

    Args:
        driver: WebDriver to navigate and run JavaScript in.
        page_url: URL of the page to open.

    Returns:
        A set of href strings with everything from the first ``?`` removed.

    Raises:
        TimeoutException: ``div[role='main']`` does not appear within 20 seconds.
    """
    # Returns the raw href attribute of every matching anchor currently in the DOM.
    harvest_links_js = """
            var links = document.querySelectorAll("a[href*='/posts/'], a[href*='/videos/'], a[href*='/reels/']");
            var hrefs = [];
            for (var i = 0; i < links.length; i++) {
                hrefs.push(links[i].getAttribute('href'));
            }
            return hrefs;
        """

    driver.get(page_url)
    main_region = (By.CSS_SELECTOR, "div[role='main']")
    WebDriverWait(driver, 20).until(EC.presence_of_element_located(main_region))
    time.sleep(3)

    collected = set()
    previous_height = driver.execute_script("return document.body.scrollHeight")
    for _ in range(50):
        raw_hrefs = driver.execute_script(harvest_links_js)
        # Drop the query string so the same post is not stored under several URLs.
        collected.update(href.split('?')[0] for href in raw_hrefs if href)

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(4)
        current_height = driver.execute_script("return document.body.scrollHeight")
        if current_height == previous_height:
            # Page height unchanged after scrolling: stop.
            break
        previous_height = current_height
    return collected

def main():
    """Collect up to TARGET_POST_COUNT post URLs from PAGE_URL into OUTPUT_CSV_FILE.

    Runs browser sessions one after another (login, then collect_post_urls),
    merging each session's URLs into one set. The loop ends when the target
    count is reached, when a session adds nothing new to a non-empty set, or
    when three sessions in a row produce no URLs at all. The CSV has a
    ``PostURL`` header followed by one URL per row; nothing is written when
    no URL was found.
    """
    # Without this bound, sessions that keep returning nothing would relaunch
    # the browser forever, because the no-progress exit needs a non-empty set.
    max_empty_sessions = 3
    empty_sessions = 0

    all_found_urls = set()
    while len(all_found_urls) < TARGET_POST_COUNT:
        previous_count = len(all_found_urls)

        options = uc.ChromeOptions()
        options.add_argument("--disable-notifications")
        options.add_argument("--lang=en-US")
        options.add_argument(f"--user-data-dir={PROFILE_PATH}")
        with uc.Chrome(options=options, use_subprocess=True) as driver:
            login_to_facebook(driver)
            newly_scraped_urls = collect_post_urls(driver, PAGE_URL)
        all_found_urls.update(newly_scraped_urls)

        if len(all_found_urls) > previous_count:
            # Progress was made; keep going until the target is reached.
            empty_sessions = 0
            continue
        if previous_count > 0:
            # A further session found nothing new: the page is exhausted.
            break
        empty_sessions += 1
        if empty_sessions >= max_empty_sessions:
            break

    if all_found_urls:
        # Sort before truncating so the same input set always yields the same rows.
        final_urls = sorted(all_found_urls)[:TARGET_POST_COUNT]
        with open(OUTPUT_CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['PostURL'])
            writer.writerows([url] for url in final_urls)

# Run the URL collector only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()