### Scraping Code: Facebook Group

In [None]:
import time
import os
from dotenv import load_dotenv
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Load variables from a .env file (python-dotenv) before the os.getenv()
# lookups below run, so values defined there are visible to them.
load_dotenv()

# Login credentials; os.getenv() yields None when a variable is not set.
FACEBOOK_EMAIL = os.getenv('FACEBOOK_EMAIL')
FACEBOOK_PASSWORD = os.getenv('FACEBOOK_PASSWORD')
# Group page that main() navigates to.
GROUP_URL = 'https://www.facebook.com/groups/247152564671716'
# Upper bound on the number of scroll-to-bottom passes main() performs.
SCROLL_COUNT = 5
# Text file main() writes the scraped posts to.
OUTPUT_FILENAME = 'test-scraping.txt'
# Directory handed to Chrome as --user-data-dir.
PROFILE_PATH = r'C:\chrome-profiles\fb-scraper'

def _drop_nested_text(blocks):
    """Return *blocks* minus entries already contained in an earlier kept entry.

    Containment is checked on whole lines, so a block is only dropped when its
    complete text appears as one or more full lines of a block kept before it.
    Trade-off: a later paragraph that repeats an earlier one verbatim is
    dropped as well.
    """
    kept = []
    for block in blocks:
        framed = f"\n{block}\n"
        if not any(framed in f"\n{earlier}\n" for earlier in kept):
            kept.append(block)
    return kept


def scrape_post_details(driver, post_element):
    """Extract author, content, timestamp and engagement info from one post.

    Args:
        driver: WebDriver used to click the "See more" expander via JavaScript.
        post_element: Element for a single post; all lookups are relative to it.

    Returns:
        A dict that always has "reactions" and "comments" and, when the
        matching elements exist, "author_name", "author_url", "post_content",
        "post_timestamp" and "post_url". Returns None when either the author
        name or the post content is missing or empty.
    """
    details = {}

    # Expand truncated posts first so the full text is present in the DOM.
    see_more = post_element.find_elements(
        By.XPATH, ".//div[text()='See more' or text()='ดูเพิ่มเติม']"
    )
    if see_more:
        # JavaScript click avoids "element not interactable" on covered nodes.
        driver.execute_script("arguments[0].click();", see_more[0])
        time.sleep(0.5)  # give the page a moment to render the expanded text

    author_link = post_element.find_elements(By.CSS_SELECTOR, "h3 a[role='link']")
    if author_link:
        details["author_name"] = author_link[0].text
        details["author_url"] = author_link[0].get_attribute('href')

    content_divs = post_element.find_elements(
        By.CSS_SELECTOR, "div[data-ad-preview='message'], div[dir='auto']"
    )
    if content_divs:
        # Read .text once per element: every access is a WebDriver round trip.
        texts = [div.text for div in content_divs]
        non_empty = [text for text in texts if text.strip()]
        # The selector matches containers *and* their descendants, and an
        # element's text includes its descendants' text, so without this
        # step the same paragraphs would be emitted several times.
        details["post_content"] = "\n".join(_drop_nested_text(non_empty))

    timestamp_link = post_element.find_elements(
        By.CSS_SELECTOR,
        "span > a[role='link'][href*='/posts/'], span > a[role='link'][href*='?post_id=']",
    )
    if timestamp_link:
        details["post_timestamp"] = timestamp_link[0].text
        details["post_url"] = timestamp_link[0].get_attribute('href')

    # Defaults apply when the toolbar or the individual counters are absent.
    details["reactions"] = "0"
    details["comments"] = "0 comments"
    footer = post_element.find_elements(By.CSS_SELECTOR, "div[role='toolbar']")
    if footer:
        reactions = footer[0].find_elements(By.CSS_SELECTOR, "span[aria-label*='reaction']")
        if reactions:
            details["reactions"] = reactions[0].get_attribute('aria-label')

        comments = footer[0].find_elements(
            By.XPATH,
            ".//div[contains(text(), 'comment') or contains(text(), 'ความคิดเห็น')]",
        )
        if comments:
            details["comments"] = comments[0].text

    # A post without an author or without any text is treated as noise.
    if details.get("author_name") and details.get("post_content"):
        return details
    return None

def _login_if_prompted(driver):
    """Submit the login form when the page shows one; otherwise do nothing."""
    email_input = driver.find_elements(By.NAME, "email")
    if not email_input:
        # No login form on the page (e.g. session restored from the profile).
        return
    if not FACEBOOK_EMAIL or not FACEBOOK_PASSWORD:
        raise RuntimeError(
            "Facebook requested a login but FACEBOOK_EMAIL / FACEBOOK_PASSWORD "
            "are not set (define them in the environment or in a .env file)."
        )
    email_input[0].send_keys(FACEBOOK_EMAIL)
    driver.find_element(By.NAME, "pass").send_keys(FACEBOOK_PASSWORD, Keys.RETURN)


def _scroll_feed(driver):
    """Scroll to the bottom up to SCROLL_COUNT times, stopping once the page stops growing."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    for _ in range(SCROLL_COUNT):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(2)  # wait for lazily loaded posts to be appended
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break  # nothing new was loaded
        last_height = new_height


def _write_posts(posts_data):
    """Write the scraped posts to OUTPUT_FILENAME as a plain-text report."""
    with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as f:
        for i, post in enumerate(posts_data, 1):
            f.write(f"=============== POST #{i} ===============\n")
            f.write(f"Author: {post.get('author_name', 'N/A')}\n")
            f.write(f"Author URL: {post.get('author_url', 'N/A')}\n")
            f.write(f"Timestamp: {post.get('post_timestamp', 'N/A')}\n")
            f.write(f"Post URL: {post.get('post_url', 'N/A')}\n")
            f.write(f"Reactions: {post.get('reactions', 'N/A')}\n")
            f.write(f"Comments: {post.get('comments', 'N/A')}\n")
            f.write("-" * 20 + " CONTENT " + "-" * 20 + "\n")
            f.write(f"{post.get('post_content', 'No content found.')}\n\n\n")


def main():
    """Open the group page, log in if asked, scroll the feed and save the posts.

    Nothing is written when no post yields both an author and content.

    Raises:
        RuntimeError: If a login form is shown but the credentials are unset.
        Whatever WebDriverWait raises if the feed does not appear within 30 s.
    """
    options = uc.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument(f"--user-data-dir={PROFILE_PATH}")

    driver = uc.Chrome(options=options, use_subprocess=True)
    try:
        driver.get(GROUP_URL)
        _login_if_prompted(driver)

        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='feed']"))
        )
        _scroll_feed(driver)

        posts = driver.find_elements(By.CSS_SELECTOR, "div[role='article']")
        scraped = (scrape_post_details(driver, post) for post in posts)
        posts_data = [details for details in scraped if details]

        # Save before shutting the browser down so a failing quit() cannot
        # cost us the data that was already collected.
        if posts_data:
            _write_posts(posts_data)
    finally:
        # Quit explicitly instead of relying on the driver's context-manager
        # exit to close the browser, so Chrome never outlives the script.
        driver.quit()

# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()