In [1]:
import csv
import time
from datetime import datetime, timezone

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException, TimeoutException

In [2]:
def parse_iso(dt_str: str | None) -> datetime | None:
    if not dt_str:
        return None
    try:
        if dt_str.endswith("Z"):
            dt_str = dt_str.replace("Z", "+00:00")
        return datetime.fromisoformat(dt_str)
    except Exception:
        return None

In [3]:
def safe_js_click(driver, element) -> None:
    """Centre ``element`` in the viewport, then click it through JavaScript.

    Both steps go through ``driver.execute_script`` with ``element`` passed
    as ``arguments[0]``; nothing is returned.
    """
    js_steps = (
        "arguments[0].scrollIntoView({block: \"center\"});",
        "arguments[0].click();",
    )
    # Order matters: scroll first, click second.
    for js_snippet in js_steps:
        driver.execute_script(js_snippet, element)

In [4]:
def extract_from_tweet_article(driver, article):
    """
    Pull the permalink, timestamp and text out of one tweet <article>.

    Returns a dict with keys "url", "time_iso" and "content", or None when
    the article has no <time> element, no <a> ancestor around it, or the
    link has an empty href.
    """
    # Step 1: timestamp node and the <a> that wraps it (the permalink).
    try:
        stamp_node = article.find_element(By.CSS_SELECTOR, "time")
        time_iso = stamp_node.get_attribute("datetime")  # e.g., 2025-11-03T19:33:53.000Z
        anchor = stamp_node.find_element(By.XPATH, "./ancestor::a[1]")
        href = anchor.get_attribute("href")
    except NoSuchElementException:
        return None

    if not href:
        return None
    # Drop any query string so the same post always maps to one URL.
    permalink = href.split("?")[0]

    # Step 2: expand truncated text when a "Show more" button exists.
    try:
        expander = article.find_element(
            By.CSS_SELECTOR, "button[data-testid=\"tweet-text-show-more-link\"]"
        )
        safe_js_click(driver, expander)
        time.sleep(0.1)
    except NoSuchElementException:
        pass

    # Step 3: tweet text; articles without a text node yield "".
    try:
        body = article.find_element(By.CSS_SELECTOR, "div[data-testid=\"tweetText\"]").text.strip()
    except NoSuchElementException:
        body = ""

    return {"url": permalink, "time_iso": time_iso or "", "content": body}

In [5]:
def main(
    username: str = "MayorBowser",
    target_posts: int = 200,
    max_scrolls: int = 300,
    scroll_pause: float = 1.8,
    no_progress_limit: int = 20,
    output_csv: str | None = None,
    headless: bool = False,
) -> None:
    """Scrape posts from an X profile timeline and save them to a CSV file.

    Opens ``https://x.com/<username>`` in Chrome, scrolls the page while
    collecting every tweet article it can parse, then writes the unique posts
    (newest first) with the columns ``time_iso``, ``content`` and ``url``.

    Args:
        username: Profile handle to open.
        target_posts: Stop scrolling once at least this many unique posts
            have been collected (the final count may be slightly higher).
        max_scrolls: Upper bound on scroll iterations.
        scroll_pause: Seconds to wait after each scroll.
        no_progress_limit: Stop after this many consecutive scrolls that
            produced no new post.
        output_csv: Destination path; defaults to ``<username>_tweets.csv``.
        headless: Run Chrome without a visible window when true.

    Raises:
        TimeoutException: If no tweet article appears within 20 seconds of
            loading the profile page.
    """
    url = f"https://x.com/{username}"
    if output_csv is None:
        output_csv = f"{username}_tweets.csv"

    options = webdriver.ChromeOptions()
    options.page_load_strategy = "eager"
    if headless:
        options.add_argument("--headless=new")

    driver = webdriver.Chrome(options=options)
    wait = WebDriverWait(driver, 20)

    try:
        print("Opening:", url)
        try:
            driver.set_page_load_timeout(25)
            driver.get(url)
        except TimeoutException:
            # The page is usable even if loading never "finishes"; stop it
            # and continue with whatever has rendered.
            driver.execute_script("window.stop();")

        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "article[data-testid=\"tweet\"]")))

        seen = {}  # url -> row dict
        no_progress = 0

        for i in range(max_scrolls):
            before = len(seen)

            articles = driver.find_elements(By.CSS_SELECTOR, "article[data-testid=\"tweet\"]")
            for article in articles:
                try:
                    row = extract_from_tweet_article(driver, article)
                    if not row:
                        continue
                    if row["url"] not in seen:
                        seen[row["url"]] = row
                except StaleElementReferenceException:
                    # The article was re-rendered while being read; skip it
                    # for this pass.
                    continue

            after = len(seen)
            print(f"Scroll {i+1}/{max_scrolls}: collected {after} unique posts")

            if after >= target_posts:
                break

            if after == before:
                no_progress += 1
            else:
                no_progress = 0

            if no_progress >= no_progress_limit:
                print("Stopping: no new posts loaded for many scrolls.")
                break

            driver.execute_script("window.scrollBy(0, 1400);")
            time.sleep(scroll_pause)

        # Convert to list + sort newest -> oldest
        rows = list(seen.values())
        epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)

        def sort_key(r):
            dt = parse_iso(r.get("time_iso"))
            if dt is None:
                # Rows without a usable timestamp sort last (oldest).
                return epoch
            if dt.tzinfo is None:
                # Naive and aware datetimes cannot be compared; treat a
                # timestamp without an offset as UTC.
                dt = dt.replace(tzinfo=timezone.utc)
            return dt

        rows.sort(key=sort_key, reverse=True)

        # Write CSV
        with open(output_csv, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["time_iso", "content", "url"])
            writer.writeheader()
            writer.writerows(rows)

        print(f"Saved {len(rows)} posts to {output_csv}")

    finally:
        # Always release the browser, even when scraping fails part-way.
        driver.quit()


# Entry point: start the scrape only when this code runs as the main module
# rather than being imported from elsewhere.
if __name__ == "__main__":
    main()

Opening: https://x.com/MayorBowser
Scroll 1/300: collected 2 unique posts
Scroll 2/300: collected 7 unique posts
Scroll 3/300: collected 11 unique posts
Scroll 4/300: collected 17 unique posts
Scroll 5/300: collected 19 unique posts
Scroll 6/300: collected 22 unique posts
Scroll 7/300: collected 25 unique posts
Scroll 8/300: collected 29 unique posts
Scroll 9/300: collected 30 unique posts
Scroll 10/300: collected 33 unique posts
Scroll 11/300: collected 38 unique posts
Scroll 12/300: collected 45 unique posts
Scroll 13/300: collected 48 unique posts
Scroll 14/300: collected 53 unique posts
Scroll 15/300: collected 56 unique posts
Scroll 16/300: collected 59 unique posts
Scroll 17/300: collected 63 unique posts
Scroll 18/300: collected 66 unique posts
Scroll 19/300: collected 70 unique posts
Scroll 20/300: collected 74 unique posts
Scroll 21/300: collected 80 unique posts
Scroll 22/300: collected 84 unique posts
Scroll 23/300: collected 90 unique posts
Scroll 24/300: collected 93 uniqu