In [None]:
# =====================================================
# Pegos Twitter Scraper (Cookie-based, no date filter)
# =====================================================
import os
import random
import time
import traceback
from datetime import datetime
from urllib.parse import quote

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


In [None]:
# --------------------------
# Environment variables (GitHub Secrets)
# --------------------------
# Session cookie values are read from the process environment so that no
# credential is ever hard-coded in the notebook.
AUTH_TOKEN = os.environ.get("AUTH_TOKEN")
CT0 = os.environ.get("CT0")

# Destination of the CSV written by the final cell.
OUT_PATH = "/tmp/pegos_output.csv"

# Fail fast: both cookie values are needed, so stop if either is missing/empty.
if not (AUTH_TOKEN and CT0):
    raise RuntimeError("AUTH_TOKEN veya CT0 tanƒ±mlƒ± deƒüil (GitHub Secrets kƒ±smƒ±na ekle).")

In [None]:
# --------------------------
# Start Chrome (headless)
# --------------------------
# Command-line switches passed to Chrome, applied in this order.
_CHROME_FLAGS = (
    "--headless=new",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-gpu",
    "--window-size=1920,1080",
    "--disable-blink-features=AutomationControlled",
)

# Experimental ChromeOptions entries (name, value), applied in this order.
_CHROME_EXPERIMENTAL = (
    ("excludeSwitches", ["enable-automation"]),
    ("useAutomationExtension", False),
)

opts = Options()
for _flag in _CHROME_FLAGS:
    opts.add_argument(_flag)
for _opt_name, _opt_value in _CHROME_EXPERIMENTAL:
    opts.add_experimental_option(_opt_name, _opt_value)

# webdriver_manager's install() result is handed to Service as the
# chromedriver executable location.
_chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(service=Service(_chromedriver_path), options=opts)


In [None]:
# --------------------------
# Login with cookies
# --------------------------
# The site is opened first and the cookies are added afterwards; the page is
# then refreshed so the next request carries them.
# NOTE(review): Selenium is expected to reject cookies for a domain other than
# the currently loaded one, hence the initial get() -- confirm against the
# Selenium documentation.
driver.get("https://x.com")
time.sleep(3)

for _cookie_name, _cookie_value in (("auth_token", AUTH_TOKEN), ("ct0", CT0)):
    driver.add_cookie(
        {"name": _cookie_name, "value": _cookie_value, "domain": ".x.com"}
    )

driver.refresh()
time.sleep(3)
print("‚úÖ Cookies set, current URL:", driver.current_url)


In [None]:
# --------------------------
# Tweet scraping (no date filter)
# --------------------------
KEYWORDS = ['blockchain', 'cryptocurrency', 'bitcoin', 'ethereum']
SCROLLS_PER_KEYWORD = 40  # scroll steps performed on each search page
tweetArr = []

# Abbreviation suffix -> multiplier for engagement counters.
# "B" and "Mn" are kept from the original code.
# NOTE(review): presumably these are the Turkish UI abbreviations
# ("bin" = thousand, "milyon" = million); in an English UI "B" would mean
# billion -- confirm the locale the browser session runs in.
# "K" and "M" cover the English-style abbreviations.
_COUNT_SUFFIXES = (
    ("Mn", 1_000_000),
    ("M", 1_000_000),
    ("B", 1_000),
    ("K", 1_000),
)


def _parse_count(raw):
    """Convert a counter label such as "12", "1.234", "1,5 B" or "2.3M" to an int.

    Args:
        raw: Text of the counter element (may be None or empty).

    Returns:
        The parsed count, or 0 when the text is empty or not numeric.
    """
    token = (raw or "").replace(" ", "").replace("\u00a0", "")
    if not token:
        return 0

    for suffix, factor in _COUNT_SUFFIXES:
        if token.endswith(suffix):
            # Strip exactly the matched suffix (the original always removed two
            # characters for millions, which corrupted values such as "1.5M").
            # With a suffix, a comma is a decimal separator ("1,5 B").
            number = token[:-len(suffix)].replace(",", ".")
            break
    else:
        # Un-abbreviated counters are whole numbers, so any "," or "." is a
        # grouping separator ("1,234" / "1.234").
        factor = 1
        number = token.replace(",", "").replace(".", "")

    try:
        # round() avoids float truncation such as 2.3 * 1000 -> 2299.
        return int(round(float(number) * factor))
    except ValueError:
        return 0


def _parse_article(art, kw):
    """Build one output row from an <article> element.

    Args:
        art: BeautifulSoup tag of the article.
        kw: Search keyword the article was found under.

    Returns:
        A dict with the row fields, or None when the article has no
        element marked data-testid="tweetText".
    """
    text_tag = art.find(attrs={"data-testid": "tweetText"})
    if not text_tag:
        return None

    time_tag = art.find("time")
    # .get() keeps the tweet (with time=None) if the attribute is absent.
    time_str = time_tag.get("datetime") if time_tag else None

    # Engagement counts: the first four counter elements are mapped, in
    # document order, to comment / retweet / like / view count.
    # NOTE(review): this ordering is assumed from the page layout -- confirm.
    counters = art.find_all(attrs={"data-testid": "app-text-transition-container"})
    vals = [_parse_count(c.get_text()) for c in counters[:4]]
    vals += [0] * (4 - len(vals))

    return {
        "keyword": kw,
        "tweet": text_tag.get_text(" ", strip=True),
        "time": time_str,
        "comment": vals[0],
        "retweet": vals[1],
        "like": vals[2],
        "see_count": vals[3],
    }


# Every scroll step re-parses the whole page source, so an article that is
# still in the DOM would otherwise be appended once per step. Rows are
# de-duplicated on (keyword, text, timestamp); the first sighting wins.
_seen_rows = set()

try:
    for kw in KEYWORDS:
        print(f"üîé Searching for: {kw}")
        # Percent-encode the keyword so spaces, '#', '&' etc. stay inside `q`.
        driver.get(f"https://x.com/search?q={quote(kw, safe='')}&src=typed_query&f=live")
        time.sleep(4)

        for _ in range(SCROLLS_PER_KEYWORD):
            driver.execute_script("window.scrollBy(0, 1200);")
            # Randomised pause between scroll steps.
            time.sleep(random.uniform(0.8, 1.3))

            soup = BeautifulSoup(driver.page_source, "html.parser")
            for art in soup.find_all("article"):
                try:
                    row = _parse_article(art, kw)
                    if row is None:
                        continue
                    row_key = (row["keyword"], row["tweet"], row["time"])
                    if row_key in _seen_rows:
                        continue
                    _seen_rows.add(row_key)
                    tweetArr.append(row)
                except Exception as e:
                    # Skip the tweet if anything goes wrong while parsing it.
                    print("‚ö†Ô∏è Parse error:", e)

        print(f"‚úÖ Finished {kw}: total tweets so far {len(tweetArr)}")
finally:
    # Always shut the browser down, even if navigation or scrolling raised,
    # so no Chrome process is left running.
    driver.quit()

In [None]:
# --------------------------
# Save to CSV
# --------------------------
# One DataFrame row per collected tweet dict; the file is written only when
# at least one row exists, otherwise a warning is printed instead.
df = pd.DataFrame(tweetArr)
if not df.empty:
    df.to_csv(OUT_PATH, index=False)
    print(f"üíæ Saved to {OUT_PATH}, total {len(df)} rows.")
else:
    print("‚ö†Ô∏è No tweets collected.")