In [3]:
!pip install -q requests
from google.colab import drive, files
# Mount Google Drive at /content/drive; DRIVE_DIR below lives under this mount point.
drive.mount("/content/drive", force_remount=False)

from __future__ import annotations
import csv, datetime as dt, time, random
from pathlib import Path
from typing import List, Dict
import requests, sys
from pprint import pprint

# GraphQL endpoint that every page request is POSTed to.
ENDPOINT = "https://api.studio.thegraph.com/query/111374/bayc-mainnet/v0.0.1"

# Directory (under the mounted Drive) that receives the exported CSV files.
DRIVE_DIR = Path("/content/drive/MyDrive/BAYC")

# CSV column order; the same field names are requested in the GraphQL query.
CSV_HEADERS = [
    "id",
    "blockNumber",
    "blockTimestamp",
    "transactionHash",
    "tokenId",
    "from",
    "to",
]

PAGE_SIZE = 1000        # rows requested per query
REQUESTS_PER_SEC = 2    # used to derive the pause between consecutive requests
MAX_ROWS = None         # row cap handed to export(); None means no cap

def ensure_dir(p: Path):
    """Create directory ``p`` together with any missing parents.

    Does nothing when the directory already exists.
    """
    p.mkdir(exist_ok=True, parents=True)

def gql(after_id: str|None) -> str:
    cursor = f', where: {{ id_gt: "{after_id}" }}' if after_id else ""
    return f"""
{{
  transfers(first: {PAGE_SIZE}{cursor},
            orderBy: id, orderDirection: asc) {{
    id
    blockNumber
    blockTimestamp
    transactionHash
    tokenId
    from
    to
  }}
}}
""".strip()

def fetch_batch(s: requests.Session, after_id: str|None) -> List[Dict]:
    """Fetch one page of transfers from ``ENDPOINT``.

    Args:
        s: Session used to send the POST request.
        after_id: Cursor forwarded to ``gql``; ``None`` requests the first page.

    Returns:
        The value of ``data.transfers`` from the JSON response (an empty list
        once there is nothing left to page through).

    Raises:
        requests.HTTPError: The server answered with an error status
            (raised by ``raise_for_status``).
        RuntimeError: The response carries GraphQL ``errors`` or does not
            contain ``data.transfers``.
    """
    r = s.post(ENDPOINT, json={"query": gql(after_id)}, timeout=30)
    r.raise_for_status()
    data = r.json()
    if not isinstance(data, dict):
        raise RuntimeError(f"Unexpected GraphQL response: {data!r}")
    if "errors" in data:
        pprint(data["errors"])
        # Carry the details in the exception too, so they survive when stdout is not captured.
        raise RuntimeError(f"GraphQL error: {data['errors']}")
    payload = data.get("data")
    if not isinstance(payload, dict) or "transfers" not in payload:
        # Fail with a readable message instead of a bare KeyError/TypeError.
        raise RuntimeError(f"GraphQL response has no data.transfers: {data!r}")
    return payload["transfers"]

def append_csv(path: Path, rows: List[Dict], first_batch: bool):
    """Write ``rows`` to the CSV file at ``path``.

    On the first batch the file is opened for writing (truncating any existing
    content) and a header row is emitted; later batches are appended without a
    header. Columns follow ``CSV_HEADERS``.
    """
    if first_batch:
        open_mode = "w"
    else:
        open_mode = "a"
    with path.open(open_mode, newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, CSV_HEADERS)
        if first_batch:
            writer.writeheader()
        for record in rows:
            writer.writerow(record)

def export(max_rows: int|None = None) -> Path:
    """Download transfers page by page into a timestamped CSV under ``DRIVE_DIR``.

    Pages are fetched in ascending ``id`` order, using the last ``id`` of each
    page as the cursor for the next one, until an empty page is returned or
    ``max_rows`` rows have been written. Every page is appended to the file as
    soon as it arrives, so pages written before an interruption stay on disk.

    Args:
        max_rows: Upper bound on the number of rows written. ``None`` (or any
            other falsy value such as ``0``) means no limit.

    Returns:
        Path of the CSV file. The file is only created once the first
        non-empty page has been received.
    """
    ensure_dir(DRIVE_DIR)
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
    # formats to the same stamp.
    ts = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d_%H%M")
    out = DRIVE_DIR / f"bayc_transfers_{ts}.csv"

    after, total, batch_no = None, 0, 0
    # The context manager closes the session even if the loop raises or is interrupted.
    with requests.Session() as sess:
        while True:
            if max_rows and total >= max_rows:
                break
            rows = fetch_batch(sess, after)
            if not rows:
                print(f"✅ DONE，TOTAL {total} ROWS")
                break
            if max_rows:
                # Trim the last page so the file never exceeds max_rows
                # (total < max_rows here, so the slice is never empty).
                rows = rows[: max_rows - total]
            append_csv(out, rows, first_batch=(batch_no == 0))
            total += len(rows)
            after = rows[-1]["id"]
            print(f"Batch {batch_no:>4} ✔ {len(rows):>4} rows → 总计 {total}")
            batch_no += 1
            # Randomised pause between requests, derived from REQUESTS_PER_SEC.
            time.sleep(random.uniform(1/REQUESTS_PER_SEC, 1.5/REQUESTS_PER_SEC))
    return out

# Run the export with the configured row cap, then report where the CSV was written.
csv_path = export(max_rows=MAX_ROWS)
print(f"\nCSV Has been saved to：{csv_path}")



KeyboardInterrupt: 

In [2]:
import pandas as pd
from pathlib import Path

# Input CSV (presumably written by the export cell — confirm the timestamp in
# the file name) and the sibling "*_clean.csv" path the result is written to.
SRC = Path("/content/drive/MyDrive/BAYC/bayc_transfers_20250512_2125.csv")
DST = SRC.with_name(SRC.stem + "_clean.csv")

df = pd.read_csv(SRC)

# Accept either timestamp column name, and fail early with a readable message
# when neither is present.
ts_col = "blockTimestamp" if "blockTimestamp" in df.columns else "timestamp"
if ts_col not in df.columns:
    raise KeyError(f"{SRC} has neither a 'blockTimestamp' nor a 'timestamp' column")
# Values are interpreted as Unix epoch seconds and converted to tz-aware UTC datetimes.
df[ts_col] = pd.to_datetime(df[ts_col], unit="s", utc=True)

# Optional column: scaled by 1e18 (assumes `value` is denominated in wei — TODO confirm).
if "value" in df.columns:
    df["valueEth"] = df["value"] / 1e18

# De-duplicate on whichever identifying columns the file actually has.
dedup_cols = [c for c in ("transactionHash", "txHash", "id") if c in df.columns]
if "logIndex" in df.columns:
    dedup_cols.append("logIndex")

if dedup_cols:
    df = df.drop_duplicates(subset=dedup_cols, keep="first")
else:
    print("⚠️ Didn't find txHash / transactionHash / id ...rows，Skip")

# A stable sort keeps rows that share a timestamp in their original file order;
# the default quicksort gives no such guarantee.
df = df.sort_values(ts_col, kind="stable").reset_index(drop=True)
df.to_csv(DST, index=False)

print(f"✅ Done：{len(df):,} Rows → {DST}")




✅ Done：306,306 Rows → /content/drive/MyDrive/BAYC/bayc_transfers_20250512_2125_clean.csv
