In [23]:
import math, time, sqlite3, requests
from bs4 import BeautifulSoup

# GitHub organization whose repository listing is crawled.
ORG   = "google"
# Bound ``str.format``: call as PAGE_URL(org=..., page=...) to get the
# listing URL for one page.
PAGE_URL = "https://github.com/{org}?page={page}&tab=repositories".format
# Request headers sent with every page fetch.
HEADERS  = {"User-Agent": "Mozilla/5.0 (compatible; google-repo-scraper)"}

# Crawl results are stored in a SQLite file in the working directory.
conn = sqlite3.connect("google_repos.db")
cur  = conn.cursor()
# Create the results table on first run. ``name`` is UNIQUE, so the table
# holds at most one row per repository name.
cur.execute("""
CREATE TABLE IF NOT EXISTS repos(
    id       INTEGER PRIMARY KEY AUTOINCREMENT,
    name     TEXT UNIQUE,
    language TEXT,
    stars    INTEGER
)""")
conn.commit()

def star_to_int(text: str) -> int:
    """Convert a star-count label to an integer.

    Accepts plain counts ("1,234") and abbreviated counts carrying a ``k``
    (thousand) or ``m`` (million) suffix in either case ("10.2k", "1.5M").
    Surrounding whitespace and comma separators are ignored; an empty label
    yields 0.

    Raises:
        ValueError: if the numeric part cannot be parsed.
    """
    text = text.strip().lower().replace(',', '')
    # Binary floats cannot represent most decimal fractions exactly
    # (1.005 * 1000 == 1004.9999999999999), so round to the nearest integer
    # instead of truncating with int().
    if text.endswith('k'):
        return round(float(text[:-1]) * 1_000)
    if text.endswith('m'):
        return round(float(text[:-1]) * 1_000_000)
    return int(text or 0)

def get_repo_anchor(row):
    """Return the repository link found in one listing row, or ``None``.

    The CSS selectors are tried in order and the first truthy match wins.
    """
    selectors = (
        "h3 a",
        "a[data-hovercard-type='repository']",
        'a[itemprop="name codeRepository"]',
    )
    # Lazy generator: a later selector is only queried when every earlier
    # one produced no match.
    candidates = (row.select_one(selector) for selector in selectors)
    return next((anchor for anchor in candidates if anchor), None)

def parse_repo_row(row):
    """Extract ``(name, language, stars)`` from one repository listing row.

    Returns ``None`` when the row contains no repository link. ``language``
    is ``None`` when the row has no language tag, and ``stars`` is 0 when it
    has no stargazers link.
    """
    anchor = get_repo_anchor(row)
    if not anchor:
        return None

    # Last path segment of the link; fall back to the link text when that
    # segment is empty.
    last_segment = anchor.get("href", "").split('/')[-1]
    name = last_segment or anchor.get_text(strip=True)

    language = None
    language_span = row.select_one('span[itemprop="programmingLanguage"]')
    if language_span:
        language = language_span.get_text(strip=True)

    star_count = 0
    star_link = row.select_one('a[href$="/stargazers"]')
    if star_link:
        star_count = star_to_int(star_link.get_text(strip=True))

    return name, language, star_count

# Crawl the organization's repository listing page by page, store every
# repository in SQLite, then print the stored table ordered by star count.
# The try/finally guarantees the connection is closed even when a request
# or a parse step raises part-way through the crawl.
try:
    page = 1
    while True:
        url = PAGE_URL(org=ORG, page=page)
        print(f"[+] fetch {url}")
        r = requests.get(url, headers=HEADERS, timeout=30)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")

        rows = soup.select("li.Box-row")
        if not rows:      # just in case: a page without rows ends the crawl
            break

        for row in rows:
            result = parse_repo_row(row)
            if not result:
                continue
            name, lang, stars = result
            print(f"    {name:35} {lang or 'N/A':12} {stars:7}")
            cur.execute("""
            INSERT OR REPLACE INTO repos(name, language, stars)
            VALUES (?,?,?)
        """, (name, lang, stars))
        conn.commit()     # one commit per fetched page

        # Stop when there is no "next page" link, or it is marked disabled.
        next_btn = soup.select_one('a.next_page')
        if not next_btn or 'disabled' in next_btn.get('class', []):
            break

        page += 1
        time.sleep(1)     # pause between page requests

    print("\n=== crawl finished ===\n")

    print(f"{'Repository':30} | {'Language':10} | Stars")
    print("-"*55)
    for n, l, s in cur.execute(
            "SELECT name, language, stars FROM repos ORDER BY stars DESC"):
        print(f"{n:30} | {l or 'N/A':10} | {s}")
finally:
    conn.close()

[+] fetch https://github.com/google?page=1&tab=repositories
    skia                                C++            10264
    angle                               C++             3841
    dawn                                C++              772
    dwh-migration-tools                 Java              54
    zerocopy                            Rust            2075
    nomulus                             Java            1766
    oss-fuzz-vulns                      Python           166
    perfetto                            C++             4983
    oss-fuzz                            Shell          11647
    toucan                              C++               48

=== crawl finished ===

Repository                     | Language   | Stars
-------------------------------------------------------
oss-fuzz                       | Shell      | 11647
skia                           | C++        | 10264
perfetto                       | C++        | 4983
angle                          | C++      

In [None]:
import time, sqlite3, requests
from bs4 import BeautifulSoup

# GitHub organization whose repository listing is crawled.
ORG  = "google"
# Bound ``str.format``: call as PAGE(org=..., page=...) to get the
# /orgs/<org>/repositories listing URL for one page.
PAGE = "https://github.com/orgs/{org}/repositories?page={page}".format
# Request headers sent with every page fetch.
UA   = {"User-Agent": "Mozilla/5.0 (compatible; google-repo-scraper)"}

# Crawl results are stored in a SQLite file in the working directory.
conn = sqlite3.connect("google_repos.db")
cur  = conn.cursor()
# Create the results table if it does not exist yet; ``name`` is UNIQUE,
# so the table holds at most one row per repository name.
cur.execute("""CREATE TABLE IF NOT EXISTS repos(
                 id INTEGER PRIMARY KEY AUTOINCREMENT,
                 name TEXT UNIQUE, language TEXT, stars INTEGER)""")

def stars_to_int(txt):
    """Convert a star-count label to an integer.

    Accepts plain counts ("1,234") and abbreviated counts carrying a ``k``
    (thousand) or ``m`` (million) suffix in either case ("10.2k", "1.5M").
    Surrounding whitespace and comma separators are ignored; an empty label
    yields 0.

    Raises:
        ValueError: if the numeric part cannot be parsed.
    """
    txt = txt.lower().replace(',', '').strip()
    multipliers = {'k': 1_000, 'm': 1_000_000}
    suffix = txt[-1:]          # '' for an empty label, so no IndexError
    if suffix in multipliers:
        # Binary floats cannot represent most decimal fractions exactly
        # (1.005 * 1000 == 1004.9999999999999), so round to the nearest
        # integer instead of truncating with int().
        return round(float(txt[:-1]) * multipliers[suffix])
    return int(txt or 0)

def parse(row):
    """Extract ``(name, language, stars)`` from one repository listing row.

    ``name`` is the last path segment of the ``h3 a`` link. ``language`` is
    ``None`` when the row has no ``programmingLanguage`` element, and
    ``stars`` is 0 when it has no stargazers link.

    Raises:
        ValueError: if the row contains no ``h3 a`` link, so no repository
            name can be determined.
    """
    a = row.select_one("h3 a")                     # <h3><a ...>repo</a></h3>
    if a is None:
        raise ValueError("repository row has no 'h3 a' link")
    name = a['href'].split('/')[-1]

    # Both remaining fields are optional, so a missing element maps to a
    # default instead of failing on an attribute lookup on None.
    lang_tag = row.select_one('[itemprop="programmingLanguage"]')
    lang = lang_tag.get_text(strip=True) if lang_tag is not None else None

    star_tag = row.select_one('a[href$="/stargazers"]')
    stars = (stars_to_int(star_tag.get_text(strip=True))
             if star_tag is not None else 0)
    return name, lang, stars

# Crawl the organization's repository listing page by page, store every
# repository in SQLite, then print the total number of stored rows.
# The try/finally guarantees the connection is closed even when a request
# or a parse step raises part-way through the crawl.
try:
    page = 1
    while True:
        url = PAGE(org=ORG, page=page)
        print("[+] GET", url)
        resp = requests.get(url, headers=UA, timeout=30)
        # Fail loudly on HTTP errors: an error page contains no repository
        # rows and would otherwise end the crawl silently with partial data.
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        rows = soup.select("li.Box-row")          # repository rows on this page
        if not rows:
            break

        for r in rows:
            cur.execute("INSERT OR REPLACE INTO repos(name, language, stars)"
                        "VALUES (?,?,?)", parse(r))
        conn.commit()                             # one commit per fetched page

        # Stop when the page has no rel="next" link.
        if not soup.select_one('a[rel="next"]'):
            break
        page += 1
        time.sleep(1)                             # pause between page requests

    print("総件数:", cur.execute("SELECT COUNT(*) FROM repos").fetchone()[0])
finally:
    conn.close()