In [3]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import time

conn = sqlite3.connect("google_repos.db")
cur = conn.cursor()

cur.execute("""
CREATE TABLE IF NOT EXISTS repos (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT,
    language TEXT,
    stars INTEGER
)
""")
conn.commit()


URL = "https://github.com/google?tab=repositories"
res = requests.get(URL)
soup = BeautifulSoup(res.text, "html.parser")

repo_list = soup.select("li source")

repos = soup.select("li.Box-row")

for repo in repos:
   
    name = repo.select_one("a[data-hovercard-type='repository']").text.strip()

    lang_tag = repo.select_one("span[itemprop='programmingLanguage']")
    language = lang_tag.text.strip() if lang_tag else None

    star_tag = repo.select("a.Link--muted")[0].text.strip()
    if "k" in star_tag:
        stars = int(float(star_tag.replace("k", "")) * 1000)
    else:
        stars = int(star_tag.replace(",", ""))

    cur.execute(
        "INSERT INTO repos (name, language, stars) VALUES (?, ?, ?)",
        (name, language, stars)
    )
    conn.commit()

    print(f"Saved: {name} / {language} / {stars}")

    time.sleep(1)  



cur.execute("SELECT * FROM repos")
rows = cur.fetchall()

print("DBに保存されたデータ")
for r in rows:
    print(r)

conn.close()

Saved: dwh-migration-tools / Java / 54
Saved: nomulus / Java / 1768
Saved: perfetto / C++ / 5019
Saved: koladata / C++ / 27
Saved: desugar_jdk_libs / Java / 389
Saved: conscrypt / Java / 1358
Saved: tcmalloc / C++ / 5001
Saved: dawn / C++ / 781
Saved: tunix / Python / 1916
Saved: meridian / Python / 1186
DBに保存されたデータ
(1, 'perfetto', 'C++', 4975)
(2, 'skia-buildbot', 'Go', 157)
(3, 'cel-java', 'Java', 227)
(4, 'angle', 'C++', 3841)
(5, 'site-kit-wp', 'JavaScript', 1335)
(6, 'osv-scanner', 'Go', 8073)
(7, 'device-infra', 'Java', 58)
(8, 'adk-java', 'Java', 945)
(9, 'tunix', 'Python', 1898)
(10, 'osv-scalibr', 'Go', 532)
(11, 'go-containerregistry', 'Go', 3597)
(12, 'angle', 'C++', 3841)
(13, 'skia', 'C++', 10261)
(14, 'go-units', 'Go', 7)
(15, 'osv-scanner', 'Go', 8073)
(16, 'perfetto', 'C++', 4975)
(17, 'skia-buildbot', 'Go', 157)
(18, 'cel-java', 'Java', 227)
(19, 'site-kit-wp', 'JavaScript', 1335)
(20, 'device-infra', 'Java', 58)
(21, 'device-infra', 'Java', 58)
(22, 'orbax', 'Python',