In [2]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import time

BASE_URL = "https://github.com/google?page={}"

# --- データベース準備 ----------------------------------------------------
conn = sqlite3.connect("repositories.db")
cur = conn.cursor()

cur.execute("""
CREATE TABLE IF NOT EXISTS repositories (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT,
    language TEXT,
    stars INTEGER
)
""")
conn.commit()

# --- スクレイピング -------------------------------------------------------
def scrape_repos():
    for page in range(1, 3):  # 必要に応じてページ数調整
        print(f"Scraping page {page} ...")
        url = BASE_URL.format(page)
        res = requests.get(url)
        soup = BeautifulSoup(res.text, "html.parser")

        repos = soup.select("li.Box-row")

        for repo in repos:
            # リポジトリ名
            name_tag = repo.select_one("a[href*='/google/']")
            name = name_tag.text.strip() if name_tag else None

            # 主要言語
            lang_tag = repo.select_one("span[itemprop='programmingLanguage']")
            language = lang_tag.text.strip() if lang_tag else None

            # スター数
            star_tag = repo.select_one("a[href$='/stargazers']")
            stars = star_tag.text.strip().replace(",", "") if star_tag else "0"
            stars = int(stars)

            print(name, language, stars)

            # DB に保存
            cur.execute(
                "INSERT INTO repositories (name, language, stars) VALUES (?, ?, ?)",
                (name, language, stars)
            )
            conn.commit()

        time.sleep(1)  # GitHubに負荷をかけないため絶対必要

# --- 表示 ---------------------------------------------------------------
def show_data():
    print("\nSaved Data:")
    for row in cur.execute("SELECT * FROM repositories"):
        print(row)


# --- 実行 ---------------------------------------------------------------
if __name__ == "__main__":
    scrape_repos()
    show_data()


Scraping page 1 ...
nomulus Java 1766
skia C++ 10264
xls C++ 1374
tunix Python 1905
mug Java 444
dive C++ 17
gn-language-server Rust 18
gvisor Go 17224
budoux-extension TypeScript 16
tabuli C++ 35
Scraping page 2 ...
nomulus Java 1766
skia C++ 10264
xls C++ 1374
tunix Python 1905
mug Java 444
dive C++ 17
gn-language-server Rust 18
gvisor Go 17224
budoux-extension TypeScript 16
tabuli C++ 35

Saved Data:
(1, 'nomulus', 'Java', 1766)
(2, 'skia', 'C++', 10264)
(3, 'xls', 'C++', 1374)
(4, 'tunix', 'Python', 1905)
(5, 'mug', 'Java', 444)
(6, 'dive', 'C++', 17)
(7, 'gn-language-server', 'Rust', 18)
(8, 'gvisor', 'Go', 17224)
(9, 'budoux-extension', 'TypeScript', 16)
(10, 'tabuli', 'C++', 35)
(11, 'nomulus', 'Java', 1766)
(12, 'skia', 'C++', 10264)
(13, 'xls', 'C++', 1374)
(14, 'tunix', 'Python', 1905)
(15, 'mug', 'Java', 444)
(16, 'dive', 'C++', 17)
(17, 'gn-language-server', 'Rust', 18)
(18, 'gvisor', 'Go', 17224)
(19, 'budoux-extension', 'TypeScript', 16)
(20, 'tabuli', 'C++', 35)
