In [None]:
import os
import time
from urllib.parse import parse_qs, quote, urlparse

import pandas as pd
import requests
from sqlalchemy import create_engine


# --- Configuration -----------------------------------------------------------
# Secrets are read from environment variables instead of being written into the
# notebook. The database password that used to be hardcoded here has been
# exposed and should be rotated.
#   GITHUB_TOKEN  - GitHub personal access token (required)
#   RDS_PASSWORD  - database password (required)
#   RDS_USER / RDS_HOST / RDS_PORT / RDS_DB - optional overrides
git_token = os.environ["GITHUB_TOKEN"]

headers = {"Authorization": f"Bearer {git_token}",
           "Accept": "application/vnd.github+json"}

# Search terms; each one is sent as a separate repository search.
keyword = ["robotics", "ROS", "robot arm", "robot", "amr"]

# Seconds to pause between consecutive GitHub API requests.
delay = 2

rds_user = os.environ.get("RDS_USER", "root")
rds_pwd = os.environ["RDS_PASSWORD"]
rds_host = os.environ.get("RDS_HOST", "database-1.ctai4u0ayj03.ap-northeast-2.rds.amazonaws.com")
rds_port = int(os.environ.get("RDS_PORT", 3306))
rds_db = os.environ.get("RDS_DB", "amrbase")
table_name = "github_repo"

# Verify the token up front so a bad credential fails here rather than as a
# stream of warnings during collection.
res = requests.get("https://api.github.com/user", headers=headers, timeout=30)
res.raise_for_status()

# Pagination of the search endpoint: results per page and number of pages
# fetched per keyword.
per_page = 50
max_page = 3

In [2]:
# Percent-encode the credentials before placing them in the URL: characters
# such as "@", "/", ":" or "%" in a password would otherwise be parsed as URL
# structure and produce a wrong host or a failed login.
engine_url = (
    f"mysql+pymysql://{quote(rds_user, safe='')}:{quote(rds_pwd, safe='')}"
    f"@{rds_host}:{rds_port}/{rds_db}"
)
engine = create_engine(engine_url)

In [3]:
def search_repos(keyword, language=None, timeout=30):
    """Search GitHub repositories for a keyword, most-starred first.

    Fetches up to ``max_page`` pages of ``per_page`` results each from the
    repository search endpoint, pausing ``delay`` seconds after every
    successful page.

    Args:
        keyword: Search term placed in the ``q`` parameter.
        language: Optional language filter, appended as ``language:<value>``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of repository dicts as returned in the API's ``items`` field.
        Collection is best-effort: on a non-200 response or a network error a
        warning is printed and the pages gathered so far are returned.
    """
    # The query does not depend on the page, so build it once.
    query = f"{keyword} language:{language}" if language else keyword
    url = "https://api.github.com/search/repositories"

    repos = []
    for page in range(1, max_page + 1):
        params = {"q": query, "sort": "stars", "order": "desc", "per_page": per_page, "page": page}
        try:
            r = requests.get(url, headers=headers, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Timeouts and connection errors follow the same path as HTTP failures.
            print(f"[Warning] 요청 실패 {exc}")
            break
        if r.status_code != 200:
            print(f"[Warning] 요청 실패 {r.status_code}")
            break
        batch = r.json().get("items", [])
        if not batch:
            break
        repos.extend(batch)
        # Also paces back-to-back calls made by the caller for different keywords.
        time.sleep(delay)
    return repos

In [4]:
def _count_from_last_page(url, params, timeout):
    """Count items of a paginated GitHub list endpoint with a single request.

    Requests the list with ``per_page=1``; the page number of the ``rel="last"``
    link then equals the number of items. Returns ``None`` when the count
    cannot be determined (network error, error status, unparsable response).
    """
    try:
        response = requests.get(
            url, headers=headers, params={**params, "per_page": 1}, timeout=timeout
        )
    except requests.RequestException as exc:
        print(f"[Warning] 요청 실패 {url}: {exc}")
        return None

    if response.status_code == 204:
        # No content: the list is empty (GitHub answers this for empty repositories).
        return 0
    if response.status_code != 200:
        # Error payloads are JSON objects; taking len() of one would give a bogus count.
        return None

    last_url = response.links.get("last", {}).get("url")
    if last_url:
        try:
            return int(parse_qs(urlparse(last_url).query)["page"][0])
        except (KeyError, IndexError, ValueError):
            return None

    # No "last" link means everything fit on this one page (0 or 1 items).
    try:
        items = response.json()
    except ValueError:
        return None
    return len(items) if isinstance(items, list) else None


def get_repo_stats(owner, repo_name, timeout=30):
    """Fetch commit count, contributor count and open-issue count for a repo.

    Args:
        owner: Login of the repository owner.
        repo_name: Repository name.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A dict with keys ``commits``, ``contributors`` and ``open_issues``.
        A value is ``None`` when it could not be obtained; all three are
        ``None`` when the repository itself cannot be fetched.
    """
    unavailable = {"commits": None, "contributors": None, "open_issues": None}
    base_url = f"https://api.github.com/repos/{owner}/{repo_name}"

    try:
        r = requests.get(base_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"[Warning] 요청 실패 {base_url}: {exc}")
        return unavailable
    if r.status_code != 200:
        return unavailable
    repo = r.json()

    # Commit and contributor totals are estimated from pagination metadata.
    commits = _count_from_last_page(f"{base_url}/commits", {}, timeout)
    # anon=true includes contributors without a GitHub account.
    contributors = _count_from_last_page(f"{base_url}/contributors", {"anon": "true"}, timeout)

    # Open-issue count comes from the repository object.
    open_issues = repo.get("open_issues_count", None)

    return {"commits": commits, "contributors": contributors, "open_issues": open_issues}

In [5]:
# Run one search per configured keyword and pool every hit into a single list.
all_repos = []
for kw in keyword:
    print(f"Searching keyword: {kw}")
    result = search_repos(kw)
    all_repos += result

# Deduplicate by full_name: a repository matched by several keywords is kept
# once (position of its first hit, data of its last hit).
unique_repos = dict((repo["full_name"], repo) for repo in all_repos)
repos_list = [*unique_repos.values()]
print(f"총 {len(repos_list)}개의 고유 레포 수집 완료")

Searching keyword: robotics
Searching keyword: ROS


KeyboardInterrupt: 

In [None]:
# Output column -> key of the search-result item it is copied from.
search_fields = {
    "full_name": "full_name",
    "description": "description",
    "stars": "stargazers_count",
    "forks": "forks_count",
    "language": "language",
    "html_url": "html_url",
    "updated_at": "updated_at",
}
# Columns filled from the per-repository statistics lookup.
stat_fields = ("commits", "contributors", "open_issues")

# Build one flat record per unique repository, enriching each search hit with
# its statistics and pausing between repositories.
repo_data = []
for idx, repo in enumerate(repos_list, start=1):
    owner = repo["owner"]["login"]
    name = repo["name"]
    stats = get_repo_stats(owner, name)

    row = {column: repo[source_key] for column, source_key in search_fields.items()}
    row.update((field, stats[field]) for field in stat_fields)
    repo_data.append(row)

    print(f"[{idx}/{len(repos_list)}] {repo['full_name']} 완료")
    time.sleep(delay)

# Assemble all records into a DataFrame in one step.
df = pd.DataFrame(repo_data)

[1/423] Developer-Y/cs-video-courses 완료
[2/423] commaai/openpilot 완료
[3/423] NaiboWang/EasySpider 완료
[4/423] mudler/LocalAI 완료
[5/423] Genesis-Embodied-AI/Genesis 완료
[6/423] AtsushiSakai/PythonRobotics 완료
[7/423] wechaty/wechaty 완료
[8/423] huggingface/lerobot 완료
[9/423] hubotio/hubot 완료
[10/423] zauberzeug/nicegui 완료
[11/423] ArduPilot/ardupilot 완료
[12/423] bulletphysics/bullet3 완료
[13/423] rwaldron/johnny-five 완료
[14/423] DLR-RM/stable-baselines3 완료
[15/423] google-deepmind/mujoco 완료
[16/423] kornia/kornia 완료
[17/423] mozilla/TTS 완료
[18/423] rerun-io/rerun 완료
[19/423] hybridgroup/gobot 완료
[20/423] nasa-jpl/open-source-rover 완료
[21/423] StockSharp/StockSharp 완료
[22/423] dusty-nv/jetson-inference 완료
[23/423] Hypfer/Valetudo 완료
[24/423] cartographer-project/cartographer 완료
[25/423] firerpa/lamda 완료
[26/423] wzpan/wukong-robot 완료
[27/423] pliang279/awesome-multimodal-ml 완료
[28/423] lich0821/WeChatFerry 완료
[29/423] jason718/awesome-self-supervised-learning 완료
[30/423] ClemensElflein/OpenMo

In [9]:
# Write the DataFrame to the table named by `table_name` through `engine`,
# in batches of 100 rows and without the index column.
# NOTE: if_exists="append" adds rows to an existing table, so re-running this
# cell inserts the same repositories again; nothing here de-duplicates them.
df.to_sql(table_name, con=engine, if_exists="append", index=False, chunksize=100)
print(f"{table_name} 테이블에 데이터 저장 완료")

github_repo 테이블에 데이터 저장 완료


In [10]:
# Show the assembled DataFrame as the cell output for a visual check.
df

Unnamed: 0,full_name,description,stars,forks,language,html_url,updated_at,commits,contributors,open_issues
0,Developer-Y/cs-video-courses,List of Computer Science courses with video le...,70069,9410,,https://github.com/Developer-Y/cs-video-courses,2025-10-20T10:28:54Z,490,112,2
1,commaai/openpilot,openpilot is an operating system for robotics....,58459,10331,Python,https://github.com/commaai/openpilot,2025-10-20T10:33:23Z,16038,700,187
2,NaiboWang/EasySpider,A visual no-code/code-free web crawler/spider易...,43105,5295,JavaScript,https://github.com/NaiboWang/EasySpider,2025-10-20T10:29:14Z,655,15,252
3,mudler/LocalAI,":robot: The free, Open Source alternative to O...",35924,2848,Go,https://github.com/mudler/LocalAI,2025-10-20T09:34:09Z,4833,150,312
4,Genesis-Embodied-AI/Genesis,A generative world for general-purpose robotic...,27422,2517,Python,https://github.com/Genesis-Embodied-AI/Genesis,2025-10-20T08:17:13Z,833,68,118
...,...,...,...,...,...,...,...,...,...,...
418,1326323928/RoboticArm-of-UR5-like,,45,8,,https://github.com/1326323928/RoboticArm-of-UR...,2025-10-18T18:03:49Z,12,1,0
419,joepuzzo/robot-viewer,A 6 axis robotic arm similator and control sof...,45,15,JavaScript,https://github.com/joepuzzo/robot-viewer,2025-09-20T08:23:13Z,291,3,3
420,zalo/Dexter,A rich GUI and example code for visualizing an...,45,7,C#,https://github.com/zalo/Dexter,2025-01-29T06:27:20Z,8,1,0
421,Red-Rabbit-Robotics/rx1_arm_hardware,robotic arm hardware files for rx1 humanoid robot,44,9,,https://github.com/Red-Rabbit-Robotics/rx1_arm...,2025-08-28T03:14:19Z,4,1,2


In [11]:
# Export the DataFrame to github_repo.csv (relative to the working directory),
# omitting the index column.
df.to_csv("github_repo.csv", index=False)