In [1]:
import os
import requests
import pandas as pd

In [2]:
# Read the GitHub personal access token from the environment instead of
# committing it to the notebook (set it with `export GITHUB_TOKEN=...` before
# starting Jupyter).
# SECURITY: a token was previously hardcoded in this cell; treat it as leaked
# and revoke it in the GitHub account settings.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "").strip()

# Without a token, fall back to unauthenticated requests (GitHub applies a much
# lower rate limit to them) rather than sending an empty "token " header.
headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}

In [9]:
def get_user_repos(username, headers, timeout=10):
    """
    Fetch a GitHub user's repositories and return them as a DataFrame.

    Two listings are requested from ``/users/{username}/repos``: repositories
    the user owns (``type=owner``) and repositories the user is a member of
    (``type=member``). Each listing is paged through, 100 repositories per
    page, until an empty page is returned.

    Parameters
    ----------
    username : str
        GitHub login whose repositories are listed.
    headers : dict
        HTTP headers sent with every request (e.g. the Authorization header).
    timeout : float or tuple, optional
        Timeout in seconds passed to ``requests.get`` so that a stalled
        connection cannot hang the notebook forever. Defaults to 10.

    Returns
    -------
    pandas.DataFrame
        One row per repository with the columns ``repo_name``,
        ``repo_description``, ``Star``, ``Fork`` and ``repo_type``. The
        columns are present even when no repository was found. Totals can be
        obtained from the frame, e.g. ``df["Star"].sum()``.

    Notes
    -----
    A non-200 response is reported with ``print`` and ends the paging of the
    current listing; rows collected so far are still returned (best effort).
    """
    columns = ["repo_name", "repo_description", "Star", "Fork", "repo_type"]
    url = f"https://api.github.com/users/{username}/repos"
    repos = []

    # Walk both repository listings (owner and member).
    for repo_type in ("owner", "member"):
        page = 1
        while True:
            response = requests.get(
                url,
                headers=headers,
                # Let requests build and encode the query string.
                params={"page": page, "per_page": 100, "type": repo_type},
                timeout=timeout,
            )

            if response.status_code != 200:
                print(f"请求 {repo_type} 仓库失败，状态码: {response.status_code}")
                break

            data = response.json()
            if not data:
                # An empty page marks the end of this listing.
                break

            repos.extend(
                {
                    "repo_name": repo.get("name"),
                    "repo_description": repo.get("description"),
                    "Star": repo.get("stargazers_count", 0),
                    "Fork": repo.get("forks_count", 0),
                    "repo_type": repo_type,
                }
                for repo in data
            )
            page += 1

    # Passing the column list keeps the schema stable when `repos` is empty.
    return pd.DataFrame(repos, columns=columns)

In [7]:
def get_user_contributed(username, headers, timeout=10):
    """
    Summarise a user's recent activity per repository.

    The user's event feed (``/users/{username}/events``) is paged through,
    100 events per page, until an empty page or a non-200 response is
    received. Every repository that appears in the feed gets one row; for each
    of them the number of ``PushEvent``, ``PullRequestEvent`` and
    ``IssuesEvent`` entries is counted (other event types create the row but
    are not counted), and the repository's star and fork counts are looked up
    once via ``/repos/{repo_name}``.

    Parameters
    ----------
    username : str
        GitHub login whose events are read.
    headers : dict
        HTTP headers sent with every request (e.g. the Authorization header).
    timeout : float or tuple, optional
        Timeout in seconds passed to ``requests.get`` so that a stalled
        connection cannot hang the notebook forever. Defaults to 10.

    Returns
    -------
    pandas.DataFrame
        Columns ``repo_name``, ``PushEvent``, ``PullRequestEvent``,
        ``IssuesEvent``, ``repo_star`` and ``repo_fork``, one row per
        repository in order of first appearance. The columns are present even
        when the user has no events.

    Notes
    -----
    Failed requests are reported with ``print``. A failed repository lookup
    leaves ``repo_star`` / ``repo_fork`` at 0; a failed event request ends
    paging and the data gathered so far is returned (best effort).
    """
    columns = [
        "repo_name",
        "PushEvent",
        "PullRequestEvent",
        "IssuesEvent",
        "repo_star",
        "repo_fork",
    ]
    counted_types = ("PushEvent", "PullRequestEvent", "IssuesEvent")
    events_url = f"https://api.github.com/users/{username}/events"

    repo_contributions = {}
    page = 1

    while True:
        response = requests.get(
            events_url,
            headers=headers,
            # Let requests build and encode the query string.
            params={"page": page, "per_page": 100},
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f"请求用户活动失败，状态码: {response.status_code}")
            break

        events = response.json()
        if not events:
            break

        # Tally the events of this page per repository.
        for event in events:
            repo_name = event["repo"]["name"]

            if repo_name not in repo_contributions:
                # First time this repository is seen: start its counters ...
                stats = {
                    "PushEvent": 0,
                    "PullRequestEvent": 0,
                    "IssuesEvent": 0,
                    "repo_star": 0,
                    "repo_fork": 0,
                }
                repo_contributions[repo_name] = stats

                # ... and fetch its star / fork counts once.
                repo_response = requests.get(
                    f"https://api.github.com/repos/{repo_name}",
                    headers=headers,
                    timeout=timeout,
                )
                if repo_response.status_code == 200:
                    repo_data = repo_response.json()
                    stats["repo_star"] = repo_data.get("stargazers_count", 0)
                    stats["repo_fork"] = repo_data.get("forks_count", 0)
                else:
                    print(f"请求仓库详情失败，状态码: {repo_response.status_code}，仓库: {repo_name}")

            # Only the three tracked event types are counted.
            event_type = event["type"]
            if event_type in counted_types:
                repo_contributions[repo_name][event_type] += 1

        page += 1

    # Build the frame from records with a fixed column list, so the schema is
    # the same whether or not any event was found.
    records = [
        {"repo_name": name, **stats} for name, stats in repo_contributions.items()
    ]
    return pd.DataFrame(records, columns=columns)

In [8]:
# GitHub login to analyse; this notebook-level name is also read by the
# get_user_repos cell further down.
username = "wycats"
# Build the per-repository activity summary (performs live GitHub API calls).
df_repo_contributions = get_user_contributed(username, headers) 
# Last expression of the cell: display the resulting DataFrame.
df_repo_contributions

Unnamed: 0,repo_name,PushEvent,PullRequestEvent,IssuesEvent,repo_star,repo_fork
0,glimmerjs/glimmer-vm,46,13,0,1129,190
1,emberjs/ember.js,33,6,0,22470,4209
2,handlebars-lang/handlebars-parser,7,4,0,31,8
3,emberjs/babel-plugin-ember-template-compilation,0,0,0,9,11
4,embroider-build/release-plan,0,0,0,13,6
5,embroider-build/content-tag,0,0,0,9,8
6,wycats/ember-beluga-simulator,0,0,0,0,0
7,wycats/ember-discord-clone,0,0,0,0,0
8,emberjs/rfcs,1,0,0,790,408
9,jdx/mise,0,0,0,9933,286


In [10]:
# List the user's owner/member repositories (performs live GitHub API calls).
# Relies on `username` and `headers` assigned in earlier cells.
repos_info = get_user_repos(username, headers)
# Last expression of the cell: display the resulting DataFrame.
repos_info

Unnamed: 0,repo_name,repo_description,Star,Fork,repo_type
0,abbot-from-scratch,,10,5,owner
1,abbot-ng,a place to store my in-progress work on the re...,3,2,owner
2,activerecord-import,Extraction of the ActiveRecord::Base#import fu...,3,1,owner
3,active_params,,8,1,owner
4,agendas,TC39 meeting agendas,1,1,owner
...,...,...,...,...,...
393,ember-yard-viewer,,2,0,member
394,packager,Mac OS X Packager for RubyGems,13,0,member
395,atlanta,The official git repository for Merb Day Atlanta,8,1,member
396,RailsDispatch,,3,0,member
