In [23]:
import requests
import pandas as pd
from datetime import datetime

# 设置仓库的基本信息
owner = "zhicheng-ning"
repo = "od-api"

# 获取提交信息
def get_commits(owner, repo):
    commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits"
    response = requests.get(commits_url)
    commits = response.json()
    return commits

# 获取Pull Request信息
def get_pull_requests(owner, repo):
    prs_url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all"
    response = requests.get(prs_url)
    prs = response.json()
    return prs

# 获取Issue信息
def get_issues(owner, repo):
    issues_url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=all"
    response = requests.get(issues_url)
    issues = response.json()
    return issues

# 处理提交信息
def process_commits(commits):
    commit_data = []
    for commit in commits:
        author = commit['commit']['author']['name']
        date = commit['commit']['author']['date']
        message = commit['commit']['message']
        commit_data.append({
            "author": author,
            "date": date,
            "message": message,
            "type": "code"
        })
    return commit_data

# 处理Pull Request信息
def process_pull_requests(prs):
    pr_data = []
    for pr in prs:
        author = pr['user']['login']
        created_at = pr['created_at']
        merged_at = pr['merged_at']
        pr_data.append({
            "author": author,
            "created_at": created_at,
            "merged_at": merged_at,
            "type": "review"
        })
    return pr_data

# 处理Issue信息
def process_issues(issues):
    issue_data = []
    for issue in issues:
        author = issue['user']['login']
        created_at = issue['created_at']
        closed_at = issue.get('closed_at', None)
        issue_data.append({
            "author": author,
            "created_at": created_at,
            "closed_at": closed_at,
            "type": "discussion"
        })
    return issue_data

# 获取数据
commits = get_commits(owner, repo)
prs = get_pull_requests(owner, repo)
issues = get_issues(owner, repo)

# 处理数据
commit_data = process_commits(commits)
pr_data = process_pull_requests(prs)
issue_data = process_issues(issues)

# 合并数据
collaboration_data = commit_data + pr_data + issue_data

# 打印协作数据
for data in collaboration_data:
    print(data)

## 写入csv
csv_file = 'collaboration_data.csv'
with open(csv_file, mode='w', newline='', encoding='utf-8-sig') as file:
    writer = csv.DictWriter(file, fieldnames=["author", "date", "message", "type", "created_at", "merged_at", "closed_at"])
    writer.writeheader()
    for data in collaboration_data:
        writer.writerow(data)

# 打印统计信息
print(f"\nTotal Commits: {len(commit_data)}")
print(f"Total Pull Requests: {len(pr_data)}")
print(f"Total Issues: {len(issue_data)}")

{'author': 'nzcer', 'date': '2023-12-11T08:39:46Z', 'message': 'fix: add exception handling', 'type': 'code'}
{'author': '逝不等琴生', 'date': '2023-04-20T11:32:39Z', 'message': 'refactor: fix-thread-executor (#38)', 'type': 'code'}
{'author': 'andyhuang18', 'date': '2023-04-09T09:19:10Z', 'message': 'ci: support issue comment label (#36)\n\n* add issue workflow v2', 'type': 'code'}
{'author': '逝不等琴生', 'date': '2023-04-07T14:43:39Z', 'message': 'Update publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate application-dev.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate Dockerfile\n\nUpdate Dockerfile\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\nUpdate publish.yml\n\

In [31]:
import requests
import pandas as pd
from datetime import datetime

# 设置仓库的基本信息
owner = "leveldb Team"
repo = "leveldb"

# 设置起始和结束日期
start_date = datetime(2019, 1, 1).isoformat()
end_date = datetime(2024, 7, 7).isoformat()

# 获取提交信息
def get_commits(owner, repo, start_date, end_date):
    commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits"
    params = {
        'since': start_date,
        'until': end_date
    }
    response = requests.get(commits_url, params=params)
    commits = response.json()
    return commits

# 获取Pull Request信息
def get_pull_requests(owner, repo, start_date, end_date):
    prs_url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all"
    params = {
        'since': start_date,
        'until': end_date
    }
    response = requests.get(prs_url, params=params)
    prs = response.json()
    return prs

# 获取Issue信息
def get_issues(owner, repo, start_date, end_date):
    issues_url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=all"
    params = {
        'since': start_date,
        'until': end_date
    }
    response = requests.get(issues_url, params=params)
    issues = response.json()
    return issues

# 处理提交信息
def process_commits(commits):
    commit_data = []
    for commit in commits:
        author = commit['commit']['author']['name']
        date = commit['commit']['author']['date']
        message = commit['commit']['message']
        commit_data.append({
            "author": author,
            "date": date,
            "message": message,
            "type": "code"
        })
    return commit_data

# 处理Pull Request信息
def process_pull_requests(prs):
    pr_data = []
    for pr in prs:
        author = pr['user']['login']
        created_at = pr['created_at']
        merged_at = pr['merged_at']
        pr_data.append({
            "author": author,
            "created_at": created_at,
            "merged_at": merged_at,
            "type": "review"
        })
    return pr_data

# 处理Issue信息
def process_issues(issues):
    issue_data = []
    for issue in issues:
        author = issue['user']['login']
        created_at = issue['created_at']
        closed_at = issue.get('closed_at', None)
        issue_data.append({
            "author": author,
            "created_at": created_at,
            "closed_at": closed_at,
            "type": "discussion"
        })
    return issue_data

# all_commits = []
# all_prs = []
# all_issues = []
# # 获取数据（定期地）
# while true:
#     commits = get_commits(owner, repo, start_date, end_date)
#     all_commits.extend(commits)
    
#     prs = get_pull_requests(owner, repo, start_date, end_date)
#     all_prs.extend(prs)
    
#     issues = get_issues(owner, repo, start_date, end_date)
#     all_issues.extend(issues)

# 处理数据
commit_data = process_commits(commits)
pr_data = process_pull_requests(prs)
issue_data = process_issues(issues)

# 合并数据
collaboration_data = commit_data + pr_data + issue_data

# 打印协作数据
for data in collaboration_data:
    print(data)

## 写入csv
csv_file = 'collaboration_data2.csv'
with open(csv_file, mode='w', newline='', encoding='utf-8-sig') as file:
    writer = csv.DictWriter(file, fieldnames=["author", "date", "message", "type", "created_at", "merged_at", "closed_at"])
    writer.writeheader()
    for data in collaboration_data:
        writer.writerow(data)

# 打印统计信息
print(f"\nTotal Commits: {len(commit_data)}")
print(f"Total Pull Requests: {len(pr_data)}")
print(f"Total Issues: {len(issue_data)}")

{'author': 'wangyunlai', 'date': '2023-12-25T05:54:11Z', 'message': 'add sysbench liscence (#329)\n\n### What problem were solved in this pull request?\r\n\r\nProblem:\r\nminiob uses sysbench as the concurrency testing tool but there is no\r\nsysbench liscence\r\n\r\n### What is changed and how it works?\r\nadd sysbench liscence', 'type': 'code'}
{'author': 'Helloworld-lbl', 'date': '2023-12-04T13:21:29Z', 'message': 'Use Ctrl+C to exit observer in cli mode. (#328)\n\n### What problem were solved in this pull request?\r\n\r\nIssue Number: close #281\r\n\r\nProblem: If start observer in cli mode, we cannot exit observer by\r\nCtrl+C.\r\n\r\n### What is changed and how it works?\r\n\r\n    在待输入命令时按下Ctrl+C后，注意到日志中有这样的记录：\r\n"[2023-12-04 11:20:30.830780 pid:5338 tid:7fcf688ee7c0 ctx:0 WARN:\r\nmy_readline@cli_communicator.cpp:71] >> failed to read line: Interrupted\r\nsystem call"\r\n    结合“cli_communicator.cpp:71”代码，可以知道此时系统错误被捕捉到且记录在errno中（此时errno为4）。\r\n\r\n故当fgets读到空字符串时，若errno==4，可判定此