In [3]:
from pydriller import Repository, Commit
import re
import mimetypes

In [4]:
def get_commits(repo_url: str) -> list[Commit]:
    """Collect every commit of a repository into a list.

    Args:
        repo_url: Repository location, passed unchanged to ``Repository``.

    Returns:
        All commits produced by ``Repository(repo_url).traverse_commits()``,
        in the order the traversal yields them.
    """
    # Drain the traversal into a concrete list in one step.
    return list(Repository(repo_url).traverse_commits())

def scan_repo(repo_url: str) -> dict:
    """Walk a repository once and gather summary statistics.

    Args:
        repo_url: Repository location, passed unchanged to ``Repository``.

    Returns:
        A dict with four keys:
          - ``"commits"``: list of every commit, in traversal order.
          - ``"files"``: set of distinct file paths touched by any commit.
          - ``"committers"``: set of distinct committer names.
          - ``"lines"``: sum over all commits of ``insertions - deletions``.
    """
    commits = []
    files = set()
    committers = set()
    lines = 0

    for commit in Repository(repo_url).traverse_commits():
        commits.append(commit)
        committers.add(commit.committer.name)
        for file in commit.modified_files:
            # Key the set by path rather than by the per-commit file object:
            # storing the objects would record one entry per modification
            # instead of one per file. ``new_path`` is None for a deleted
            # file, so fall back to ``old_path``. A renamed file appears once
            # under each path it had.
            path = file.new_path or file.old_path
            if path is not None:
                files.add(path)
        # Net line change contributed by this commit.
        lines += commit.insertions - commit.deletions

    return {
        "commits": commits,
        "files": files,
        "committers": committers,
        "lines": lines,
    }

In [5]:
# result = get_commits("https://github.com/veloren/veloren")
# print(result)

In [6]:
# GitHub repositories that make up the "game" sample.
# Order matters: per-repository results are stored positionally, so index i
# of any list built from this one refers to the i-th URL here.
game_repos = [
    "https://github.com/veloren/veloren",
    "https://github.com/CleverRaven/Cataclysm-DDA",
    "https://github.com/00-Evan/shattered-pixel-dungeon",
    "https://github.com/Anuken/Mindustry",
    "https://github.com/crawl/crawl",
    "https://github.com/Warzone2100/warzone2100",
    "https://github.com/yairm210/Unciv",
    "https://github.com/OpenTTD/OpenTTD",
    "https://github.com/endless-sky/endless-sky",
    "https://github.com/wesnoth/wesnoth",
]

# GitHub repositories that make up the "web" sample.
# Order matters: per-repository results are stored positionally, so index i
# of any list built from this one refers to the i-th URL here.
web_repos = [
    "https://github.com/lichess-org/lila",
    "https://github.com/flutter/website",
    "https://github.com/monkeytypegame/monkeytype",
    "https://github.com/reactjs/react.dev",
    "https://github.com/ethereum/ethereum-org-website",
    "https://github.com/kubernetes/website",
    "https://github.com/TheOdinProject/theodinproject",
    "https://github.com/tailwindlabs/tailwindcss.com",
    "https://github.com/bitwarden/clients",
    "https://github.com/privacyguides/privacyguides.org",
]

In [7]:
# One list of commits per game repository, in the same order as game_repos.
game_results = [get_commits(repo_url) for repo_url in game_repos]

In [8]:
# One list of commits per web repository, in the same order as web_repos.
web_results = [get_commits(repo_url) for repo_url in web_repos]

In [14]:
# Number of commits per repository: games first, then web projects.
print([len(commits) for commits in game_results])
print([len(commits) for commits in web_results])

[15746, 113732, 7328, 17752, 69516, 20116, 10461, 29449, 8584, 83429]
[60695, 6744, 10883, 6533, 34502, 49554, 6008, 4842, 15278, 3470]


In [30]:
# For each game repository, print every committer's share of commits (in
# percent), largest share first.
# NOTE(review): the tally is keyed on commit.committer.name although the
# variable is called author_commits — confirm whether commit.author was meant.
for result in game_results:
    total = len(result)

    # Count commits per committer name, keeping first-seen order.
    counts = {}
    for commit in result:
        name = commit.committer.name
        counts[name] = counts.get(name, 0) + 1

    # Convert raw counts to percentages of the repository's commits.
    shares = [(name, (count / total) * 100) for name, count in counts.items()]

    # Stable descending sort: equal shares keep their first-seen order.
    shares.sort(key=lambda pair: pair[1], reverse=True)

    author_commits = dict(shares)
    print(author_commits)

{'Joshua Barretto': 17.528261145687793, 'Imbris': 9.494474787247555, 'Sam': 8.764130572843897, 'Marcel': 8.122697828019815, 'Marcel Märtens': 4.6996062492061474, 'juliancoffee': 3.3976883017909314, 'Samuel Keiffer': 3.391337482535247, 'jshipsey': 2.5784326178076973, 'timokoesters': 2.483170328972437, 'Isse': 2.4133113171599136, 'Monty Marz': 1.9624031500063508, 'Forest Anderson': 1.8607900419154069, 'Avi Weinstock': 1.6321605487107835, 'Christof Petig': 1.492442525085736, 'Joshua Yanovski': 1.4225835132732123, 'Pfauenauge90': 1.4162326940175283, 'Ben Wallis': 1.2193572970913247, 'Songtronix': 1.187603200812905, 'Justin Shipsey': 1.149498285278801, 'Acrimon': 0.9208687920741776, 'Snowram': 0.9208687920741776, 'crabman': 0.8764130572843898, 'Maxicarlos08': 0.7811507684491299, 'IsseW': 0.774799949193446, 'Yusuf Bera Ertan': 0.6604852025911343, 'Louis Pearson': 0.6096786485456623, 'jiminycrick': 0.5144163597104026, 'Robin Gilh': 0.5080655404547186, 'Dr. Dystopia': 0.5017147211990347, 'Tree

In [31]:
# For each web repository, print every committer's share of commits (in
# percent), largest share first.
# NOTE(review): the tally is keyed on commit.committer.name although the
# variable is called author_commits; the printed output contains a 'GitHub'
# entry, so confirm whether commit.author was meant.
for result in web_results:
    total = len(result)

    # Count commits per committer name, keeping first-seen order.
    counts = {}
    for commit in result:
        name = commit.committer.name
        counts[name] = counts.get(name, 0) + 1

    # Convert raw counts to percentages of the repository's commits.
    shares = [(name, (count / total) * 100) for name, count in counts.items()]

    # Stable descending sort: equal shares keep their first-seen order.
    shares.sort(key=lambda pair: pair[1], reverse=True)

    author_commits = dict(shares)
    print(author_commits)

{'Thibault Duplessis': 74.1576736139715, 'GitHub': 9.297306203146881, 'Niklas Fiekas': 4.705494686547492, 'Benedikt Werner': 1.4564626410742236, 'Jonathan Gamble': 1.2867616772386523, 'kraktus': 0.9440645852211879, 'Isaac Levy': 0.8007249361561908, 'Thanh Le': 0.7661257105198122, 'Scala Steward': 0.6837466018617679, 'Ben Rollin': 0.4959222341214268, 'clarkerubber': 0.4810939945629788, 'ProgramFOX': 0.3196309415932119, 'Unihedron': 0.22901392206936322, 'Gordon Martin': 0.21912842903039786, 'Albert Ford': 0.2043001894719499, 'Trevor Fitzgerald': 0.2043001894719499, 'YaFred': 0.15157755993080155, 'Allan Joseph': 0.14169206689183622, 'Adam Morris': 0.1235686629870665, 'Jimmie Elvenmark': 0.11038800560177939, 'Brandon Evans': 0.11038800560177939, 'Vincent Velociter': 0.10544525908229674, 'Sam Ezeh': 0.09885493038965319, 'Daniel Dugovic': 0.08732185517752698, 'Greg Finley': 0.08732185517752698, 'Lakin Wecker': 0.08567427300436609, 'Trevor Bayless': 0.08237910865804432, 'antma': 0.07908394431