### 1 - Imports

In [57]:
import aiohttp
import asyncio
import nest_asyncio
import pandas as pd

### 2 - Run Asyncio

In [58]:
nest_asyncio.apply()

### 3 - Variables

In [59]:
GITHUB_API = "https://api.github.com"
PER_PAGE = 100
USER = "octocat"
TOKEN = "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"
HEADERS = {"Authorization" : f"token {TOKEN}", "Accept": "application/vnd.github.v3+json"}

### 4 - Functions

In [60]:
# Get repositories for a user
async def get_repos(session, user):
    url = f"{GITHUB_API}/users/{user}/repos"
    repos = []
    page = 1

    while True:
        params = {"per_page": PER_PAGE, "page": page}
        async with session.get(url, headers=HEADERS, params=params) as resp:
            resp.raise_for_status()
            batch = await resp.json()
            if not batch:
                break
            repos.extend(batch)
            if 'next' in resp.links:
                page += 1
            else:
                break
    return repos


# Get commits for a specific repository
async def get_commits(session, user, repo):
    url = f"{GITHUB_API}/repos/{user}/{repo}/commits"
    commits = []
    page = 1

    while True:
        params = {"per_page": PER_PAGE, "page": page}
        async with session.get(url, headers=HEADERS, params=params) as resp:
            resp.raise_for_status()
            batch = await resp.json()
            if not batch:
                break
            commits.extend(batch)
            if 'next' in resp.links:
                page += 1
            else:
                break
    return commits


# Get contents for a specific repository
async def get_contents(session, user, repo):
    url = f"{GITHUB_API}/repos/{user}/{repo}/contents"
    contents = []
    page = 1

    while True:
        params = {"per_page": PER_PAGE, "page": page}
        async with session.get(url, headers=HEADERS, params=params) as resp:
            resp.raise_for_status()
            batch = await resp.json()
            if not batch:
                break
            contents.extend(batch)
            if 'next' in resp.links:
                page += 1
            else:
                break
    return contents


# Fetch only public repositories for a user
async def fetch_all_repos_only(user):
    async with aiohttp.ClientSession() as session:
        repos = await get_repos(session, user)
        return [
            {
                "name": repo["name"],
                "full_name": repo["full_name"],
                "description": repo.get("description"),
                "url": repo["html_url"],
                "created_at": repo["created_at"],
                "updated_at": repo["updated_at"],
                "language": repo.get("language")
            }
            for repo in repos
        ]


# Fetch commits from all repos concurrently
async def fetch_all_commits(user):
    report = []
    async with aiohttp.ClientSession() as session:
        repos = await get_repos(session, user)

        tasks = []
        for repo in repos:
            repo_name = repo["name"]
            repo_owner = repo["owner"]["login"]
            tasks.append(fetch_repo_commits(session, repo_owner, repo_name, report))

        await asyncio.gather(*tasks)
    return report


# Fetch contents from all repos concurrently
async def fetch_all_contents(user):
    report = []
    async with aiohttp.ClientSession() as session:
        repos = await get_repos(session, user)

        tasks = []
        for repo in repos:
            repo_name = repo["name"]
            repo_owner = repo["owner"]["login"]
            tasks.append(fetch_repo_contents(session, repo_owner, repo_name, report))

        await asyncio.gather(*tasks)
    return report



# Auxiliar: Fetch commits for a repo and append to report
async def fetch_repo_commits(session, user, repo, report):
    commits = await get_commits(session, user, repo)
    for c in commits:
        report.append({
            "repo": repo,
            "sha": c["sha"],
            "author": c["commit"]["author"]["name"],
            "date": c["commit"]["author"]["date"],
            "message": c["commit"]["message"].split("\n")[0]
        })


# Auxiliar: Fetch contents for a repo and append to report
async def fetch_repo_contents(session, user, repo, report):
    contents = await get_contents(session, user, repo)
    for t in contents:
        report.append({
            "repo": repo,
            "name": t["name"],
            "sha": t["sha"],
            "type": t["type"],
            "size": t["size"]
        })

### 5 - Extract and Reports

In [61]:
# get all repositories
resultsR = await fetch_all_repos_only(USER)
dfR = pd.DataFrame(resultsR)
dfR.to_csv("github_repositories_report.csv", index=False)
dfR.head(100)

Unnamed: 0,name,full_name,description,url,created_at,updated_at,language
0,boysenberry-repo-1,octocat/boysenberry-repo-1,Testing,https://github.com/octocat/boysenberry-repo-1,2018-05-10T17:51:29Z,2025-06-23T04:56:59Z,
1,git-consortium,octocat/git-consortium,This repo is for demonstration purposes only.,https://github.com/octocat/git-consortium,2014-03-28T17:55:38Z,2025-06-22T07:25:39Z,
2,hello-worId,octocat/hello-worId,My first repository on GitHub.,https://github.com/octocat/hello-worId,2014-06-18T21:26:19Z,2025-06-26T01:12:04Z,
3,Hello-World,octocat/Hello-World,My first repository on GitHub!,https://github.com/octocat/Hello-World,2011-01-26T19:01:12Z,2025-06-24T20:07:55Z,
4,linguist,octocat/linguist,Language Savant. If your repository's language...,https://github.com/octocat/linguist,2016-08-02T17:35:14Z,2025-06-23T12:02:05Z,Ruby
5,octocat.github.io,octocat/octocat.github.io,,https://github.com/octocat/octocat.github.io,2014-03-18T20:54:39Z,2025-06-25T08:54:05Z,CSS
6,Spoon-Knife,octocat/Spoon-Knife,This repo is for demonstration purposes only.,https://github.com/octocat/Spoon-Knife,2011-01-27T19:30:43Z,2025-06-26T01:18:48Z,HTML
7,test-repo1,octocat/test-repo1,,https://github.com/octocat/test-repo1,2016-04-14T21:29:25Z,2025-06-26T01:12:04Z,


In [62]:
# get all commits
resultsC = await fetch_all_commits(USER)
dfC = pd.DataFrame(resultsC)
dfC.to_csv("github_commits_report.csv", index=False)
dfC.head(100)


Unnamed: 0,repo,sha,author,date,message
0,boysenberry-repo-1,d09e445076bbcd163fc9abfbe6d2fce09a611281,Jessica Canepa,2016-12-12T23:11:00Z,"Set theme: ""jekyll-theme-minimal"" in _config.yml."
1,boysenberry-repo-1,642f3f60f5989713b472447912fcfdcb74b0bd1a,Jessica Canepa,2016-12-12T23:06:48Z,Create Jekyll site.
2,boysenberry-repo-1,cc319a502ddb791661067a22fd47ada75073865d,jmarlena,2016-08-17T04:52:16Z,add new version of readme
3,boysenberry-repo-1,6e9828400139a9718d6bf41bcf8456bc0edb1921,Jessica Canepa,2016-07-28T17:04:30Z,Initial commit
4,Spoon-Knife,d0dd1f61b33d64e29d8bc1372a94ef6a2fee76a9,The Octocat,2014-02-12T23:20:44Z,Pointing to the guide for forking
...,...,...,...,...,...
95,linguist,144a85b775fe5b24a38c8328ccc508f346e95d7e,Liav Turkia,2016-06-25T03:00:16Z,Rename it to have .ino
96,linguist,a02790427818b219862f9760732f0a5456bc22e1,Liav Turkia,2016-06-25T02:55:32Z,Add another .ino sample for more precision
97,linguist,5ac2cdde5053d3cf811a197d35af713e6330e91f,Sahil Dua,2016-06-22T05:05:28Z,Add fontello CSS files to vendor.yml (#3068)
98,linguist,5c705b33676bc4b224e6ec785061bede7d6a82f8,Paul Chaignon,2016-06-22T05:02:05Z,Merge pull request #3069 from Alhadis/document...


In [63]:
# get all contents
resultsT = await fetch_all_contents(USER)
dfT = pd.DataFrame(resultsT)
dfT.to_csv("github_contents_report.csv", index=False)
dfT.head(100)

Unnamed: 0,repo,name,sha,type,size
0,boysenberry-repo-1,README.md,dbae7e62708b8f5fe7fdc474e4cc27f63cea6a3d,file,1291
1,boysenberry-repo-1,READTHIS.md,e69de29bb2d1d6434b8b29ae775ad8c2e48c5391,file,0
2,boysenberry-repo-1,_config.yml,06bbaece1a494835566a68b5032ee3474d92fd65,file,28
3,git-consortium,LICENSE,37a7d6ca3281f5d13d6a4f01e3fb759d9edbdc7f,file,1077
4,git-consortium,README.md,5687f2709fad262d3050e3f69b7e8fff67f7f4e2,file,306
5,git-consortium,product-backlog.md,1cb3fa158ca4fb160f404b57a71e58126762fc90,file,13031
6,Spoon-Knife,README.md,f4790267d0d362a90d6799759ece092616c40779,file,780
7,Spoon-Knife,index.html,a83618bcf17b4e8e643de75d09adc0e892043020,file,355
8,Spoon-Knife,styles.css,9b8528455cf79bca41ac100bcb531fcbf580985e,file,256
9,Hello-World,README,980a0d5f19a64b4b30a87d4206aade58726b60e3,file,13
