Importing libraries



In [24]:
import os
import pandas as pd
import requests

Setting environment variable (temporary during session)


In [25]:

# Read the GitHub token from the environment instead of embedding it in the
# notebook: a literal token leaks through version control and shared copies,
# and assigning a placeholder would overwrite a real token that is already
# exported. Set GITHUB_TOKEN in the shell before starting Jupyter.
token = os.getenv("GITHUB_TOKEN")

# Headers sent with every GitHub API request in this notebook.
HEADERS = {
    "Accept": "application/vnd.github+json"
}

# Authenticate only when a token is available; without one the requests are
# sent anonymously, which still works for public data but with a lower rate
# limit.
if token:
    HEADERS["Authorization"] = f"Bearer {token}"

GETTING REPO INFO

In [26]:
# Organisation whose repositories are listed.
ORG_NAME = "tidyverse"

# REST endpoint that returns the repositories of ORG_NAME.
url = "https://api.github.com/orgs/" + ORG_NAME + "/repos"

In [27]:
# Request the organisation's repositories. GitHub returns only 30 items per
# page by default, which silently truncates the listing, so ask for the
# maximum page size (100). Organisations with more than 100 repositories would
# still need to follow the pagination links in the response.
# The timeout (seconds) keeps the cell from hanging on a stalled connection.
response = requests.get(url, headers=HEADERS, params={"per_page": 100}, timeout=30)

In [28]:
# Fields kept from each repository record; they are also the column names
# (and column order) of df_repos.
REPO_FIELDS = [
    "name",
    "full_name",
    "description",
    "created_at",
    "updated_at",
    "language",
    "stargazers_count",
    "html_url",
]

# Check status and process data
if response.status_code == 200:
    repos_data = response.json()

    # Keep only the selected fields of every repository.
    repo_list = [{field: repo[field] for field in REPO_FIELDS} for repo in repos_data]
else:
    print(f"Failed to fetch repos. Status code: {response.status_code}")
    print(response.text)
    # Fall back to zero rows so df_repos is always (re)defined; otherwise later
    # cells raise NameError or silently reuse a frame left over from an
    # earlier run of this cell.
    repo_list = []

# Convert to DataFrame for easy viewing. Passing the columns explicitly keeps
# the schema stable even when there are no rows.
df_repos = pd.DataFrame(repo_list, columns=REPO_FIELDS)

In [36]:
# Save the repository table as CSV (row index omitted). The file name uses a
# real ".csv" extension, matching the commit and contents exports.
df_repos.to_csv("tidyverse_repos.csv", index=False)

# Preview the first rows.
df_repos.head()

Unnamed: 0,name,full_name,description,created_at,updated_at,language,stargazers_count,html_url
0,ggplot2,tidyverse/ggplot2,An implementation of the Grammar of Graphics in R,2008-05-25T01:21:32Z,2025-06-23T01:31:49Z,R,6711,https://github.com/tidyverse/ggplot2
1,lubridate,tidyverse/lubridate,Make working with dates in R just that little ...,2009-03-11T01:18:52Z,2025-06-05T12:26:37Z,R,772,https://github.com/tidyverse/lubridate
2,stringr,tidyverse/stringr,A fresh approach to string manipulation in R,2009-11-08T22:20:08Z,2025-06-01T01:21:07Z,R,638,https://github.com/tidyverse/stringr
3,dplyr,tidyverse/dplyr,dplyr: A grammar of data manipulation,2012-10-28T13:39:17Z,2025-06-21T07:29:38Z,R,4889,https://github.com/tidyverse/dplyr
4,readr,tidyverse/readr,"Read flat files (csv, tsv, fwf) into R",2013-07-25T15:28:22Z,2025-06-01T01:21:26Z,R,1015,https://github.com/tidyverse/readr


GETTING COMMITS

In [30]:
# Repository whose commit history is fetched.
owner = "tidyverse"
repo_name = "tidyverse"

commits_url = f"https://api.github.com/repos/{owner}/{repo_name}/commits"

# Query parameters: only the first page of 50 commits is requested, so the
# resulting table is a sample of the history, not the full history.
params = {
    "per_page": 50,  # number of commits per page
    "page": 1  # first page only
}

# Column names (and order) of df_commits.
COMMIT_FIELDS = ["sha", "author", "date", "message", "url"]

# Make the request. The timeout (seconds) keeps the cell from hanging on a
# stalled connection.
response_commits = requests.get(commits_url, headers=HEADERS, params=params, timeout=30)

# Process the response
if response_commits.status_code == 200:
    commits_data = response_commits.json()

    # Extract some commit info. Author name, date and message come from the
    # nested "commit" object of each record.
    commit_list = []
    for commit in commits_data:
        git_commit = commit["commit"]
        commit_list.append({
            "sha": commit["sha"],
            "author": git_commit["author"]["name"],
            "date": git_commit["author"]["date"],
            "message": git_commit["message"],
            "url": commit["html_url"]
        })
else:
    print(f"Failed to fetch commits. Status code: {response_commits.status_code}")
    print(response_commits.text)
    # Fall back to zero rows so df_commits is always (re)defined; otherwise
    # later cells raise NameError or silently reuse a stale frame.
    commit_list = []

# Passing the columns explicitly keeps the schema stable with zero rows.
df_commits = pd.DataFrame(commit_list, columns=COMMIT_FIELDS)


In [35]:
# Write the commit table to disk without the row index, then show the first
# rows as the cell output.
commits_csv_path = "tidyverse_commits.csv"
df_commits.to_csv(commits_csv_path, index=False)
df_commits.head()

Unnamed: 0,sha,author,date,message,url
0,0231aafbc56914ee5371dd6c7b60677f168d7154,Hadley Wickham,2025-06-18T20:13:42Z,`use_air()` (#358),https://github.com/tidyverse/tidyverse/commit/...
1,8ec56d7134ca6d58450151bf42d3f8a9c1a2407b,Hadley Wickham,2025-06-18T17:52:11Z,Bump requirements to R 4.1 (#357)\n\nAnd use b...,https://github.com/tidyverse/tidyverse/commit/...
2,1c6afa4f6b943480b405f30b9557d4726d776efe,Michael Chirico,2025-06-18T17:26:17Z,Improve organization of .onAttach (#355),https://github.com/tidyverse/tidyverse/commit/...
3,c06a3c9938f55cc6f5cab90ae49df7ed7e8e9eb6,Michael Chirico,2024-10-21T21:59:39Z,Use .lintr to enforce tidyverse style (#337)\n...,https://github.com/tidyverse/tidyverse/commit/...
4,b6bcbcd3432665ba943ec29e2ef188fdfda6b669,Hadley Wickham,2024-10-21T21:42:15Z,Drop modelr (#335),https://github.com/tidyverse/tidyverse/commit/...


GETTING CONTENTS


In [32]:
# STEP 1: Define the repo whose top-level contents are listed
owner = "tidyverse"
repo_name = "tidyverse"
contents_url = f"https://api.github.com/repos/{owner}/{repo_name}/contents/"

# Column names (and order) of df_content.
CONTENT_FIELDS = ["sha", "name", "size", "git_url", "url", "type"]

# STEP 2: Make the request. The contents endpoint returns the directory
# listing in a single response and documents no per_page/page parameters, so
# none are sent. The timeout (seconds) keeps the cell from hanging on a
# stalled connection.
response_contents = requests.get(contents_url, headers=HEADERS, timeout=30)

# STEP 3: Process the response
if response_contents.status_code == 200:
    content_data = response_contents.json()

    # Extract some content info; the "url" column holds the browser link
    # (html_url), not the API URL.
    content_list = [
        {
            "sha": content["sha"],
            "name": content["name"],
            "size": content["size"],
            "git_url": content["git_url"],
            "url": content["html_url"],
            "type": content["type"]
        }
        for content in content_data
    ]
else:
    print(f"Failed to fetch content. Status code: {response_contents.status_code}")
    print(response_contents.text)
    # Fall back to zero rows so df_content is always (re)defined; otherwise
    # later cells raise NameError or silently reuse a stale frame.
    content_list = []

# Passing the columns explicitly keeps the schema stable with zero rows.
df_content = pd.DataFrame(content_list, columns=CONTENT_FIELDS)


In [34]:
# Write the contents table to disk without the row index, then show the first
# rows as the cell output.
contents_csv_path = "tidyverse_contents.csv"
df_content.to_csv(contents_csv_path, index=False)
df_content.head()

Unnamed: 0,sha,name,size,git_url,url,type
0,5714254793a305bfcb8ed74fba2b5f46152d05d6,.Rbuildignore,280,https://api.github.com/repos/tidyverse/tidyver...,https://github.com/tidyverse/tidyverse/blob/ma...,file
1,a27b3bf9b8b98a304c60415cbde7e889f661f7eb,.covrignore,26,https://api.github.com/repos/tidyverse/tidyver...,https://github.com/tidyverse/tidyverse/blob/ma...,file
2,617eb02c39820d261c1e6497f1a3df1bef19dda9,.github,0,https://api.github.com/repos/tidyverse/tidyver...,https://github.com/tidyverse/tidyverse/tree/ma...,dir
3,1556d9a3ff549d0683ba7d21d91a7a46eac1e8b0,.gitignore,72,https://api.github.com/repos/tidyverse/tidyver...,https://github.com/tidyverse/tidyverse/blob/ma...,file
4,22384d21b33851ac1be1851f53daf543fe5f4267,.lintr,128,https://api.github.com/repos/tidyverse/tidyver...,https://github.com/tidyverse/tidyverse/blob/ma...,file
