# Fetch all GitHub Release info for CNCF projects

## install dependencies

In [1]:
%pip list | grep -E 'pandas|dask|sqlalchemy|psycopg2-binary|graphviz|PyGitHub|opentelemetry-api|opentelemetry-sdk|python-dotenv|jsonpath-ng'
%pip install pandas dask sqlalchemy psycopg2-binary graphviz PyGitHub opentelemetry-api opentelemetry-sdk python-dotenv jsonpath-ng 

dask                               2023.10.0
dask_labextension                  7.0.0
jsonpath-ng                        1.6.0
opentelemetry-api                  1.20.0
opentelemetry-sdk                  1.20.0
pandas                             2.0.3
psycopg2-binary                    2.9.9
python-dotenv                      1.0.0
Note: you may need to restart the kernel to use updated packages.
Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.0/47.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: graphviz
Successfully installed graphviz-0.20.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip list | grep -E 'pandas|dask|sqlalchemy|psycopg2-binary|graphvizPy|GitHub|opentelemetry-api|opentelemetry-sdk|python-dotenv|jsonpath-ng'

dask                               2023.10.0
dask_labextension                  7.0.0
jsonpath-ng                        1.6.0
opentelemetry-api                  1.20.0
opentelemetry-sdk                  1.20.0
pandas                             2.0.3
psycopg2-binary                    2.9.9
python-dotenv                      1.0.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import time
import json
from datetime import datetime, timezone

import pandas as pd
from typing import List
from github import Github, GithubException

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 150)

# for PAT / token
from dotenv import load_dotenv
load_dotenv()

True

## Load OSRB repo list

In [4]:
def safe_set_index(df:         pd.DataFrame, 
                   idx_wanted: list[str]) -> pd.DataFrame:

    # check to see if the index is already set, else, data loss as set_index can be destructive
    idx_existing = list(df.index.names)

    if idx_wanted == idx_existing:
        print(f'\n*** WARNING: attempt to set index to what it already is thwarted! \n')
    else:
        df.set_index(idx_wanted, verify_integrity=True, inplace=True) # note: index must be unique!
        df.sort_index(inplace=True, ascending=True)
    return df

def split_org_repo(df:               pd.DataFrame, 
                   colname:          str,
                   drop:             bool = False,
                   newcol_org_name:  str = 'org_name',
                   newcol_repo_name: str = 'repo_name',
                   multi_idx:        bool = False) -> pd.DataFrame:
    '''split_org_repo(df, colname) - org_name/repo_name --> org_name, repo_named'''
    
    if colname is None:
        raise ValueError('split_org_repo: missing colname!')

    # df['tmp'] = df[colname].copy()
    # df_newcols = df['tmp'].str.split(pat='/', n=1, expand=True)

    # https://swdevnotes.com/python/2022/extract-data-from-json-in-pandas-dataframe/
    # expand=True returns a dataframe  which we can rename columns on
    df_newcols = df[colname].copy().str.split(pat='/', n=1, expand=True)
    df_newcols.rename(columns={0: newcol_org_name, 1: newcol_repo_name}, inplace=True)

    if drop:
        df.drop(colname, axis=1, inplace=True)

    df = pd.concat([df,df_newcols], axis=1)

    if multi_idx:
        safe_set_index(df, idx_wanted=[newcol_org_name, newcol_repo_name])
    else:
        safe_set_index(df, idx_wanted=[colname])

    return df

#############

def load_repos(fname: str=None, splitcols: bool=False, lowercase: bool=False) -> pd.DataFrame:
    '''Load repos from a file'''

    with open(fname, 'r') as f:
        df = pd.DataFrame(f.readlines(), columns=['name'])

    # strip comments (note '~' negation in selector)
    df = df[~df['name'].astype(str).str.startswith('#')]

    # clean up trailing newlines and where OSRB API returns a trailing slash
    df.name = df.name.str.rstrip(to_strip='\n')
    df.name = df.name.str.rstrip(to_strip='/')

    if lowercase:
        df.name = df.name.str.lower()

    # {name: someOrg/someRepo} --> { name: 'someOrg/someRepo', org_name = 'someOrg', repo_name = 'someRepo'
    if splitcols:
        df = split_org_repo(df, colname='name', multi_idx=True)
    else:
        # split_org_repo handles setting the index and sorting
        safe_set_index(df, idx_wanted=['name'])
    return df




In [5]:
OSRB_FILE = '../../data/osrb/osrb-repos.txt'
#OSRB_FILE = '../../data/osrb/osrb-repos-debugsmall.txt'

osrb = load_repos(OSRB_FILE, lowercase=True)
display(osrb.head())

osrb_split = load_repos(OSRB_FILE, splitcols=True, lowercase=True)
display(osrb_split)

osrb_split.groupby('org_name').count()

academysoftwarefoundation/materialx
academysoftwarefoundation/opencolorio
academysoftwarefoundation/opencue/tree/master
academysoftwarefoundation/openexr
academysoftwarefoundation/openvdb


Unnamed: 0_level_0,Unnamed: 1_level_0,name
org_name,repo_name,Unnamed: 2_level_1
academysoftwarefoundation,materialx,academysoftwarefoundation/materialx
academysoftwarefoundation,opencolorio,academysoftwarefoundation/opencolorio
academysoftwarefoundation,opencue/tree/master,academysoftwarefoundation/opencue/tree/master
academysoftwarefoundation,openexr,academysoftwarefoundation/openexr
academysoftwarefoundation,openvdb,academysoftwarefoundation/openvdb
academysoftwarefoundation,rez,academysoftwarefoundation/rez
apache,airflow,apache/airflow
apache,arrow,apache/arrow
apache,cassandra,apache/cassandra
apache,cloudstack,apache/cloudstack


Unnamed: 0_level_0,name
org_name,Unnamed: 1_level_1
academysoftwarefoundation,6
apache,25
apple,21
autodesk,1
aws,1
bazelbuild,1
benchopt,1
boostorg,1
buildbarn,1
buildbot,1


## Fetch project high level metadata from GitHub API

In [6]:
import time
import json
import pandas as pd
from typing import List, Tuple
from datetime import datetime, timezone
from github import Github, GithubException
from jsonpath_ng import parse

def fetch_repo_data(token: str, 
                    repo_list: List[str], 
                    include_releases: bool=False,
                    releases_since: datetime=None, 
                    json_file: str=None, 
                    csv_file: str=None, 
                    state_file: str=None, 
                    properties: List[Tuple[str, str]]=None) -> pd.DataFrame:

    # Initialize DataFrame
    df = pd.DataFrame()

    # Initialize GitHub client
    g = Github(token)

    # Initialize loop state
    if state_file:
        try:
            with open(state_file, 'r') as f:
                state = json.load(f)
        except FileNotFoundError:
            state = {'i': 0, 'repos_done': []}
    else:
        state = {'i': 0, 'repos_done': []}

    while state['i'] < len(repo_list):
        repo_str = repo_list[state['i']]

        # Skip repository if already done
        if repo_str in state['repos_done']:
            print(f"Skipping {repo_str}")
            state['i'] += 1
            continue

        while True:
            try:
                repo = g.get_repo(repo_str)
                break
            except GithubException as e:
                if e.status == 404:
                    print(f"Repository {repo_str} not found")
                    break
                elif e.status == 429:
                    print(f"Rate limit exceeded, waiting for {e.headers['Retry-After']} seconds...")
                    time.sleep(int(e.headers['Retry-After']))
                else:
                    print(f"Error getting repository {repo_str}: {e}")
                    break

        if not repo:
            state['i'] += 1
            continue

        print(f"{repo_str} Fetching Metadata...")
        repo_dict = repo.raw_data

        if properties is None:
            # Add all properties to DataFrame
            df = pd.concat([df, pd.json_normalize(repo_dict)])
        else:
            # Add specific (and specified!) properties to DataFrame
            for prop_path, col_name in properties:
                jsonpath_expr = parse(prop_path)
                matches = jsonpath_expr.find(repo_dict)
                values = [match.value for match in matches]
                df[col_name] = values

        if include_releases:
            releases = repo.get_releases()
            for release in releases:
                if since is None or release.created_at >= since:
                    df = pd.concat([df, pd.DataFrame({
                        'repo_name': [repo_str],
                        'release_name': [release.title],
                        'release_date': [str(release.published_at)],
                        'language': [repo.language],
                        'release_notes': [release.body],
                        'stars': [repo.stargazers_count],
                        'forks': [repo.forks_count],
                        'open_issues': [repo.open_issues_count]
                    })])
                    print(f"Added {release.published_at}, {repo_str}::{release.title}  ")

        # Save state
        if state_file:
            state['repos_done'].append(repo_str)
            with open(state_file, 'w') as f:
                json.dump(state, f, indent=4)

        state['i'] += 1

    # Save as CSV
    if csv_file:
        df.to_csv(csv_file, index=False)

    # Save as JSON
    if json_file:
        # with open(json_file, 'w') as f:
        #     json.dump(df.to_dict(orient='records', f, indent=4)
        df.to_json(json_file, orient='records', indent=4, lines=True)

    return df



## Get repo metadata from GitHub (REST) API

In [7]:
osrb.reset_index(inplace=False).name.to_list()

['academysoftwarefoundation/materialx',
 'academysoftwarefoundation/opencolorio',
 'academysoftwarefoundation/opencue/tree/master',
 'academysoftwarefoundation/openexr',
 'academysoftwarefoundation/openvdb',
 'academysoftwarefoundation/rez',
 'apache/airflow',
 'apache/arrow',
 'apache/cassandra',
 'apache/cloudstack',
 'apache/druid',
 'apache/flink',
 'apache/flume',
 'apache/hadoop',
 'apache/hbase',
 'apache/hive',
 'apache/iceberg',
 'apache/incubator-toree',
 'apache/kafka',
 'apache/kyuubi',
 'apache/logging-log4j2',
 'apache/lucene',
 'apache/orc',
 'apache/ozone',
 'apache/parquet-mr',
 'apache/solr',
 'apache/spark',
 'apache/superset',
 'apache/trafficserver',
 'apache/yunikorn-core',
 'apache/zookeeper',
 'apple/app-store-server-library-swift',
 'apple/batch-processing-gateway',
 'apple/coremltools',
 'apple/darwin-libplatform',
 'apple/fhirmodels',
 'apple/foundationdb',
 'apple/gcgc',
 'apple/password-manager-resources',
 'apple/servicetalk',
 'apple/swift',
 'apple/swift

In [9]:
import os

repos = osrb.reset_index(inplace=False).name.to_list()
since_date = datetime(2023, 1, 1, tzinfo=timezone.utc)

fnbase = 'osrb-repo-summary'
json_file = f'{fnbase}.json'
csv_file = f'{fnbase}.csv'
state_file = f'{fnbase}.state.json'

repodata = fetch_repo_data(os.environ['GITHUB_TOKEN'], 
                           repos,
                           include_releases=False,
                           releases_since=since_date, 
                           json_file=json_file, 
                           csv_file=csv_file)

                           #state_file=state_file

print(repodata.info(show_counts=True))
display(repodata.head())

academysoftwarefoundation/materialx Fetching Metadata...
academysoftwarefoundation/opencolorio Fetching Metadata...
Repository academysoftwarefoundation/opencue/tree/master not found
academysoftwarefoundation/opencue/tree/master Fetching Metadata...
academysoftwarefoundation/openexr Fetching Metadata...
academysoftwarefoundation/openvdb Fetching Metadata...
academysoftwarefoundation/rez Fetching Metadata...
apache/airflow Fetching Metadata...
apache/arrow Fetching Metadata...
apache/cassandra Fetching Metadata...
apache/cloudstack Fetching Metadata...
apache/druid Fetching Metadata...
apache/flink Fetching Metadata...
apache/flume Fetching Metadata...
apache/hadoop Fetching Metadata...
apache/hbase Fetching Metadata...
apache/hive Fetching Metadata...
apache/iceberg Fetching Metadata...
apache/incubator-toree Fetching Metadata...
apache/kafka Fetching Metadata...
apache/kyuubi Fetching Metadata...
apache/logging-log4j2 Fetching Metadata...
apache/lucene Fetching Metadata...
apache/orc 

Following Github server redirection from /repos/jetstack/cert-manager to /repositories/92313258


jetstack/cert-manager Fetching Metadata...
Repository jupyter not found
jupyter Fetching Metadata...
jupyter-server/enterprise_gateway Fetching Metadata...
kata-containers/kata-containers Fetching Metadata...
kubeflow/kubeflow Fetching Metadata...
kubernetes/kubernetes Fetching Metadata...
kubevirt/kubevirt Fetching Metadata...
lzfse/lzfse Fetching Metadata...
machyve/xhyve Fetching Metadata...


Following Github server redirection from /repos/maproulette/maproulette2 to /repositories/53699564


maproulette/maproulette2 Fetching Metadata...
microsoft/typescript Fetching Metadata...
multipath-tcp/mptcp Fetching Metadata...
netty/netty Fetching Metadata...
open-policy-agent/opa Fetching Metadata...
open-telemetry/opentelemetry.io Fetching Metadata...
osmlab/atlas Fetching Metadata...
osquery/osquery Fetching Metadata...


Following Github server redirection from /repos/pixaranimationstudios/usd to /repositories/58168143


pixaranimationstudios/usd Fetching Metadata...
playframework/playframework Fetching Metadata...
prometheus/prometheus Fetching Metadata...
pulumi/pulumi Fetching Metadata...
pytorch/pytorch Fetching Metadata...
qemu/qemu Fetching Metadata...
researchkit/researchkit Fetching Metadata...
scverse/scanpy Fetching Metadata...
seleniumhq/selenium Fetching Metadata...
spinnaker/spinnaker Fetching Metadata...
strimzi/strimzi-kafka-operator Fetching Metadata...
swift-server/swift-aws-lambda-runtime Fetching Metadata...
swift-server/swift-backtrace Fetching Metadata...
Repository tektoncd not found
tektoncd Fetching Metadata...
temporalio/temporal Fetching Metadata...
theislab/moscot Fetching Metadata...
trinodb/trino Fetching Metadata...
uber/remoteshuffleservice Fetching Metadata...
vega/vega Fetching Metadata...
<class 'pandas.core.frame.DataFrame'>
Index: 123 entries, 0 to 0
Columns: 126 entries, id to license
dtypes: bool(20), int64(13), object(93)
memory usage: 109.3+ KB
None


Unnamed: 0,id,node_id,name,full_name,private,html_url,description,fork,url,forks_url,keys_url,collaborators_url,teams_url,hooks_url,issue_events_url,events_url,assignees_url,branches_url,tags_url,blobs_url,git_tags_url,git_refs_url,trees_url,statuses_url,languages_url,stargazers_url,contributors_url,subscribers_url,subscription_url,commits_url,git_commits_url,comments_url,issue_comment_url,contents_url,compare_url,merges_url,archive_url,downloads_url,issues_url,pulls_url,milestones_url,notifications_url,labels_url,releases_url,deployments_url,created_at,updated_at,pushed_at,git_url,ssh_url,clone_url,svn_url,homepage,size,stargazers_count,watchers_count,language,has_issues,has_projects,has_downloads,has_wiki,has_pages,has_discussions,forks_count,mirror_url,archived,disabled,open_issues_count,allow_forking,is_template,web_commit_signoff_required,topics,visibility,forks,open_issues,watchers,default_branch,network_count,subscribers_count,owner.login,owner.id,owner.node_id,owner.avatar_url,owner.gravatar_id,owner.url,owner.html_url,owner.followers_url,owner.following_url,owner.gists_url,owner.starred_url,owner.subscriptions_url,owner.organizations_url,owner.repos_url,owner.events_url,owner.received_events_url,owner.type,owner.site_admin,license.key,license.name,license.spdx_id,license.url,license.node_id,permissions.admin,permissions.maintain,permissions.push,permissions.triage,permissions.pull,organization.login,organization.id,organization.node_id,organization.avatar_url,organization.gravatar_id,organization.url,organization.html_url,organization.followers_url,organization.following_url,organization.gists_url,organization.starred_url,organization.subscriptions_url,organization.organizations_url,organization.repos_url,organization.events_url,organization.received_events_url,organization.type,organization.site_admin,license
0,96365647,MDEwOlJlcG9zaXRvcnk5NjM2NTY0Nw==,MaterialX,AcademySoftwareFoundation/MaterialX,False,https://github.com/AcademySoftwareFoundation/M...,MaterialX is an open standard for the exchange...,False,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,2017-07-05T22:18:25Z,2023-10-27T16:28:17Z,2023-10-28T00:29:32Z,git://github.com/AcademySoftwareFoundation/Mat...,git@github.com:AcademySoftwareFoundation/Mater...,https://github.com/AcademySoftwareFoundation/M...,https://github.com/AcademySoftwareFoundation/M...,http://www.materialx.org/,197226,1590,1590,C++,True,True,True,True,True,False,308,,False,False,119,True,False,True,"[3d-graphics, computer-graphics, materialx, ph...",public,308,119,1590,main,308,85,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,apache-2.0,Apache License 2.0,Apache-2.0,https://api.github.com/licenses/apache-2.0,MDc6TGljZW5zZTI=,False,False,False,False,True,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,
0,775131,MDEwOlJlcG9zaXRvcnk3NzUxMzE=,OpenColorIO,AcademySoftwareFoundation/OpenColorIO,False,https://github.com/AcademySoftwareFoundation/O...,A color management framework for visual effect...,False,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,2010-07-14T18:22:06Z,2023-10-27T05:57:50Z,2023-10-27T22:56:44Z,git://github.com/AcademySoftwareFoundation/Ope...,git@github.com:AcademySoftwareFoundation/OpenC...,https://github.com/AcademySoftwareFoundation/O...,https://github.com/AcademySoftwareFoundation/O...,https://opencolorio.org,62811,1603,1603,C++,True,True,True,True,True,False,404,,False,False,296,True,False,True,[opencolorio],public,404,296,1603,main,404,117,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,bsd-3-clause,"BSD 3-Clause ""New"" or ""Revised"" License",BSD-3-Clause,https://api.github.com/licenses/bsd-3-clause,MDc6TGljZW5zZTU=,False,False,False,False,True,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,
0,775131,MDEwOlJlcG9zaXRvcnk3NzUxMzE=,OpenColorIO,AcademySoftwareFoundation/OpenColorIO,False,https://github.com/AcademySoftwareFoundation/O...,A color management framework for visual effect...,False,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,2010-07-14T18:22:06Z,2023-10-27T05:57:50Z,2023-10-27T22:56:44Z,git://github.com/AcademySoftwareFoundation/Ope...,git@github.com:AcademySoftwareFoundation/OpenC...,https://github.com/AcademySoftwareFoundation/O...,https://github.com/AcademySoftwareFoundation/O...,https://opencolorio.org,62811,1603,1603,C++,True,True,True,True,True,False,404,,False,False,296,True,False,True,[opencolorio],public,404,296,1603,main,404,117,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,bsd-3-clause,"BSD 3-Clause ""New"" or ""Revised"" License",BSD-3-Clause,https://api.github.com/licenses/bsd-3-clause,MDc6TGljZW5zZTU=,False,False,False,False,True,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,
0,3533348,MDEwOlJlcG9zaXRvcnkzNTMzMzQ4,openexr,AcademySoftwareFoundation/openexr,False,https://github.com/AcademySoftwareFoundation/o...,The OpenEXR project provides the specification...,False,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,2012-02-24T06:30:00Z,2023-10-25T18:00:03Z,2023-10-25T02:27:38Z,git://github.com/AcademySoftwareFoundation/ope...,git@github.com:AcademySoftwareFoundation/opene...,https://github.com/AcademySoftwareFoundation/o...,https://github.com/AcademySoftwareFoundation/o...,http://www.openexr.com/,53344,1504,1504,C,True,True,True,True,False,False,574,,False,False,139,True,False,True,"[academy, hdr, image-processing, images, opene...",public,574,139,1504,main,574,132,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,bsd-3-clause,"BSD 3-Clause ""New"" or ""Revised"" License",BSD-3-Clause,https://api.github.com/licenses/bsd-3-clause,MDc6TGljZW5zZTU=,False,False,False,False,True,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,
0,9401161,MDEwOlJlcG9zaXRvcnk5NDAxMTYx,openvdb,AcademySoftwareFoundation/openvdb,False,https://github.com/AcademySoftwareFoundation/o...,OpenVDB - Sparse volume data structure and tools,False,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,https://api.github.com/repos/AcademySoftwareFo...,2013-04-12T18:39:57Z,2023-10-27T22:04:39Z,2023-10-27T23:20:20Z,git://github.com/AcademySoftwareFoundation/ope...,git@github.com:AcademySoftwareFoundation/openv...,https://github.com/AcademySoftwareFoundation/o...,https://github.com/AcademySoftwareFoundation/o...,http://www.openvdb.org/,54198,2267,2267,C++,True,True,True,False,True,True,593,,False,False,121,True,False,True,"[c-plus-plus, dreamworks, fx, openvdb, openvdb...",public,593,121,2267,master,593,146,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,mpl-2.0,Mozilla Public License 2.0,MPL-2.0,https://api.github.com/licenses/mpl-2.0,MDc6TGljZW5zZTE0,False,False,False,False,True,AcademySoftwareFoundation,40807682,MDEyOk9yZ2FuaXphdGlvbjQwODA3Njgy,https://avatars.githubusercontent.com/u/408076...,,https://api.github.com/users/AcademySoftwareFo...,https://github.com/AcademySoftwareFoundation,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,https://api.github.com/users/AcademySoftwareFo...,Organization,False,


In [10]:
repodata.info(show_counts=True, verbose=True)

<class 'pandas.core.frame.DataFrame'>
Index: 123 entries, 0 to 0
Data columns (total 126 columns):
 #    Column                            Non-Null Count  Dtype 
---   ------                            --------------  ----- 
 0    id                                123 non-null    int64 
 1    node_id                           123 non-null    object
 2    name                              123 non-null    object
 3    full_name                         123 non-null    object
 4    private                           123 non-null    bool  
 5    html_url                          123 non-null    object
 6    description                       121 non-null    object
 7    fork                              123 non-null    bool  
 8    url                               123 non-null    object
 9    forks_url                         123 non-null    object
 10   keys_url                          123 non-null    object
 11   collaborators_url                 123 non-null    object
 12   teams_url    

In [11]:
lang_report = repodata.groupby('language').agg({'name': 'count', 
                                                'stargazers_count': 'sum', 
                                                'forks_count': 'sum', 
                                                'open_issues_count': 'sum'})
repo_report = repodata[['full_name', 'name', 'homepage', 'topics', 'forks_count', 'stargazers_count', 'open_issues_count', 'description']].copy()
display(lang_report)
display(repo_report)

Unnamed: 0_level_0,name,stargazers_count,forks_count,open_issues_count
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C,12,25471,8228,444
C++,14,168235,27810,16421
Go,18,350987,81038,10821
HTML,5,19325,5229,741
Java,28,232023,102158,15625
JavaScript,5,15849,3178,1625
Jupyter Notebook,1,434,43,5
Mathematica,1,686,185,236
Objective-C,1,5568,1173,287
Python,11,143107,38839,18033


Unnamed: 0,full_name,name,homepage,topics,forks_count,stargazers_count,open_issues_count,description
0,AcademySoftwareFoundation/MaterialX,MaterialX,http://www.materialx.org/,"[3d-graphics, computer-graphics, materialx, ph...",308,1590,119,MaterialX is an open standard for the exchange...
0,AcademySoftwareFoundation/OpenColorIO,OpenColorIO,https://opencolorio.org,[opencolorio],404,1603,296,A color management framework for visual effect...
0,AcademySoftwareFoundation/OpenColorIO,OpenColorIO,https://opencolorio.org,[opencolorio],404,1603,296,A color management framework for visual effect...
0,AcademySoftwareFoundation/openexr,openexr,http://www.openexr.com/,"[academy, hdr, image-processing, images, opene...",574,1504,139,The OpenEXR project provides the specification...
0,AcademySoftwareFoundation/openvdb,openvdb,http://www.openvdb.org/,"[c-plus-plus, dreamworks, fx, openvdb, openvdb...",593,2267,121,OpenVDB - Sparse volume data structure and tools
0,AcademySoftwareFoundation/rez,rez,https://rez.readthedocs.io,"[environment-configuration, multi-platform, pa...",297,842,340,"An integrated package configuration, build and..."
0,apache/airflow,airflow,https://airflow.apache.org/,"[airflow, apache, apache-airflow, python, sche...",13057,32200,932,Apache Airflow - A platform to programmaticall...
0,apache/arrow,arrow,https://arrow.apache.org/,[arrow],3101,12662,3936,Apache Arrow is a multi-language toolbox for a...
0,apache/cassandra,cassandra,http://cassandra.apache.org,"[cassandra, database, java]",3499,8245,381,Mirror of Apache Cassandra
0,apache/cloudstack,cloudstack,https://cloudstack.apache.org/,"[cloud, cloudstack, iaas, infrastructure, java...",1029,1535,391,Apache CloudStack is an opensource Infrastruct...


In [13]:
repo_report.to_csv('out/osrb-repo-report.csv', index=False)
repo_report.to_json('out/osrb-repo-report.json', orient='records', lines=True)

lang_report.to_csv('out/osrb-lang-report.csv')