Copyright 2022 VMware, Inc.
SPDX-License-Identifier: BSD-2-Clause

This notebook documents an issue with the `codeOfConduct` object in the GitHub GraphQL API. I used it to share reproduction details with the GitHub team, who have confirmed that there is an issue; their API engineering team is working on a re-implementation of the `codeOfConduct` object.

The short version is that a query including the `codeOfConduct` object triggers an API timeout even though it gathers only a small amount of data, whereas other queries gather much larger amounts of data without timing out.

In [1]:
import requests
import json
import pandas as pd

# The GitHub API token is expected on the first line of a file named
# `gh_key` located in this directory.
with open('gh_key') as key_file:
    first_line = key_file.readline()

# Drop the trailing newline and any other trailing whitespace.
api_token = first_line.rstrip()

# Example with large amounts of data

Example of a query that gathers a large amount of data without triggering a timeout in the API. It does not include any data from the `codeOfConduct` object.

In [2]:
def complex_query(after_cursor = None):
    return """query RepoQuery($org_name: String!) {
             organization(login: $org_name) {
               repositories (first: 100 after: AFTER){
                 pageInfo {
                   hasNextPage
                   endCursor
                 }
                 nodes { 
                   nameWithOwner
                   name
                   licenseInfo {
                     name
                   }
                   isPrivate
                   isFork
                   isEmpty
                   isArchived
                   forkCount
                   stargazerCount
                   createdAt
                   updatedAt
                   pushedAt
                   defaultBranchRef {
                     name 
                     target{
                        ... on Commit{
                            history(first:1){
                        edges{
                            node{
                                ... on Commit{
                                    committedDate
                                    author{
                                      name
                                      email
                                      user{
                                        login
                                      }
                                    }
                            }
                        }
                    }
                }
                   }
                 }
               }
              }
              }
              }
            }""".replace(
        "AFTER", '"{}"'.format(after_cursor) if after_cursor else "null"
    )


In [3]:
# Organization to scan, plus the endpoint and auth header used for every page.
org_name = "vmware"
url = 'https://api.github.com/graphql'
headers = {'Authorization': 'token %s' % api_token}

has_next_page = True
after_cursor = None

# Collect the raw repository records from every page and build the DataFrame
# once at the end. DataFrame.append was removed in pandas 2.0, and growing a
# frame page by page re-copies all previous rows on each iteration.
repo_nodes = []

while has_next_page:

    query = complex_query(after_cursor)

    variables = {"org_name": org_name}
    r = requests.post(url=url, json={'query': query, 'variables': variables}, headers=headers)
    json_data = json.loads(r.text)

    # A failed query comes back as {'data': None, 'errors': [...]}. Stop with
    # the API's own payload instead of an opaque TypeError on the lookups below.
    repositories = ((json_data.get('data') or {}).get('organization') or {}).get('repositories')
    if repositories is None:
        raise RuntimeError(
            'GitHub GraphQL query for organization {!r} failed: {}'.format(
                org_name, json_data.get('errors', json_data)
            )
        )

    repo_nodes.extend(repositories['nodes'])

    # Pagination state for the next iteration.
    has_next_page = repositories["pageInfo"]["hasNextPage"]
    after_cursor = repositories["pageInfo"]["endCursor"]

repo_info_df = pd.DataFrame(repo_nodes)

repo_info_df

Unnamed: 0,nameWithOwner,name,licenseInfo,isPrivate,isFork,isEmpty,isArchived,forkCount,stargazerCount,createdAt,updatedAt,pushedAt,defaultBranchRef
0,vmware/pg_rewind,pg_rewind,,False,False,False,False,20,126,2013-05-23T10:45:43Z,2022-01-13T10:15:53Z,2020-07-15T07:20:13Z,"{'name': 'master', 'target': {'history': {'edg..."
1,vmware/pyvmomi,pyvmomi,{'name': 'Apache License 2.0'},False,False,False,False,732,1886,2013-12-13T17:30:30Z,2022-01-27T03:48:17Z,2021-10-14T20:36:08Z,"{'name': 'master', 'target': {'history': {'edg..."
2,vmware/pyvmomi-community-samples,pyvmomi-community-samples,{'name': 'Apache License 2.0'},False,False,False,False,844,860,2014-04-24T20:31:56Z,2022-01-27T06:16:13Z,2022-01-14T01:06:43Z,"{'name': 'master', 'target': {'history': {'edg..."
3,vmware/open-vm-tools,open-vm-tools,,False,False,False,False,357,1651,2014-04-25T21:30:54Z,2022-01-27T03:33:49Z,2022-01-21T01:24:38Z,"{'name': 'master', 'target': {'history': {'edg..."
4,vmware/upgrade-framework,upgrade-framework,{'name': 'Other'},False,False,False,False,12,18,2014-06-16T17:22:11Z,2021-12-06T23:09:20Z,2021-12-09T20:35:22Z,"{'name': 'master', 'target': {'history': {'edg..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,vmware/vmware-go-kcl-v2,vmware-go-kcl-v2,{'name': 'MIT License'},False,False,False,False,1,1,2021-11-30T15:05:11Z,2022-01-07T02:25:00Z,2022-01-07T02:24:58Z,"{'name': 'main', 'target': {'history': {'edges..."
207,vmware/.github,.github,{'name': 'Other'},False,False,False,False,4,1,2021-12-03T20:18:21Z,2021-12-15T15:48:36Z,2021-12-15T15:48:33Z,"{'name': 'main', 'target': {'history': {'edges..."
208,vmware/app-control-event-kernel-module,app-control-event-kernel-module,{'name': 'GNU General Public License v2.0'},False,False,False,False,0,1,2021-12-20T16:49:03Z,2022-01-10T15:00:15Z,2021-12-20T20:08:31Z,"{'name': 'main', 'target': {'history': {'edges..."
209,vmware/ml-ops-platform-for-vsphere,ml-ops-platform-for-vsphere,{'name': 'Apache License 2.0'},True,False,False,False,0,0,2022-01-10T12:34:26Z,2022-01-10T12:34:40Z,2022-01-10T12:34:38Z,


# Code of Conduct Example

Note: this query works if you reduce `first: 100` to `first: 20`, but even 100 repositories is a relatively small amount of data that should not time out unless there is a bug or serious performance issue within the `codeOfConduct` object.

In [11]:
# fails on vmware, bitnami
# works on vmware-tanzu, concourse, carbonblack
def make_query():
    """Return a GraphQL query for an organization's first 100 repositories.

    For each repository the query asks for its name, the URL of its
    `codeOfConduct` object and its creation timestamp. The organization login
    is supplied separately through the `$org_name` query variable.

    Note: a later cell in this notebook defines `make_query` again, so on a
    top-to-bottom run that later definition is the one in effect.
    """
    query_text = """query RepoQuery($org_name: String!) {
             organization(login: $org_name) {
               repositories (first: 100){
                 pageInfo {
                   hasNextPage
                   endCursor
                 }
                 nodes {
                   name
                   codeOfConduct{
                     url
                   }
                   createdAt
                 }
                }
               }
            }"""
    return query_text

In [14]:
def make_query(page_size=100):
    """Return the GraphQL query that requests `codeOfConduct` for an org's repositories.

    For each repository the query asks for the URL of its `codeOfConduct`
    object, its creation timestamp and its name. The organization login is
    supplied separately through the `$org_name` query variable.

    Parameters
    ----------
    page_size : int, optional
        Number of repositories requested via `first:`. Defaults to 100, which
        reproduces the original query text exactly. Smaller values (e.g. 20)
        make it easy to probe where the API starts failing.

    Returns
    -------
    str
        The query text.

    Raises
    ------
    ValueError
        If `page_size` is outside 1-100, the range GitHub accepts for `first`.
    """
    if not 1 <= page_size <= 100:
        raise ValueError(
            "page_size must be between 1 and 100, got {!r}".format(page_size)
        )

    # Same placeholder-substitution approach as `AFTER` in complex_query.
    template = """query MyQuery($org_name: String!) {
  organization(login: $org_name) {
    repositories(first: PAGE_SIZE) {
      pageInfo {
        hasNextPage
        endCursor
      }
      nodes {
        codeOfConduct {
          url
        }
        createdAt
        name
      }
    }
  }
}"""
    return template.replace("PAGE_SIZE", str(page_size))


# Works fine for some orgs: vmware-tanzu example

In [5]:
# Organization whose repositories are queried by the following cell.
# Observed results for the codeOfConduct query:
#   fails on: vmware, bitnami
#   works on: vmware-tanzu, concourse, carbonblack
org_name = "vmware-tanzu"

In [6]:
# Endpoint and auth header for the GitHub GraphQL API.
url = 'https://api.github.com/graphql'
headers = {'Authorization': 'token {}'.format(api_token)}

# Request body: the codeOfConduct query plus the value for its $org_name variable.
query = make_query()
variables = {"org_name": org_name}
payload = {'query': query, 'variables': variables}

# Send the query, decode the JSON response body and show it.
r = requests.post(url=url, json=payload, headers=headers)
json_data_tanzu = json.loads(r.text)
print(json_data_tanzu)

{'data': {'organization': {'repositories': {'pageInfo': {'hasNextPage': True, 'endCursor': 'Y3Vyc29yOnYyOpHOFdNqbg=='}, 'nodes': [{'name': 'sonobuoy', 'codeOfConduct': {'url': 'https://github.com/vmware-tanzu/sonobuoy/blob/main/CODE_OF_CONDUCT.md'}, 'createdAt': '2017-07-26T18:27:09Z'}, {'name': 'velero', 'codeOfConduct': {'url': 'https://github.com/vmware-tanzu/velero/blob/main/CODE_OF_CONDUCT.md'}, 'createdAt': '2017-08-02T17:22:11Z'}, {'name': 'velero-plugin-example', 'codeOfConduct': {'url': 'https://github.com/vmware-tanzu/velero-plugin-example/blob/main/CODE_OF_CONDUCT.md'}, 'createdAt': '2017-11-28T20:25:03Z'}, {'name': 'tgik', 'codeOfConduct': {'url': 'https://github.com/vmware-tanzu/tgik/blob/master/CODE-OF-CONDUCT.md'}, 'createdAt': '2018-05-07T18:11:06Z'}, {'name': 'carvel-kwt', 'codeOfConduct': {'url': 'https://github.com/vmware-tanzu/carvel-kwt/blob/develop/CODE_OF_CONDUCT.md'}, 'createdAt': '2018-09-24T17:59:19Z'}, {'name': 'thepodlets', 'codeOfConduct': {'url': 'https://

# Fails on other orgs: vmware org example

In [7]:
# Switch to an organization for which the codeOfConduct query fails.
# Observed results for the codeOfConduct query:
#   fails on: vmware, bitnami
#   works on: vmware-tanzu, concourse, carbonblack
org_name = "vmware"

In [8]:
# Same request as for vmware-tanzu, now against the org set in the previous cell.
url = 'https://api.github.com/graphql'
auth_value = 'token %s' % api_token
headers = {'Authorization': auth_value}

query = make_query()
variables = {"org_name": org_name}

r = requests.post(
    url=url,
    json={'query': query, 'variables': variables},
    headers=headers,
)

# In the recorded run this response carried `'data': None` and an `errors`
# entry instead of repository data.
json_data_vmware = json.loads(r.text)
print(json_data_vmware)

{'data': None, 'errors': [{'message': 'Something went wrong while executing your query. This may be the result of a timeout, or it could be a GitHub bug. Please include `F00A:6919:67C008:6E8EA6:61F409AE` when reporting this issue.'}]}


## Note: If you remove the `codeOfConduct` field, the rest of the query works fine.

In [9]:
# Control query: identical to the codeOfConduct query except that the
# codeOfConduct field is left out.
def make_query_no_coc():
    """Return a GraphQL query for an organization's first 100 repositories.

    Only each repository's name and creation timestamp are requested; the
    `codeOfConduct` object is deliberately omitted. The organization login is
    supplied separately through the `$org_name` query variable.
    """
    query_text = """query RepoQuery($org_name: String!) {
             organization(login: $org_name) {
               repositories (first: 100){
                 pageInfo {
                   hasNextPage
                   endCursor
                 }
                 nodes {
                   name
                   createdAt
                 }
                }
               }
            }"""
    return query_text

In [10]:
# Endpoint and auth header for the GitHub GraphQL API.
url = 'https://api.github.com/graphql'
headers = {'Authorization': 'token {}'.format(api_token)}

# Run the query without codeOfConduct against the `org_name` set by an earlier cell.
query = make_query_no_coc()
variables = {"org_name": org_name}
request_body = {'query': query, 'variables': variables}

r = requests.post(url=url, json=request_body, headers=headers)
json_data_no_coc = json.loads(r.text)
print(json_data_no_coc)

{'data': {'organization': {'repositories': {'pageInfo': {'hasNextPage': True, 'endCursor': 'Y3Vyc29yOnYyOpHOCSW2nA=='}, 'nodes': [{'name': 'pg_rewind', 'createdAt': '2013-05-23T10:45:43Z'}, {'name': 'pyvmomi', 'createdAt': '2013-12-13T17:30:30Z'}, {'name': 'pyvmomi-community-samples', 'createdAt': '2014-04-24T20:31:56Z'}, {'name': 'open-vm-tools', 'createdAt': '2014-04-25T21:30:54Z'}, {'name': 'upgrade-framework', 'createdAt': '2014-06-16T17:22:11Z'}, {'name': 'workflowTools', 'createdAt': '2014-07-18T22:16:00Z'}, {'name': 'govmomi', 'createdAt': '2014-08-12T16:15:08Z'}, {'name': 'pyvcloud', 'createdAt': '2014-11-12T19:36:04Z'}, {'name': 'vmw-guestinfo', 'createdAt': '2014-11-29T23:07:44Z'}, {'name': 'vcd-cli', 'createdAt': '2014-12-05T18:52:29Z'}, {'name': 'open-vmdk', 'createdAt': '2014-12-15T17:10:11Z'}, {'name': 'tdnf', 'createdAt': '2015-02-26T00:44:11Z'}, {'name': 'likewise-open', 'createdAt': '2015-02-26T19:58:04Z'}, {'name': 'photon', 'createdAt': '2015-04-15T17:22:47Z'}, {'nam