In [1]:
import requests
import pandas as pd

In [2]:
# Read the GitHub API token from a file kept outside the repository.
# Surrounding whitespace is stripped: text files usually end with a newline,
# and that newline would otherwise become part of the Authorization header value.
with open('../../../Auth_Keys/graphql_api_auth.txt') as file:
    token = file.read().strip()

# Authorization header sent with every GraphQL request
headers = {"Authorization": 'Bearer ' + token}

In [3]:
def run_query(query, variables=None, timeout=30):
    """POST a GraphQL query to the GitHub API and return the decoded JSON body.

    Parameters
    ----------
    query : str
        GraphQL document to execute.
    variables : dict, optional
        GraphQL variables sent alongside the query. Left out of the payload
        when None, so existing single-argument calls send the same request.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    dict
        The parsed JSON response.

    Raises
    ------
    Exception
        If the HTTP status code is not 200.
    RuntimeError
        If the response body carries a non-empty GraphQL "errors" entry.
    """
    payload = {'query': query}
    if variables is not None:
        payload['variables'] = variables

    # Without a timeout a stalled connection would block the cell indefinitely.
    response = requests.post('https://api.github.com/graphql', json=payload,
                             headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))

    result = response.json()
    # GraphQL reports query-level failures inside an HTTP 200 response, so the
    # status code alone does not prove the query succeeded.
    if isinstance(result, dict) and result.get('errors'):
        raise RuntimeError("Query returned GraphQL errors: {}. {}".format(result['errors'], query))
    return result

In [4]:
# GraphQL document for the first page of stargazers of pandas-dev/pandas.
# It requests repository metadata, up to 100 stargazer edges (starredAt plus
# basic user fields), the pageInfo needed to continue paging, and the
# rateLimit block that reports the remaining API budget.
query = '''
{
  repositoryOwner(login: "pandas-dev") {
    id
    login
    repository(name: "pandas") {
      id
      name
      createdAt
      updatedAt
      description
      licenseInfo {
        spdxId
      }
      stargazers(first:100) {
        totalCount
        pageInfo {
          endCursor
          hasNextPage
        }
        edges {
          starredAt
          node {
            createdAt
            updatedAt
            id
            login
            company
          }
        }
      }
    }
  }
  rateLimit {
    limit
    cost
    remaining
    resetAt
  }
}
'''

In [5]:
# Follow-up page of the stargazer query, written as a str.format template:
# literal GraphQL braces are doubled and {end_cursor} is the only placeholder.
query_2 = '''
{{
  repositoryOwner(login: "pandas-dev") {{
    id
    login
    repository(name: "pandas") {{
      id
      name
      createdAt
      updatedAt
      description
      licenseInfo {{
        spdxId
      }}
      stargazers(first:100, after:"{end_cursor}") {{
        totalCount
        pageInfo {{
          endCursor
          hasNextPage
        }}
        edges {{
          starredAt
          node {{
            createdAt
            updatedAt
            id
            login
            company
          }}
        }}
      }}
    }}
  }}
  rateLimit {{
    limit
    cost
    remaining
    resetAt
  }}
}}
'''

# Substitution values for query_2; the cursor starts out empty.
variables = dict(end_cursor="")

In [6]:
def to_df(result):
    """Build a DataFrame from the stargazer edges of a query response.

    `result` is the decoded response dict; the edges are read from
    data -> repositoryOwner -> repository -> stargazers -> edges, and each
    edge becomes one row.
    """
    stargazers = result['data']['repositoryOwner']['repository']['stargazers']
    return pd.DataFrame(stargazers['edges'])

In [7]:
def get_page_info(result):
    """Return the `(endCursor, hasNextPage)` pair from the stargazers pageInfo of a response."""
    page_info = result['data']['repositoryOwner']['repository']['stargazers']['pageInfo']
    return page_info['endCursor'], page_info['hasNextPage']

In [8]:
def result_query(query):
    """Run one stargazer query and unpack what the paging loop needs.

    Side effect: the page's end cursor is written to the module-level
    `variables['end_cursor']`.

    Returns a tuple `(df_star, has_cursor, limit)`: the page's edges as a
    DataFrame, the page's hasNextPage flag, and the `remaining` value of the
    response's rateLimit block.
    """
    response = run_query(query)
    page_frame = to_df(response)

    # Remember where this page ended so the next query can resume from there
    end_cursor, more_pages = get_page_info(response)
    variables['end_cursor'] = end_cursor

    remaining = response['data']['rateLimit']['remaining']
    return page_frame, more_pages, remaining

In [9]:
import pandas as pd

In [13]:
%%time

# Run first query
df_star, has_cursor, limit = result_query(query)

print('Starting limit is: ', limit)
print('Start cursor: ', has_cursor, '\n')

# Pages are collected in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies every earlier row on each iteration.
pages = [df_star]

try:
    # Stop when there are no more pages or the rate-limit budget is used up.
    # The check is `> 0`: a remaining count is never negative, so `>= 0`
    # could not end the loop.
    while has_cursor and limit > 0:

        # result_query stored the previous page's end cursor in `variables`
        df_star_2, has_cursor, limit = result_query(query_2.format(**variables))
        pages.append(df_star_2)

        # Print limit and cursor
        print('Next limit is: ', limit)
        print('Next cursor: ', has_cursor, '\n')
finally:
    # Runs on interruption or a failed request too, so the pages fetched so
    # far are kept. ignore_index gives the combined frame one continuous
    # index instead of repeating each page's own row labels.
    df_star = pd.concat(pages, ignore_index=True)

Starting limit is:  4071
Start cursor:  True 

Next limit is:  4070
Next cursor:  True 

Next limit is:  4069
Next cursor:  True 

Next limit is:  4068
Next cursor:  True 



KeyboardInterrupt: 

In [12]:
# Preview the first rows only instead of rendering the whole frame
df_star.head(10)

Unnamed: 0,node,starredAt
0,"{'createdAt': '2008-02-17T20:44:47Z', 'updated...",2010-08-24T01:37:33Z
1,"{'createdAt': '2008-02-20T18:28:22Z', 'updated...",2010-08-24T01:37:33Z
2,"{'createdAt': '2008-02-21T21:46:08Z', 'updated...",2010-08-24T01:37:33Z
3,"{'createdAt': '2008-02-27T11:10:03Z', 'updated...",2010-08-24T01:37:33Z
4,"{'createdAt': '2008-02-27T22:57:03Z', 'updated...",2010-08-24T01:37:33Z
5,"{'createdAt': '2008-02-28T00:45:42Z', 'updated...",2010-08-24T01:37:33Z
6,"{'createdAt': '2008-02-28T00:51:51Z', 'updated...",2010-08-24T01:37:33Z
7,"{'createdAt': '2008-02-28T03:05:37Z', 'updated...",2010-08-24T01:37:33Z
8,"{'createdAt': '2008-02-28T04:49:28Z', 'updated...",2010-08-24T01:37:33Z
9,"{'createdAt': '2008-02-28T10:48:19Z', 'updated...",2010-08-24T01:37:33Z


In [110]:
# Exploratory query over three paginated connections of pandas-dev/pandas
# (forks, stargazers, releases), each resumed from a hard-coded cursor, plus
# the watcher count.
query_0 = '''
{
  repositoryOwner(login: "pandas-dev") {
    id
    login
    repository(name: "pandas") {
      id
      name
      createdAt
      updatedAt
      description
      licenseInfo {
        spdxId
      }
        forks(first: 10, after:"Y3Vyc29yOnYyOpHOACPfBA==") {
        totalCount
        pageInfo {
          endCursor 
          }
        nodes {
          createdAt
          isFork
        }
      }
      watchers(first: 10) {
        totalCount
      }
      stargazers(first: 10, after:"Y3Vyc29yOnYyOpIAzgAC-AA=") {
        totalCount
        pageInfo {
          endCursor
        }
        edges{
          starredAt
          node {
            createdAt
            updatedAt
            login
          }
        }
      }
      releases(first:10, after:"Y3Vyc29yOnYyOpHOAAsMCg==") {
        totalCount
        pageInfo {
          endCursor
        }
        nodes {
          author {
            login
          }
          id
          createdAt
          publishedAt
          name
          description 
        }
      }
    }
  }
}
'''

In [111]:
# Execute query_0 and keep the response; later cells read the end cursors from it.
result_0 = run_query(query_0)
result_0

{'data': {'repositoryOwner': {'id': 'MDEyOk9yZ2FuaXphdGlvbjIxMjA2OTc2',
   'login': 'pandas-dev',
   'repository': {'id': 'MDEwOlJlcG9zaXRvcnk4NTgxMjc=',
    'name': 'pandas',
    'createdAt': '2010-08-24T01:37:33Z',
    'updatedAt': '2019-08-27T19:46:12Z',
    'description': 'Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more',
    'licenseInfo': {'spdxId': 'BSD-3-Clause'},
    'forks': {'totalCount': 8084,
     'pageInfo': {'endCursor': 'Y3Vyc29yOnYyOpHOACnmtA=='},
     'nodes': [{'createdAt': '2011-09-12T23:11:40Z', 'isFork': True},
      {'createdAt': '2011-09-13T15:36:42Z', 'isFork': True},
      {'createdAt': '2011-09-13T16:28:08Z', 'isFork': True},
      {'createdAt': '2011-09-14T16:16:42Z', 'isFork': True},
      {'createdAt': '2011-09-19T15:18:04Z', 'isFork': True},
      {'createdAt': '2011-09-26T01:09:50Z', 'isFork': True},
      {'createdAt': '2011-10-

In [117]:
# str.format template of the forks / stargazers / releases query. Literal
# GraphQL braces are doubled; {data_1}, {data_2} and {data_3} are the `after:`
# cursors of forks, stargazers and releases respectively.
query_pat = '''
{{
   repositoryOwner(login: "pandas-dev") {{
    id
    login
    repository(name: "pandas") {{
      id
      name
      createdAt
      updatedAt
      description
      licenseInfo {{
        spdxId
      }}
      forks(first:10, after:"{data_1}") {{
        totalCount
        pageInfo {{
          endCursor 
          }}
        nodes {{
          createdAt
          isFork
        }}
      }}
      watchers(first:10) {{
         totalCount
      }}
      stargazers(first:10, after:"{data_2}") {{
        totalCount
        pageInfo {{
          endCursor
        }}
        edges {{
           starredAt
           node {{
              createdAt
              updatedAt
              login
           }}
         }}
       }}
      releases(first:10, after:"{data_3}") {{
         totalCount
        pageInfo {{
           endCursor
        }}
        nodes {{
          author {{
            login
          }}
          id
          createdAt
          publishedAt
          name
          description 
        }}
      }}
    }}
  }}
}}
'''

# One cursor slot per paginated connection, all starting out empty.
variables_pat = dict.fromkeys(('data_1', 'data_2', 'data_3'), "")

In [118]:
# Store the forks end cursor from result_0 as the `after:` value for query_pat
variables_pat.update(
    data_1=result_0['data']['repositoryOwner']['repository']['forks']['pageInfo']['endCursor']
)
variables_pat['data_1']

'Y3Vyc29yOnYyOpHOACnmtA=='

In [119]:
# Store the stargazers end cursor from result_0 as the `after:` value for query_pat
variables_pat.update(
    data_2=result_0['data']['repositoryOwner']['repository']['stargazers']['pageInfo']['endCursor']
)
variables_pat['data_2']

'Y3Vyc29yOnYyOpIAzgAINsE='

In [120]:
# Store the releases end cursor from result_0 as the `after:` value for query_pat
variables_pat.update(
    data_3=result_0['data']['repositoryOwner']['repository']['releases']['pageInfo']['endCursor']
)
variables_pat['data_3']

'Y3Vyc29yOnYyOpHOACfQUg=='

In [121]:
# Fill the cursors held in variables_pat into the template and run the resulting query
run_query(query_pat.format(**variables_pat))

{'data': {'repositoryOwner': {'id': 'MDEyOk9yZ2FuaXphdGlvbjIxMjA2OTc2',
   'login': 'pandas-dev',
   'repository': {'id': 'MDEwOlJlcG9zaXRvcnk4NTgxMjc=',
    'name': 'pandas',
    'createdAt': '2010-08-24T01:37:33Z',
    'updatedAt': '2019-08-27T19:46:12Z',
    'description': 'Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more',
    'licenseInfo': {'spdxId': 'BSD-3-Clause'},
    'forks': {'totalCount': 8084,
     'pageInfo': {'endCursor': 'Y3Vyc29yOnYyOpHOAC5mVQ=='},
     'nodes': [{'createdAt': '2011-11-14T18:16:23Z', 'isFork': True},
      {'createdAt': '2011-11-17T16:59:14Z', 'isFork': True},
      {'createdAt': '2011-11-18T07:12:49Z', 'isFork': True},
      {'createdAt': '2011-11-19T00:00:38Z', 'isFork': True},
      {'createdAt': '2011-11-22T23:52:06Z', 'isFork': True},
      {'createdAt': '2011-12-02T22:39:19Z', 'isFork': True},
      {'createdAt': '2011-12-

In [98]:
# Show the cursor values currently held in variables_pat.
# NOTE(review): this cell's execution count (98) is lower than that of the
# cells above it, and the saved output lists the cursors hard-coded in query_0
# rather than the ones assigned from result_0 — re-run it or drop the cell.
variables_pat.values()

dict_values(['Y3Vyc29yOnYyOpHOACPfBA==', 'Y3Vyc29yOnYyOpIAzgAC-AA=', 'Y3Vyc29yOnYyOpHOAAsMCg=='])