In [1]:
import requests
import pandas as pd

In [2]:
# Read the GitHub API token from a file kept outside the repository.
with open ('../../../Auth_Keys/graphql_api_auth.txt') as file:
    # Strip surrounding whitespace: text files normally end with a newline,
    # which would otherwise become part of the Authorization header value.
    token = file.read().strip()

# Authorization header sent with every GraphQL request
headers = {"Authorization": 'Bearer ' + token}

In [3]:
def run_query(query, timeout=60):
    """POST a GraphQL query to the GitHub API and return the decoded JSON body.

    Parameters
    ----------
    query : str
        GraphQL query text.
    timeout : float or tuple, optional
        Passed to ``requests.post``; seconds to wait for the server before
        giving up, so a stalled connection cannot hang the notebook forever.

    Returns
    -------
    dict
        The decoded JSON response.

    Raises
    ------
    Exception
        If the HTTP status is not 200, or if the response carries GraphQL
        ``errors`` and no ``data``.
    """
    response = requests.post(
        'https://api.github.com/graphql',
        json={'query': query},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))

    payload = response.json()

    # A GraphQL endpoint can answer HTTP 200 and still report a failed query
    # through an "errors" list. Fail here with the server's message instead of
    # letting callers hit a TypeError when they index into a missing "data".
    if isinstance(payload, dict) and payload.get('errors') and payload.get('data') is None:
        raise Exception("Query returned GraphQL errors: {}. {}".format(payload['errors'], query))

    return payload

In [17]:
# Query Pull Request
#
# The first-page query and the paginated follow-up query are both derived from
# this single template, so the field selection only has to be edited in one
# place. Literal GraphQL braces are doubled because the text is processed by
# str.format(); ``{after_clause}`` marks where the cursor argument goes.
_PR_QUERY_TEMPLATE = '''
{{
  repositoryOwner(login: "pandas-dev") {{
    repository(name: "pandas") {{
      pullRequests(first: 100{after_clause}) {{
        pageInfo {{
          endCursor
          hasNextPage
        }}
        nodes {{
          createdAt
          updatedAt
          title
          mergedBy {{
            login
          }}
          author {{
            login
            ... on User {{
              company
            }}
          }}
          authorAssociation
          files {{
            totalCount
          }}
          state
          resourcePath
          bodyText
          comments(first: 25) {{
            totalCount
            nodes {{
              author {{
                login
                ... on User {{
                  company
                }}
              }}
              authorAssociation
              bodyText
            }}
          }}
        }}
      }}
    }}
  }}
  rateLimit {{
    limit
    cost
    remaining
    resetAt
  }}
}}

'''

# First page: plain query text with no cursor argument.
query = _PR_QUERY_TEMPLATE.format(after_clause='')

# Following pages: still a str.format() template (braces stay doubled) that
# expects an ``end_cursor`` value, e.g. ``query_2.format(**variables)``.
query_2 = _PR_QUERY_TEMPLATE.replace('{after_clause}', ', after:"{end_cursor}"')

# Holds the cursor substituted into ``query_2``.
variables = {
    "end_cursor": ""
}

In [11]:
def to_df(result):
    """Return the pull-request nodes of a query result as a DataFrame.

    ``result`` is the decoded response dict; the nodes are read from
    ``data -> repositoryOwner -> repository -> pullRequests -> nodes``.
    """
    pull_requests = result['data']['repositoryOwner']['repository']['pullRequests']
    return pd.DataFrame(pull_requests['nodes'])

def get_page_info(result):
    """Return ``(endCursor, hasNextPage)`` from the result's pullRequests pageInfo."""
    page_info = result['data']['repositoryOwner']['repository']['pullRequests']['pageInfo']
    return page_info['endCursor'], page_info['hasNextPage']

def result_query(query):
    """Run one query and unpack the pieces the pagination loop needs.

    Returns a tuple ``(df_pr, has_cursor, limit)``: the page's pull requests
    as a DataFrame, the page's ``hasNextPage`` flag, and the ``remaining``
    value of the response's ``rateLimit`` object.

    Side effect: the page's end cursor is written to the module-level
    ``variables['end_cursor']`` so the next query can be formatted with it.
    """
    response = run_query(query)
    page_df = to_df(response)

    # Remember where this page ended for the follow-up query.
    next_cursor, has_next = get_page_info(response)
    variables['end_cursor'] = next_cursor

    remaining = response['data']['rateLimit']['remaining']
    return page_df, has_next, remaining

In [27]:
%%time
##### Comment Out this section if there's a 443 error #####

# Run first query
df_pr, has_cursor, limit = result_query(query)

print('Starting limit is: ', limit)
print('Start cursor: ', has_cursor, '\n')

##### Comment Out this section if there's a 443 error #####

# Collect pages in a list and concatenate once at the end; calling pd.concat
# inside the loop re-copies the whole accumulated frame on every page.
# Seeding the list with the current df_pr keeps the resume workflow working
# when the section above is commented out.
pr_pages = [df_pr]

try:
    # Keep paging while another page exists and rate-limit budget remains.
    # The comparison is strict: with 0 points remaining the next request
    # cannot succeed, so there is no point sending it.
    while has_cursor and (limit > 0):

        # Run next queries
        df_pr_2, has_cursor, limit = result_query(query_2.format(**variables))
        pr_pages.append(df_pr_2)

        # Print limit and cursor
        print('Next limit is: ', limit)
        print('Next cursor: ', has_cursor, '\n')
finally:
    # Runs even when a request raises part-way through, so every page fetched
    # so far ends up in df_pr and the loop can be resumed later.
    df_pr = pd.concat(pr_pages)

Next limit is:  4999
Next cursor:  True 

Next limit is:  4998
Next cursor:  True 

Next limit is:  4997
Next cursor:  True 

Next limit is:  4996
Next cursor:  True 

Next limit is:  4995
Next cursor:  True 

Next limit is:  4994
Next cursor:  True 

Next limit is:  4993
Next cursor:  True 

Next limit is:  4992
Next cursor:  True 

Next limit is:  4991
Next cursor:  True 



Exception: Query failed to run by returning code of 502. 
{
  repositoryOwner(login: "pandas-dev") {
    repository(name: "pandas") {
      pullRequests(first: 100, after:"Y3Vyc29yOnYyOpHOAOQMmg==") {
        pageInfo {
          endCursor
          hasNextPage
        }
        nodes {
          createdAt
          updatedAt
          title
          mergedBy {
            login
          }
          author {
            login
            ... on User {
              company
            }
          }
          authorAssociation
          files {
            totalCount
          }
          state
          resourcePath
          bodyText
          comments(first: 50) {
            totalCount
            nodes {
              author {
                login
                ... on User {
                  company
                }
              }
              authorAssociation
              bodyText
            }
          }
        }
      }
    }
  }
  rateLimit {
    limit
    cost
    remaining
    resetAt
  }
}



In [28]:
# Sanity check: (rows, columns) of the collected pull-request frame
df_pr.shape

(2300, 11)

In [None]:
# Persist the collected frame to disk with pandas' pickle writer.
# NOTE(review): the extension is ".pk1" (digit one), possibly a typo for
# ".pkl" — confirm before renaming, since other code may read this exact path.
df_pr.to_pickle('../../../Files/df_pr_pandas.pk1')