# GithubGraphQL_Issues

In [1]:
import requests
import pandas as pd

In [2]:
# Read the GitHub API token from a key file kept outside the repository.
with open('../../../Auth_Keys/graphql_api_auth.txt') as file:
    # Strip surrounding whitespace: text files usually end with a newline,
    # and a newline inside the Authorization header value makes the header
    # invalid.
    token = file.read().strip()

# Authorization header used by every request to the GraphQL endpoint
headers = {"Authorization": 'Bearer ' + token}

In [3]:
# Function to use requests.post to make an API call
def run_query(query):
    """POST a GraphQL query to the GitHub API and return the decoded JSON.

    The query text is sent as the ``query`` field of a JSON body, together
    with the module-level ``headers`` dict.

    Raises:
        Exception: if the response status code is anything other than 200.
    """
    response = requests.post(
        'https://api.github.com/graphql',
        json={'query': query},
        headers=headers,
    )

    # Guard clause: anything but 200 is reported along with the query text
    if response.status_code != 200:
        message = "Query failed to run by returning code of {}. {}".format(
            response.status_code, query
        )
        raise Exception(message)

    return response.json()

In [4]:
def dates(date, num_days):
    """Return the date ``num_days`` days after ``date`` as 'YYYY-MM-DD'.

    Args:
        date: A date string (e.g. '2010-09-28') or other scalar that
            ``pd.to_datetime`` can parse.
        num_days: Number of calendar days to add.

    Returns:
        The shifted date formatted as a 'YYYY-MM-DD' string.
    """
    end_date = pd.to_datetime(date) + pd.DateOffset(days=num_days)

    # Format directly with dashes instead of formatting with ', ' separators
    # and replacing them afterwards.
    return end_date.strftime('%Y-%m-%d')

In [5]:
def to_df(results):
    """Split a GraphQL response into an issues frame and a rate-limit frame.

    Args:
        results: Decoded response dict with a 'data' entry that holds both
            'search' (containing 'nodes') and 'rateLimit'.

    Returns:
        A tuple ``(df_issue, df_rate_limit)``: the first is built from the
        search nodes; the second keeps only the 'rateLimit' column, indexed
        by the rate-limit field names.
    """
    payload = results.get('data')

    # One row per node returned by the search
    issue_nodes = payload.get('search').get('nodes')
    issues_frame = pd.DataFrame(issue_nodes)

    # Building a frame from the whole payload gives 'search' and 'rateLimit'
    # columns; rows with no rateLimit value are dropped, then the 'search'
    # column is removed.
    payload_frame = pd.DataFrame(payload)
    rate_rows = payload_frame.dropna(subset=['rateLimit'])
    rate_limit_frame = rate_rows.drop(columns=['search'])

    return issues_frame, rate_limit_frame

In [33]:
def concat_dfs(df):
    """Combine a new batch with the rows accumulated so far.

    Reads the module-level ``limit`` and ``merge_df``. While ``limit`` is
    exactly 5000 the batch is returned unchanged; otherwise the batch is
    concatenated in front of ``merge_df``.
    """
    if limit != 5000:
        # New rows first, previously merged rows after
        return pd.concat([df, merge_df])

    return df

In [34]:
# Rate-limit budget before the first request. concat_dfs treats a value of
# exactly 5000 as "nothing merged yet", so keep this at 5000 when starting.
limit = 5000

# First day of the first search window
starting_date = '2010-09-28'

# Each window ends this many days after its starting date
num_days = 7

# Query template, built once because only the two dates change between
# requests. Literal braces are doubled so str.format leaves them intact.
# NOTE: the search asks for first:100 with no pagination, so a window with
# more than 100 issues only yields its first 100.
query_template = '''
    {{
      search(first:100, query:"repo:pandas-dev/pandas created:{date_1}..{date_2} type:issue", type:ISSUE) {{
        nodes {{
          ... on Issue {{
            createdAt
            closedAt
            updatedAt
            title
            number
            author {{
              login
              ... on User {{
                company
              }}
            }}
            authorAssociation
            state
            bodyText
            comments(first:20) {{
              totalCount
              edges {{
                node {{
                  author {{
                    login
                    ... on User {{
                      company
                    }}
                  }}
                  createdAt
                  authorAssociation
                  bodyText
                }}
              }}
            }}
          }}
        }}
      }}
      rateLimit {{
        limit
        cost
        remaining
        resetAt
      }}
    }}
    '''

# Windows that start after today cannot contain issues yet
today = pd.Timestamp.today()

# Stop once the remaining budget reaches zero (it never goes negative, so
# ">= 0" would never end the loop) or once the window has moved past today.
while limit > 0 and pd.to_datetime(starting_date) <= today:

    # Last day of the current window
    ending_date = dates(starting_date, num_days)

    # Dates substituted into the query template
    variables = {
        'date_1': starting_date,
        'date_2': ending_date
    }
    query = query_template.format(**variables)

    # Results from the query
    results = run_query(query)

    # Convert data and rate limit to a df
    df_data, df_rate_limit = to_df(results)

    # Merge the new batch into the accumulated rows
    merge_df = concat_dfs(df_data)

    # Remaining budget as reported by the API
    limit = df_rate_limit.loc['remaining', 'rateLimit']

    # Next window starts the day after this one ended
    starting_date = dates(ending_date, 1)

# Check Limit

In [51]:
# Standalone query: asks for the authenticated viewer's login and the
# rateLimit fields (limit, cost, remaining, resetAt).
query = '''
{
  viewer {
    login
  }
  rateLimit {
    limit
    cost
    remaining
    resetAt
  }
}
'''
# No str.format here, so the braces are written singly
result = run_query(query)
# Bare expression so the notebook cell displays the response
result

{'data': {'viewer': {'login': 'dustiny5'},
  'rateLimit': {'limit': 5000,
   'cost': 1,
   'remaining': 4990,
   'resetAt': '2019-08-23T20:03:55Z'}}}