# GithubGraphQL_Issues

In [1]:
import requests
import pandas as pd

In [2]:
# Read the personal access token from a file kept outside the repository.
with open('../../../Auth_Keys/graphql_api_auth.txt') as file:
    # Strip surrounding whitespace: text files usually end with a newline, and a
    # newline inside a header value makes the Authorization header invalid.
    token = file.read().strip()

# Authorization header sent with every GraphQL request
headers = {"Authorization": 'Bearer ' + token}

In [3]:
# Function to use requests.post to make an API call
def run_query(query, timeout=30):
    """Send a GraphQL query to the GitHub API and return the decoded JSON body.

    Parameters
    ----------
    query : str
        GraphQL query text; sent as the ``query`` field of the JSON request body.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.post``.
        Defaults to 30 so a stalled connection cannot hang the notebook forever.

    Returns
    -------
    dict
        The decoded JSON response.

    Raises
    ------
    Exception
        If the HTTP status code is not 200, or if the response carries GraphQL
        ``errors`` and no ``data``.
    """
    # `headers` is the module-level Authorization header built from the token file.
    response = requests.post(
        'https://api.github.com/graphql',
        json={'query': query},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))

    payload = response.json()
    # A GraphQL endpoint can answer 200 and still report a failure in the body.
    # Surface that here instead of letting callers trip over a missing 'data' key.
    if isinstance(payload, dict) and payload.get('errors') and not payload.get('data'):
        raise Exception("Query returned GraphQL errors: {}. {}".format(payload['errors'], query))
    return payload

In [4]:
def dates(date, num_days):
    """Return the date ``num_days`` after ``date`` as a ``YYYY-MM-DD`` string.

    ``date`` is anything ``pd.to_datetime`` can parse; ``num_days`` is passed to
    ``pd.DateOffset(days=...)``.
    """
    shifted = pd.to_datetime(date) + pd.DateOffset(days=num_days)
    return shifted.strftime('%Y-%m-%d')

In [5]:
def to_df(results):
    """Split a search response into an issues frame and a rate-limit frame.

    Parameters
    ----------
    results : dict
        Decoded response whose ``data`` entry holds ``search`` (with ``nodes``)
        and ``rateLimit``.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``df_issue`` is built from ``data.search.nodes``. ``df_rate_limit`` is
        built from the ``data`` mapping with the rows that have no ``rateLimit``
        value and the ``search`` column removed.
    """
    payload = results.get('data')

    # Issue records returned by the search
    issue_nodes = payload.get('search').get('nodes')
    df_issue = pd.DataFrame(issue_nodes)

    # Rate-limit information: keep only the rows/column that belong to rateLimit
    payload_frame = pd.DataFrame(payload)
    with_rate_limit = payload_frame.dropna(subset=['rateLimit'])
    df_rate_limit = with_rate_limit.drop(columns=['search'])

    return df_issue, df_rate_limit

In [6]:
def rate_limit():
    """Ask the API for the remaining request budget.

    Returns
    -------
    tuple
        ``(remaining, remaining)``: the same ``rateLimit.remaining`` value twice,
        so the caller can unpack it into both ``limit`` and ``lm``.
    """
    # Query Limit
    query_limit = '''
    {
      viewer {
        login
      }
      rateLimit {
        limit
        cost
        remaining
        resetAt
      }
    }
    '''

    # One request; the remaining count is read once and returned for both slots.
    remaining = run_query(query_limit)['data']['rateLimit']['remaining']
    return remaining, remaining

In [7]:
def concat_dfs(df, lm):
    """Return ``df`` alone, or ``df`` stacked on top of the running result.

    Besides its parameters this reads two notebook-level names: ``limit`` and
    ``merge_df``. While ``limit`` still equals ``lm``, ``df`` is returned
    unchanged; afterwards ``df`` is concatenated in front of ``merge_df``.
    """
    nothing_collected_yet = limit == lm
    if not nothing_collected_yet:
        # New rows go first, followed by everything gathered so far.
        return pd.concat([df, merge_df])
    return df

### Choose a limit: leave exactly one `limit` / `lm` pair uncommented in the next cell

In [8]:
%%time

# Use this limit if unsure
# limit, lm = rate_limit()

# Use this limit only if you are sure a full hour has passed since your last
# request. limit and lm must be the same number.
limit = 5000
lm = 5000

# Test limit. limit and lm must be the same number.
# limit = 2
# lm = 2

# Enter starting date
starting_date = '2010-09-28'

# Number of days
num_days = 7

# Query template. It never changes between requests, so it is built once here;
# only the two dates are filled in per request. Doubled braces are literal
# braces for str.format.
# NOTE: search(first:100) asks for at most 100 issues per date window and this
# loop does not paginate, so a window holding more issues than that is cut short.
query = '''
    {{
      search(first:100, query:"repo:pandas-dev/pandas created:{date_1}..{date_2} type:issue", type:ISSUE) {{
        nodes {{
          ... on Issue {{
            createdAt
            closedAt
            updatedAt
            title
            number
            author {{
              login
              ... on User {{
                company
              }}
            }}
            authorAssociation
            state
            bodyText
            comments(first:20) {{
              totalCount
              edges {{
                node {{
                  author {{
                    login
                    ... on User {{
                      company
                    }}
                  }}
                  createdAt
                  authorAssociation
                  bodyText
                }}
              }}
            }}
          }}
        }}
      }}
      rateLimit {{
        limit
        cost
        remaining
        resetAt
      }}
    }}
    '''

# No issue can have been created after today, so windows that start in the
# future would only burn rate limit.
today = pd.Timestamp.today().normalize()

# One frame per request; they are combined once at the end rather than growing
# a DataFrame inside the loop.
frames = []

try:
    # `limit` is the number of requests we may still make, so stop at zero
    # (the previous `>= 0` made one request more than the budget).
    while limit > 0 and pd.to_datetime(starting_date) <= today:

        # Get ending date
        ending_date = dates(starting_date, num_days)

        # Variables inputted for starting date and ending date
        variables = {
            'date_1': starting_date,
            'date_2': ending_date
        }

        # Results from the query
        results = run_query(query.format(**variables))

        # Convert data and rate limit to a df
        df_data, df_rate_limit = to_df(results)
        frames.append(df_data)

        # Change limit - Probably not needed since each call is cost 1
        #limit = df_rate_limit.loc['remaining', 'rateLimit']
        limit -= 1

        # Change starting date to ending date + 1
        starting_date = dates(ending_date, 1)
finally:
    # Runs even when a request fails part-way (e.g. a ConnectionError), so
    # whatever was collected is still available for saving. Newest window
    # first, matching the row order this cell has always produced.
    merge_df = pd.concat(frames[::-1]) if frames else pd.DataFrame()

ConnectionError: HTTPSConnectionPool(host='api.github.com', port=443): Max retries exceeded with url: /graphql (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x0000025618031198>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))

In [10]:
# Write the collected issues to disk without the DataFrame index column.
merge_df.to_csv(
    path_or_buf='Github_Issue_Pandas.csv',
    index=False,
)

# Check Limit

In [11]:
# Ad-hoc rate-limit check: asks for the authenticated login plus the current
# rateLimit fields (limit, cost, remaining, resetAt) and shows the raw response.
# This is the same query text that rate_limit() sends; here the whole response
# is kept in `result` so later cells can read from it.
query_limit = '''
{
  viewer {
    login
  }
  rateLimit {
    limit
    cost
    remaining
    resetAt
  }
}
'''
result = run_query(query_limit)
result

{'data': {'viewer': {'login': 'dustiny5'},
  'rateLimit': {'limit': 5000,
   'cost': 1,
   'remaining': 4962,
   'resetAt': '2019-08-24T00:55:36Z'}}}

# Test concat and limit

In [42]:
# Pull the remaining request count out of the rate-limit response in `result`.
# NOTE(review): this rebinds the notebook-level `limit` that the collection
# loop and concat_dfs also use.
limit = result['data']['rateLimit']['remaining']
limit

959

In [43]:
def concat_dfs(df):
    """Scratch check: print the notebook-level ``limit`` (or ``'False'``).

    NOTE(review): ``df`` is never used and ``limit`` is compared with itself,
    so the ``'False'`` branch is only reachable for a value that is not equal
    to itself. This definition also replaces the two-argument ``concat_dfs``
    defined earlier in the notebook.
    """
    if not (limit == limit):
        print('False')
        return
    print(limit)

In [45]:
# concat_dfs() as defined just above only prints and has no return statement,
# so each print() below also emits "None" after the printed limit.
# NOTE(review): this cell decrements the notebook-level `limit` every time it runs.
print(concat_dfs(limit))
# Change limit
limit -=1
print(concat_dfs(limit))

959
None
958
None


# Automate

In [None]:
# https://stackoverflow.com/questions/15088037/python-script-to-do-something-at-the-same-time-every-day