# GithubGraphQL_Issues

In [1]:
import requests
import pandas as pd

In [2]:
# Read the personal access token from a file kept outside the repository.
# The surrounding whitespace is stripped because text editors usually leave a
# trailing newline in the file, and a newline is not valid inside an HTTP
# header value.
with open('../../../Auth_Keys/graphql_api_auth.txt') as file:
    token = file.read().strip()

# Authorization header sent with every GraphQL request
headers = {"Authorization": 'Bearer ' + token}

In [3]:
# Function to use requests.post to make an API call
def run_query(query, timeout=30):
    """POST a GraphQL query to the GitHub API and return the decoded response.

    Parameters
    ----------
    query : str
        GraphQL document, sent as the ``query`` field of the JSON body.
    timeout : float or tuple, optional
        Passed to ``requests.post`` so a stalled connection cannot hang the
        notebook indefinitely. Defaults to 30 seconds.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    Exception
        If the HTTP status code is not 200, or if the body carries GraphQL
        ``errors`` and no ``data`` at all.

    Notes
    -----
    Uses the module-level ``headers`` dict for authorization. A timed-out
    request surfaces as the exception raised by ``requests`` itself.
    """
    response = requests.post(
        'https://api.github.com/graphql',
        json={'query': query},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))

    payload = response.json()
    # A GraphQL endpoint can answer 200 and still report failure in the body.
    # Only the "errors and nothing usable" case is rejected here; a response
    # that has both errors and data is returned unchanged for the caller.
    if payload.get('errors') and payload.get('data') is None:
        raise Exception("Query returned GraphQL errors: {}. {}".format(payload['errors'], query))
    return payload

In [4]:
def dates(date, num_days):
    """Return the date ``num_days`` after ``date`` as a ``YYYY-MM-DD`` string.

    Parameters
    ----------
    date : anything ``pd.to_datetime`` accepts (the notebook passes
        ``'YYYY-MM-DD'`` strings)
    num_days : number of days to add, passed to ``pd.DateOffset(days=...)``

    Returns
    -------
    str
        The shifted date with year, month and day joined by ``-``.
    """
    # Parse and shift in one step
    shifted = pd.to_datetime(date) + pd.DateOffset(days=num_days)

    # Render as "Y, m, d" and then swap the separators for dashes
    comma_separated = shifted.strftime('%Y, %m, %d')
    return comma_separated.replace(', ', '-')

In [5]:
def to_df(results):
    """Split a search response into an issues frame and a rate-limit frame.

    Parameters
    ----------
    results : dict
        Decoded response whose ``'data'`` entry holds ``'search'`` (with a
        ``'nodes'`` list) and ``'rateLimit'``.

    Returns
    -------
    tuple
        ``(df_issue, df_rate_limit)``: a DataFrame built from the search nodes,
        and a DataFrame built from the whole ``'data'`` entry with the rows
        lacking a ``rateLimit`` value dropped and the ``search`` column removed.
    """
    payload = results.get('data')

    # Issues: one row per node returned by the search
    nodes = payload.get('search').get('nodes')
    issue_frame = pd.DataFrame(nodes)

    # Rate limit: start from the whole payload, then keep only the rows that
    # have a rateLimit value and discard the search column
    rate_frame = pd.DataFrame(payload)
    rate_frame = rate_frame.dropna(subset=['rateLimit'])
    rate_frame = rate_frame.drop(columns=['search'])

    return issue_frame, rate_frame

In [20]:
def rate_limit():
    """Ask the API how many requests are left and return that number twice.

    Returns
    -------
    tuple
        ``(remaining, remaining)``: the ``remaining`` field of ``rateLimit``,
        repeated so it can be unpacked into a counter and a copy of its
        starting value.
    """
    # Query for the viewer login and the rateLimit fields
    query_limit = '''
    {
      viewer {
        login
      }
      rateLimit {
        limit
        cost
        remaining
        resetAt
      }
    }
    '''

    # Run the query and pull out the remaining-request count
    rate_info = run_query(query_limit)['data']['rateLimit']
    remaining = rate_info['remaining']

    return remaining, remaining

In [17]:
def concat_dfs(df, lm):
    """Return ``df`` for the first batch, otherwise stack it on the running result.

    Reads two module-level names in addition to its parameters: ``limit`` (the
    current counter) and ``merge_df`` (the frame accumulated so far).

    Parameters
    ----------
    df : DataFrame holding the newest batch
    lm : value compared against the global ``limit``

    Returns
    -------
    DataFrame
        ``df`` itself when ``limit == lm``; otherwise ``df`` concatenated on
        top of ``merge_df``.
    """
    if limit != lm:
        # Later batches: new rows first, previously merged rows after them
        return pd.concat([df, merge_df])

    # Counter has not moved yet, so there is nothing to merge with
    return df

### Choose a limit: leave exactly one of the limit options below uncommented

In [18]:
%%time

# Use this limit if unsure
# limit, lm = rate_limit()

# Use this limit if you're sure all request has been an hour. Make sure limit and lm is the same #
# limit = 5000
# lm = 5000

# Test limit. limit and lm needs to be the same number
limit = 2
lm = 2

# Enter starting date
starting_date = '2010-09-28'

# Number of days
num_days = 7

while limit >= 0:
    
    # Get ending date
    ending_date = dates(starting_date, num_days)
    
    # Query
    query = '''
    {{
      search(first:100, query:"repo:pandas-dev/pandas created:{date_1}..{date_2} type:issue", type:ISSUE) {{
        nodes {{
          ... on Issue {{
            createdAt
            closedAt
            updatedAt
            title
            number
            author {{
              login
              ... on User {{
                company
              }}
            }}
            authorAssociation
            state
            bodyText
            comments(first:20) {{
              totalCount
              edges {{
                node {{
                  author {{
                    login
                    ... on User {{
                      company
                    }}
                  }}
                  createdAt
                  authorAssociation
                  bodyText
                }}
              }}
            }}
          }}
        }}
      }}
      rateLimit {{
        limit
        cost
        remaining
        resetAt
      }}
    }}
    '''
    
    # Variables inputted for starting date and ending date
    variables = {
        'date_1': starting_date,
        'date_2': ending_date
    }
    
    # Results from the query
    results = run_query(query.format(**variables))
    
    # Convert data and rate limit to a df
    df_data, df_rate_limit = to_df(results)
    
    # Merge df_data
    merge_df = concat_dfs(df_data, lm)
    
    # Change limit - Probably not needed since each call is cost 1
    #limit = df_rate_limit.loc['remaining', 'rateLimit']
    limit -= 1
    
    # Change starting date to ending date + 1
    starting_date = dates(ending_date, 1)

Wall time: 1.82 s


In [19]:
# Display the DataFrame accumulated by the fetch loop
merge_df

Unnamed: 0,author,authorAssociation,bodyText,closedAt,comments,createdAt,number,state,title,updatedAt
0,"{'login': 'wesm', 'company': '@ursa-labs / @rs...",MEMBER,Need to investigate (user notified),2011-09-25T05:17:23Z,"{'totalCount': 1, 'edges': [{'node': {'author'...",2010-10-12T16:15:10Z,18,CLOSED,weights option may not be working in pandas.st...,2011-12-31T16:53:33Z
1,"{'login': 'wesm', 'company': '@ursa-labs / @rs...",MEMBER,Across DataFrame and Wide/LongPanel classes,2011-07-08T22:15:57Z,"{'totalCount': 1, 'edges': [{'node': {'author'...",2010-10-12T16:13:55Z,17,CLOSED,Better support for mixed-type data,2011-07-08T22:15:57Z
2,"{'login': 'wesm', 'company': '@ursa-labs / @rs...",MEMBER,Floating point error can result in incorrect o...,2011-06-23T04:46:14Z,"{'totalCount': 1, 'edges': [{'node': {'author'...",2010-10-12T16:13:04Z,16,CLOSED,Outlier detection in pandas.stats.moments func...,2011-06-23T04:46:14Z
3,"{'login': 'wesm', 'company': '@ursa-labs / @rs...",MEMBER,"DataFrame.apply, Series.applymap, Series.map s...",2010-12-11T06:19:58Z,"{'totalCount': 1, 'edges': [{'node': {'author'...",2010-10-12T16:10:48Z,15,CLOSED,.apply() API consistency fix,2010-12-11T06:19:58Z
4,"{'login': 'wesm', 'company': '@ursa-labs / @rs...",MEMBER,"For example:\ndf.add(series, axis=0)\ndf.add(s...",2011-02-19T16:13:00Z,"{'totalCount': 1, 'edges': [{'node': {'author'...",2010-10-11T03:19:39Z,14,CLOSED,Make more flexible arithmetic functions in Dat...,2011-02-19T16:13:00Z
5,"{'login': 'surbas', 'company': 'Pandium'}",NONE,"Hello,\nI not sure if this was the best way to...",2012-12-12T21:35:57Z,"{'totalCount': 4, 'edges': [{'node': {'author'...",2010-10-07T23:42:34Z,13,CLOSED,Add PyTable Paths to HDFStore,2012-12-13T01:37:53Z
0,"{'login': 'hector13', 'company': None}",NONE,"First, thank you for the pandas package -- it'...",2010-12-11T22:53:14Z,"{'totalCount': 2, 'edges': [{'node': {'author'...",2010-10-03T17:20:41Z,12,CLOSED,DataFrame and DataMatrix column ordering,2010-12-11T22:53:14Z
1,"{'login': 'wesm', 'company': '@ursa-labs / @rs...",MEMBER,Will need to give users advance warning of API...,2011-02-19T23:46:00Z,"{'totalCount': 2, 'edges': [{'node': {'author'...",2010-09-30T22:34:26Z,11,CLOSED,"Rename {Series, DataFrame, WidePanel}.fill() t...",2011-02-19T23:46:00Z
2,"{'login': 'wesm', 'company': '@ursa-labs / @rs...",MEMBER,Need to incorporate the selection of ranges of...,2011-06-23T19:38:51Z,"{'totalCount': 1, 'edges': [{'node': {'author'...",2010-09-30T22:33:14Z,10,CLOSED,Improvements to pandas.io.pytables / unit testing,2014-07-01T02:55:23Z
3,"{'login': 'wesm', 'company': '@ursa-labs / @rs...",MEMBER,,2011-05-18T02:43:33Z,"{'totalCount': 2, 'edges': [{'node': {'author'...",2010-09-30T22:29:36Z,9,CLOSED,Fix pandas.io.parsers.parseExcel to be more ro...,2011-05-18T02:43:33Z


# Check Limit

In [11]:
# Ad-hoc rate-limit check: request the viewer login together with the
# rateLimit fields (limit, cost, remaining, resetAt).
query_limit = '''
{
  viewer {
    login
  }
  rateLimit {
    limit
    cost
    remaining
    resetAt
  }
}
'''
# run_query raises if the HTTP status code is not 200
result = run_query(query_limit)
# Bare expression so the notebook shows the raw response
result

{'data': {'viewer': {'login': 'dustiny5'},
  'rateLimit': {'limit': 5000,
   'cost': 1,
   'remaining': 4962,
   'resetAt': '2019-08-24T00:55:36Z'}}}

# Test concat and limit

In [42]:
# Remaining request count taken from `result`.
# NOTE(review): `result` comes from the rate-limit check cell; re-run that cell
# first so this value is current.
limit = result['data']['rateLimit']['remaining']
limit

959

In [43]:
def concat_dfs(df):
    """Scratch check that prints the module-level ``limit``.

    Prints ``limit`` when ``limit == limit`` holds and the string ``'False'``
    otherwise. Returns ``None``.

    NOTE(review): the comparison is between ``limit`` and itself and the ``df``
    argument is never read, so this does not actually test anything about the
    value passed in - confirm what was meant to be compared. This definition
    also replaces the earlier two-argument ``concat_dfs(df, lm)``; after this
    cell runs, the fetch loop cannot be re-run until that one is redefined.
    """
    message = limit if limit == limit else 'False'
    print(message)

In [45]:
# concat_dfs prints its own message and returns None, so wrapping the call in
# print() also emits a "None" line after each value.
print(concat_dfs(limit))
# Change limit
limit -=1
# Same check again with the decremented counter
print(concat_dfs(limit))

959
None
958
None


# Automate

In [None]:
# https://stackoverflow.com/questions/15088037/python-script-to-do-something-at-the-same-time-every-day