# Queried Repo: Public, Stars >= 5, and Forks >= 5

In [1]:
import requests
import pandas as pd

In [2]:
# Read the GitHub API token from a file kept outside the repository.
# The file content is stripped because text files normally end with a newline,
# and a line break inside an HTTP header value is invalid.
with open('../../../Auth_Keys/graphql_api_auth.txt') as token_file:
    token = token_file.read().strip()

# Fail here, with a clear message, rather than later with an authentication error.
if not token:
    raise ValueError("GitHub API token file is empty")

# Authorization header sent with every GraphQL request
headers = {"Authorization": 'Bearer ' + token}

In [3]:
# Function to use requests.post to make an API call
def run_query(query, timeout=30):
    """POST a GraphQL query to the GitHub API and return the decoded JSON body.

    Parameters
    ----------
    query : str
        GraphQL document to execute.
    timeout : float or tuple, optional
        Seconds to wait for the server; forwarded to ``requests.post``.
        Without a timeout a stalled connection would block the cell forever.

    Returns
    -------
    dict
        The parsed JSON response.

    Raises
    ------
    Exception
        If the HTTP status is not 200, or if the API answered 200 but the body
        carries GraphQL ``errors`` and no ``data``.
    """
    response = requests.post(
        'https://api.github.com/graphql',
        json={'query': query},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))

    payload = response.json()

    # GraphQL can report a failed query inside the body while the HTTP status is
    # still 200. Only the "errors and no data" case is rejected: partial results
    # (data plus errors) are still returned so callers keep what was fetched.
    if isinstance(payload, dict) and payload.get('errors') and payload.get('data') is None:
        raise Exception("Query returned GraphQL errors: {}. {}".format(payload['errors'], query))

    return payload

## Original query

In [39]:
# First-page request: repository metadata plus README text and commit count.
# README and commit history are resolved through `HEAD` (the repository's
# default branch) instead of a hard-coded `master`, so repositories whose
# default branch has another name no longer come back as null.
query = '''
{
  search(query: "is:public stars:>=5 forks:>=5", type: REPOSITORY, first:15) {
    repositoryCount
    pageInfo {
      endCursor
      hasNextPage
    }
    nodes {
      ... on Repository {
        id
        createdAt
        updatedAt
        owner {
          login
        }
        name
        nameWithOwner
        primaryLanguage {
          name
        }
        description
        hasIssuesEnabled
        hasWikiEnabled
        licenseInfo {
          spdxId
        }
        object(expression:"HEAD:README.md") {
          ... on Blob {
            text
          }
        }
        second_object: object(expression: "HEAD") {
          ... on Commit {
            history {
              totalCount
            }
          }
        }
        forkCount
        stargazers {
          totalCount
        }
        issues {
          totalCount
        }
        pullRequests {
          totalCount
        }
      }
    }
  }
    rateLimit {
    limit
    cost
    remaining
    resetAt
  }
}
'''

# Follow-up pages: same selection written as a str.format template. Literal
# GraphQL braces are doubled and {end_cursor} is filled from `variables`.
# NOTE(review): the first page asks for 15 results but later pages ask for 5 -
# confirm that the smaller page size is intentional.
query_2 = '''
{{
  search(query: "is:public stars:>=5 forks:>=5", type: REPOSITORY, first:5, after:"{end_cursor}") {{
    repositoryCount
    pageInfo {{
      endCursor
      hasNextPage
    }}
    nodes {{
      ... on Repository {{
        id
        createdAt
        updatedAt
        owner {{
          login
        }}
        name
        nameWithOwner
        primaryLanguage {{
          name
        }}
        description
        hasIssuesEnabled
        hasWikiEnabled
        licenseInfo {{
          spdxId
        }}
        object(expression:"HEAD:README.md") {{
          ... on Blob {{
            text
          }}
        }}
        second_object: object(expression: "HEAD") {{
          ... on Commit {{
            history {{
              totalCount
            }}
          }}
        }}
        forkCount
        stargazers {{
          totalCount
        }}
        issues {{
          totalCount
        }}
        pullRequests {{
          totalCount
        }}
      }}
    }}
  }}
    rateLimit {{
    limit
    cost
    remaining
    resetAt
  }}
}}
'''

# Substitution values for query_2; "end_cursor" holds the cursor of the last
# fetched page and starts out empty.
variables = {
    "end_cursor": ""
}

## Separate Queries

### Query 1/3: Repo info

In [96]:
# First-page request: repository metadata only (README text and commit
# history are fetched by the other two queries).
query = """
{
  search(query: "is:public stars:>=5 forks:>=5", type: REPOSITORY, first:100) {
    repositoryCount
    pageInfo {
      endCursor
      hasNextPage
    }
    nodes {
      ... on Repository {
        id
        createdAt
        updatedAt
        owner {
          login
        }
        name
        nameWithOwner
        primaryLanguage {
          name
        }
        description
        hasIssuesEnabled
        hasWikiEnabled
        licenseInfo {
          spdxId
        }
        forkCount
        stargazers {
          totalCount
        }
        issues {
          totalCount
        }
        pullRequests {
          totalCount
        }
      }
    }
  }
    rateLimit {
    limit
    cost
    remaining
    resetAt
  }
}
"""

# Follow-up pages use the very same selection, turned into a str.format
# template: every literal brace is doubled first, then the search call gains
# an `after` argument carrying the {end_cursor} placeholder.
_escaped_query = query.replace("{", "{{").replace("}", "}}")
query_2 = _escaped_query.replace("first:100)", 'first:100, after:"{end_cursor}")')

# Substitution values for query_2; the cursor starts out empty.
variables = dict(end_cursor="")

### Query 2/3: Readme

In [85]:
# First-page request: repository id plus README text.
# The README is looked up on `HEAD` (the repository's default branch) rather
# than a hard-coded `master`, so repositories whose default branch has another
# name no longer return a null object.
query = '''
{
  search(query: "is:public stars:>=5 forks:>=5", type: REPOSITORY, first: 100) {
    pageInfo {
      endCursor
      hasNextPage
    }
    nodes {
      ... on Repository {
        id
        object(expression: "HEAD:README.md") {
          ... on Blob {
            text
          }
        }
      }
    }
  }
  rateLimit {
    limit
    cost
    remaining
    resetAt
  }
}
'''

# Follow-up pages: same selection as a str.format template. Literal GraphQL
# braces are doubled and {end_cursor} is filled from `variables`.
query_2 = '''
{{
  search(query: "is:public stars:>=5 forks:>=5", type: REPOSITORY, first: 100, after:"{end_cursor}") {{
    pageInfo {{
      endCursor
      hasNextPage
    }}
    nodes {{
      ... on Repository {{
        id
        object(expression: "HEAD:README.md") {{
          ... on Blob {{
            text
          }}
        }}
      }}
    }}
  }}
  rateLimit {{
    limit
    cost
    remaining
    resetAt
  }}
}}
'''

# Substitution values for query_2; the cursor starts out empty.
variables = {
    "end_cursor": ""
}

### Query 3/3: Commit Count

In [107]:
# First-page request: repository id plus total commit count.
# The commit history is read from `HEAD` (the repository's default branch)
# rather than a hard-coded `master`, so repositories whose default branch has
# another name no longer return a null object.
query = '''
{
  search(query: "is:public stars:>=5 forks:>=5", type: REPOSITORY, first: 15) {
    pageInfo {
      endCursor
      hasNextPage
    }
    nodes {
      ... on Repository {
        id
        object(expression: "HEAD") {
          ... on Commit {
            history {
              totalCount
            }
          }
        }
      }
    }
  }
  rateLimit {
    limit
    cost
    remaining
    resetAt
  }
}
'''

# Follow-up pages: same selection as a str.format template. Literal GraphQL
# braces are doubled and {end_cursor} is filled from `variables`.
# The former `object: object(...)` alias was redundant (the response key is
# "object" either way) and is dropped to match the first-page query.
# NOTE(review): the first page asks for 15 results but later pages ask for 10 -
# confirm that the smaller page size is intentional.
query_2 = '''
{{
  search(query: "is:public stars:>=5 forks:>=5", type: REPOSITORY, first: 10, after:"{end_cursor}") {{
    pageInfo {{
      endCursor
      hasNextPage
    }}
    nodes {{
      ... on Repository {{
        id
        object(expression: "HEAD") {{
          ... on Commit {{
            history {{
              totalCount
            }}
          }}
        }}
      }}
    }}
  }}
  rateLimit {{
    limit
    cost
    remaining
    resetAt
  }}
}}
'''

# Substitution values for query_2; the cursor starts out empty.
variables = {
    "end_cursor": ""
}

In [6]:
def to_df(result):
    """Turn the repository nodes of a search response into a DataFrame.

    `result` is the decoded JSON reply; the list stored under
    data -> search -> nodes supplies one row per entry.
    """
    search_payload = result['data']['search']
    return pd.DataFrame(search_payload['nodes'])

def get_page_info(result):
    """Return the pair (endCursor, hasNextPage) from a search response's pageInfo."""
    page_info = result['data']['search']['pageInfo']
    return page_info['endCursor'], page_info['hasNextPage']

def result_query(query):
    """Run one query and unpack everything the paging loop needs.

    Side effect: stores the page's end cursor in the module-level
    `variables['end_cursor']`, which is what the follow-up query template
    is formatted with.

    Returns a tuple (DataFrame of the page's nodes, hasNextPage flag,
    remaining rate-limit budget).
    """
    response = run_query(query)

    page_df = to_df(response)

    # Remember where this page ended so the next request can continue from it
    end_cursor, has_cursor = get_page_info(response)
    variables['end_cursor'] = end_cursor

    remaining = response['data']['rateLimit']['remaining']

    return page_df, has_cursor, remaining

In [None]:
%%time
##### Comment Out this section if there's a 403 error #####

# Run first query
df_repo, has_cursor, limit = result_query(query)

print('Starting limit is: ', limit)
print('Start cursor: ', has_cursor, '\n')

##### Comment Out this section if there's a 403 error #####

# Run While Loop below
while has_cursor and (limit >= 0):
    
    # Run next queries
    df_repo_2, has_cursor, limit = result_query(query_2.format(**variables))

    # Concat to existing df_star
    df_repo = pd.concat([df_repo, df_repo_2])
    
    # Print limit and cursor
    print('Next limit is: ', limit)
    print('Next cursor: ', has_cursor, '\n')

Starting limit is:  4847
Start cursor:  True 

Next limit is:  4846
Next cursor:  True 

Next limit is:  4845
Next cursor:  True 

Next limit is:  4844
Next cursor:  True 

Next limit is:  4843
Next cursor:  True 

Next limit is:  4842
Next cursor:  True 

Next limit is:  4841
Next cursor:  True 

Next limit is:  4840
Next cursor:  True 

Next limit is:  4839
Next cursor:  True 

Next limit is:  4838
Next cursor:  True 

Next limit is:  4837
Next cursor:  True 

Next limit is:  4836
Next cursor:  True 

Next limit is:  4835
Next cursor:  True 

Next limit is:  4834
Next cursor:  True 

Next limit is:  4833
Next cursor:  True 

Next limit is:  4832
Next cursor:  True 

Next limit is:  4831
Next cursor:  True 

Next limit is:  4830
Next cursor:  True 

Next limit is:  4829
Next cursor:  True 

Next limit is:  4828
Next cursor:  True 

Next limit is:  4827
Next cursor:  True 

Next limit is:  4826
Next cursor:  True 

Next limit is:  4825
Next cursor:  True 

Next limit is:  4824
Next cur

In [87]:
# Sanity check of the collected data: (rows, columns), then the first rows
print(tuple(df_repo.shape))
df_repo.head(n=5)

(1000, 2)


Unnamed: 0,id,object
0,MDEwOlJlcG9zaXRvcnkyODQ1NzgyMw==,{'text': '![freeCodeCamp.org Social Banner](ht...
1,MDEwOlJlcG9zaXRvcnkxNzc3MzY1MzM=,{'text': '[996.ICU](https://996.icu/#/en_US) =...
2,MDEwOlJlcG9zaXRvcnkxMTczMDM0Mg==,"{'text': '<p align=""center""><a href=""https://v..."
3,MDEwOlJlcG9zaXRvcnkxMDI3MDI1MA==,{'text': '# [React](https://reactjs.org/) &mid...
4,MDEwOlJlcG9zaXRvcnk0NTcxNzI1MA==,"{'text': '<div align=""center"">  <img src=""htt..."


### Pickle df

In [98]:
# Persist the collected frame next to the notebook.
# NOTE(review): the extension is ".pk1" (digit one), possibly a typo for ".pkl" -
# confirm nothing else loads this exact file name before renaming.
df_repo.to_pickle(path='df_repo_pandas_3.pk1')