In [1]:
import requests
import urllib
import csv
import time
from os import path

In [2]:
IEEE_Rankings = [
    'Python', 'Java', 'C', 'C++', 'JavaScript', 'R', 'Arduino', 'Go', 'Swift', 'Matlab',
    'Ruby', 'Dart', 'SQL', 'PHP', 'Assembly', 'Scala', 'HTML', 'Kotlin', 'Julia', 'Rust',
    'Shell', 'Processing', 'C#', 'SAS', 'Fortran', 'Cuda', 'Visual Basic', 'Objective-C', 'Delphi', 'Perl',
    'Verilog', 'VHDL', 'LabView', 'Elixir', 'F#', 'Prolog', 'Lua', 'Lisp', 'Ada', 'Apache Groovy', 'Scheme',
    'Haskell', 'Cobol', 'Clojure', 'ABAP', 'D', 'Forth', 'Ocaml', 'TCL', 'LadderLogic', 'Erlang',
    'Eiffel', 'CoffeeScript', 'J', 'Racket'
]
UNAVAILABLE_LANGUAGES = ['Arduino', 'LadderLogic', 'Apache Groovy']


ATTRIBUTES_TO_FETCH = ['name','fork','url','issues_url','labels_url','created_at',
                       'updated_at','language','forks_count','open_issues', 'watchers', 'stargazers_count']

In [3]:
REPOSITORY_API_BASE = 'https://api.github.com/search/repositories?'
RESULTS_PER_PAGE = 100
API_RESULTS_LIMIT = 1000
githubAccessToken = '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'
fileName = 'repos.csv'

def ensureRequestCount(r):
    remaining = int(r.headers['X-RateLimit-Remaining'])
    print("Remaining limit: " + str(remaining))
    if (remaining == 0):
        reset_time = datetime.datetime.fromtimestamp(int(r.headers['X-RateLimit-Reset']))
        seconds_until_reset = (reset_time - datetime.datetime.now()).total_seconds() + 10
        print("Limit Exceed, Going to wait mode for " + str(seconds_until_reset))
        time.sleep(seconds_until_reset)
        
def getRequest(url):
    headers = {'Authorization': 'token '+ githubAccessToken}
    response = requests.get(url,headers=headers)
    ensureRequestCount(response)
    return response

def buildQueryUrl(language, pageNumber):
    QUERY_COMPONENTS = {
        "q": 'language:{0}'.format(language),
        "s": 'stars',
        "o": 'desc',
        "page": pageNumber,
        "per_page": RESULTS_PER_PAGE
    }
    return REPOSITORY_API_BASE + urllib.parse.urlencode(QUERY_COMPONENTS)

def getAllQueryUrls(language):
    request = getRequest(buildQueryUrl(language, 1))
    json_request = request.json()
    
    total_items = json_request['total_count']
    if (total_items > API_RESULTS_LIMIT):
        total_items = API_RESULTS_LIMIT # Limit the results to first 1000
    
    queryUrls = []
    totalPages = total_items / RESULTS_PER_PAGE
    for page in range(1,int(totalPages)+1):
        queryUrls.append(buildQueryUrl(language, page))
    return queryUrls

def fetchAttributesFromRepo(item):
    items = []
    for attribute in ATTRIBUTES_TO_FETCH:
        items.append(item[attribute])
    return items

In [4]:
LANGUAGES_TO_SKIP = UNAVAILABLE_LANGUAGES

for language in IEEE_Rankings:
    if language in LANGUAGES_TO_SKIP:
        continue
    print("- Starting for " + language)
    queryUrls = getAllQueryUrls(language)
    for url in queryUrls:
        print("-- " + url)
        r = getRequest(url)
        rObject = r.json()
        for repo in rObject['items']:
            isNewFile = False
            if path.exists(fileName)==False:
                isNewFile = True
            with open(fileName, 'a', newline='') as csvfile:
                writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                if (isNewFile):
                    writer.writerow(ATTRIBUTES_TO_FETCH)
                    isNewFile = False
                writer.writerow(fetchAttributesFromRepo(repo))

- Starting for Python
Remaining limit: 29
-- https://api.github.com/search/repositories?q=language%3APython&s=stars&o=desc&page=1&per_page=100
Remaining limit: 28
-- https://api.github.com/search/repositories?q=language%3APython&s=stars&o=desc&page=2&per_page=100
Remaining limit: 27
-- https://api.github.com/search/repositories?q=language%3APython&s=stars&o=desc&page=3&per_page=100
Remaining limit: 26
-- https://api.github.com/search/repositories?q=language%3APython&s=stars&o=desc&page=4&per_page=100
Remaining limit: 25
-- https://api.github.com/search/repositories?q=language%3APython&s=stars&o=desc&page=5&per_page=100
Remaining limit: 24
-- https://api.github.com/search/repositories?q=language%3APython&s=stars&o=desc&page=6&per_page=100
Remaining limit: 23
-- https://api.github.com/search/repositories?q=language%3APython&s=stars&o=desc&page=7&per_page=100
Remaining limit: 22
-- https://api.github.com/search/repositories?q=language%3APython&s=stars&o=desc&page=8&per_page=100
Remaining 

['LadderLogic']