# Collects the most-starred GitHub repositories for each of the 55 programming languages in the IEEE Spectrum ranking

### Reference:
S. Cass, "The top programming languages: Our latest rankings put Python on top-again - [Careers]," in IEEE Spectrum, vol. 57, no. 8, pp. 22-22, Aug. 2020, doi: 10.1109/MSPEC.2020.9150550.

In [None]:
import requests
import urllib
import csv
import time
from os import path
import datetime

In [None]:
# Load the language names to query. Only the first row of the CSV is used;
# each cell of that row is treated as one language name.
# newline='' is what the csv module documentation asks for when opening files.
with open('ieee_rankings.csv', newline='') as f:
    # next() reads just the first row instead of materialising the whole file,
    # and yields an empty list (no languages) when the file is empty.
    IEEE_Rankings = next(csv.reader(f), [])


# Keys copied from every repository object of the search response into the
# output CSV, in column order. The same list is written as the CSV header row.
ATTRIBUTES_TO_FETCH = ['name','fork','url','issues_url','labels_url','created_at',
                       'updated_at','language','forks_count','open_issues', 'watchers', 'stargazers_count']

# Define functions
Define the functions used for the fetching task, including rate-limit handling. To raise the API usage limits, generate a GitHub access token and paste it into the `githubAccessToken` variable below.

In [None]:
# --- Search API settings ---------------------------------------------------
# Prefix of every search URL; buildQueryUrl appends the encoded query string.
REPOSITORY_API_BASE = 'https://api.github.com/search/repositories?'
# Sent as the `per_page` query parameter and used to compute the page count.
RESULTS_PER_PAGE = 100
# Upper bound applied to the reported total before pages are enumerated.
API_RESULTS_LIMIT = 1000

# --- Credentials and output ------------------------------------------------
# Optional personal access token; when non-empty it is sent in the
# Authorization header by getRequest. Leave empty for anonymous requests.
githubAccessToken = ''
# CSV file the fetched repository rows are appended to.
fileName = 'repos.csv'

def ensureRequestCount(r):
    """Block until the rate limit resets when the response reports no quota left.

    Reads the ``X-RateLimit-Remaining`` and ``X-RateLimit-Reset`` headers of
    ``r``. When the remaining count is zero, sleeps until the reset timestamp
    plus a 10 second safety margin.

    Args:
        r: A response-like object exposing a ``headers`` mapping.

    Returns:
        None.
    """
    remaining_header = r.headers.get('X-RateLimit-Remaining')
    if remaining_header is None:
        # No rate-limit information on this response: nothing to wait for.
        return

    remaining = int(remaining_header)
    print("Remaining limit: " + str(remaining))
    if remaining > 0:
        return

    # The reset header is read as an epoch timestamp in seconds. Comparing it
    # with time.time() avoids naive local-datetime arithmetic, which can be
    # off around DST changes. A missing header means "no known reset time".
    reset_epoch = int(r.headers.get('X-RateLimit-Reset', 0))
    # Clamp at zero: a reset time already in the past would otherwise produce
    # a negative duration, which time.sleep rejects with ValueError.
    seconds_until_reset = max(0.0, reset_epoch - time.time() + 10)
    print("Limit Exceed, Going to wait mode for " + str(seconds_until_reset))
    time.sleep(seconds_until_reset)
        
def getRequest(url):
    """Send a GET request to ``url`` and apply rate-limit back-off afterwards.

    When ``githubAccessToken`` is non-empty it is sent as a token
    ``Authorization`` header; otherwise the request is anonymous.

    Args:
        url: Fully built request URL.

    Returns:
        The ``requests`` response object, returned after
        ``ensureRequestCount`` has inspected it (and possibly slept).
    """
    # Use None rather than '' for "no extra headers": requests documents the
    # argument as a dictionary, and None leaves its default headers in place.
    headers = None
    if githubAccessToken:
        headers = {'Authorization': 'token ' + githubAccessToken}
    response = requests.get(url, headers=headers)
    ensureRequestCount(response)
    return response

def buildQueryUrl(language, pageNumber):
    """Build the search URL for one result page of a language query.

    Args:
        language: Language name placed in the ``language:`` search qualifier.
        pageNumber: 1-based page index sent as the ``page`` parameter.

    Returns:
        ``REPOSITORY_API_BASE`` followed by the URL-encoded query string.
    """
    # Imported explicitly: a bare `import urllib` does not guarantee that the
    # `urllib.parse` submodule has been loaded.
    from urllib.parse import urlencode

    query_components = {
        "q": 'language:{0}'.format(language),
        # The REST search endpoint names these parameters `sort` and `order`;
        # the short `s`/`o` forms belong to the website's search URLs.
        "sort": 'stars',
        "order": 'desc',
        "page": pageNumber,
        "per_page": RESULTS_PER_PAGE,
    }
    return REPOSITORY_API_BASE + urlencode(query_components)

def getAllQueryUrls(language):
    """Return the URLs of every result page to fetch for ``language``.

    Requests the first page to read ``total_count``, caps that total at
    ``API_RESULTS_LIMIT`` and builds one URL per page of
    ``RESULTS_PER_PAGE`` results.

    Args:
        language: Language name forwarded to ``buildQueryUrl``.

    Returns:
        A list of page URLs, starting at page 1. Empty when the search
        reports no results.

    Raises:
        KeyError: If the response body has no ``total_count`` field.
    """
    first_page = getRequest(buildQueryUrl(language, 1))
    reported_total = first_page.json()['total_count']
    # Limit the results to the first API_RESULTS_LIMIT items.
    total_items = min(reported_total, API_RESULTS_LIMIT)

    # Ceiling division, so a trailing partially filled page is still fetched
    # (e.g. 150 results at 100 per page need 2 pages, not 1).
    total_pages = (total_items + RESULTS_PER_PAGE - 1) // RESULTS_PER_PAGE
    return [buildQueryUrl(language, page) for page in range(1, total_pages + 1)]

def fetchAttributesFromRepo(item):
    """Extract the configured attributes from one repository record.

    Args:
        item: Mapping for a single repository; it must contain every key
            listed in ``ATTRIBUTES_TO_FETCH``.

    Returns:
        A list of the values, in ``ATTRIBUTES_TO_FETCH`` order.

    Raises:
        KeyError: If ``item`` lacks one of the listed keys.
    """
    return [item[key] for key in ATTRIBUTES_TO_FETCH]

# Fetch Repositories
Fetch the repositories for each language in the IEEE ranking, sorted by star count (at most the first 1,000 results per language), and append them to the output CSV file.

In [None]:
# For every ranked language, walk its result pages and append one CSV row per
# repository to `fileName`. The header row is written only when the file does
# not exist yet, so re-running the cell keeps appending to the same file.
for language in IEEE_Rankings:
    print("- Starting for " + language)
    queryUrls = getAllQueryUrls(language)
    for url in queryUrls:
        print("-- " + url)
        r = getRequest(url)
        rObject = r.json()
        repos = rObject['items']
        if not repos:
            # Nothing to write for this page; do not create or touch the file.
            continue
        # Decide about the header and open the file once per page rather than
        # once per repository; the resulting file contents are the same.
        isNewFile = not path.exists(fileName)
        with open(fileName, 'a', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            if isNewFile:
                writer.writerow(ATTRIBUTES_TO_FETCH)
            writer.writerows(fetchAttributesFromRepo(repo) for repo in repos)