In [22]:
import requests
import pandas as pd
import numpy as np
import re
from ratelimit import limits, RateLimitException, sleep_and_retry
from backoff import on_exception, expo
from config import core

In [23]:
# API key sent as a Bearer token with every request; it comes from the local
# `config` module rather than being written into the notebook.
apikey = core.api_key

# Table of data-provider URLs to query (read from the "dataProviders" column
# later on); the first CSV column is used as the row index.
dataProviders = pd.read_csv(
    filepath_or_buffer="dataProviders.csv",
    index_col=0,
)

In [24]:
# Accumulator for the per-provider result dicts produced by provider_search.
allProviders = list()

# NOTE(review): 4 calls per 110 s looks chosen to stay under the quota the API
# reports in its X-RateLimit-* headers -- confirm against the CORE API terms.
@sleep_and_retry
@limits(calls=4, period=110)
def provider_search(lst):
    """Fetch one data-provider record and summarise it as a flat dict.

    The call is rate limited by the decorators above: once the allowed number
    of calls in the period is used up, ``sleep_and_retry`` blocks until the
    window resets and then retries.

    Parameters
    ----------
    lst : str
        URL of the data provider to query. Despite the name (kept so existing
        callers keep working) this is a single URL string, not a list.

    Returns
    -------
    dict
        Always has the keys ``id``, ``openDoarId``, ``name``, ``email``,
        ``valid`` and ``error``. On HTTP 200 the first four are copied from
        the JSON response, ``valid`` is True and ``error`` is ``""``. On any
        other status ``id`` is the trailing number of the URL (or the URL
        itself if it has none), the descriptive fields are ``"N/A"``,
        ``valid`` is False and ``error`` holds the HTTP status code.

    Raises
    ------
    requests.RequestException
        If the HTTP request itself fails.
    KeyError
        If a 200 response lacks one of the expected JSON fields.
    """
    # Use the argument, not whatever global `provider` happens to be bound to
    # in the calling cell.
    url = lst

    response = requests.get(url, headers={"Authorization": "Bearer " + apikey})
    status_code = response.status_code

    # Rate-limit headers are informational only; do not crash the crawl if a
    # proxy or error page omits them.
    rate = response.headers
    remaining = rate.get("X-RateLimit-Remaining", "?")
    limit = rate.get("X-RateLimit-Limit", "?")
    retry_after = rate.get("X-RateLimit-Retry-After", "?")
    print(f"Provider: {url} | Code: {status_code} | RateLimit: {remaining} / {limit} | Retry: {retry_after}")

    if status_code == 200:
        # Parse the body only on success: error responses are not guaranteed
        # to contain JSON, and their content is not used anyway.
        hit = response.json()
        return {
            "id": hit["id"],
            "openDoarId": hit["openDoarId"],
            "name": hit["name"],
            "email": hit["email"],
            "valid": True,
            "error": "",
        }

    # Recover the provider id from the end of the URL so the failed row can
    # still be identified; fall back to the full URL if there is no number.
    match = re.search(r"/(\d+)$", url)
    return {
        "id": match.group(1) if match else url,
        "openDoarId": "N/A",
        "name": "N/A",
        "email": "N/A",
        "valid": False,
        "error": status_code,
    }

# Query every URL in the "dataProviders" column and collect one result dict
# per provider. provider_search is rate limited (4 calls per 110 s), so this
# loop blocks between batches and can take a long time on a large CSV.
for provider in dataProviders["dataProviders"]:
    allProviders.append(provider_search(provider))

Provider: https://api.core.ac.uk/v3/data-providers/39 | Code: 200 | RateLimit: 7 / 10 | Retry: 2022-05-11T10:10:06+0000
Provider: https://api.core.ac.uk/v3/data-providers/8278 | Code: 404 | RateLimit: 6 / 10 | Retry: 2022-05-11T10:10:06+0000
Provider: https://api.core.ac.uk/v3/data-providers/42 | Code: 200 | RateLimit: 5 / 10 | Retry: 2022-05-11T10:10:06+0000
Provider: https://api.core.ac.uk/v3/data-providers/4786 | Code: 200 | RateLimit: 4 / 10 | Retry: 2022-05-11T10:10:07+0000
Provider: https://api.core.ac.uk/v3/data-providers/645 | Code: 200 | RateLimit: 9 / 10 | Retry: 2022-05-11T10:11:56+0000
Provider: https://api.core.ac.uk/v3/data-providers/847 | Code: 200 | RateLimit: 8 / 10 | Retry: 2022-05-11T10:11:56+0000
Provider: https://api.core.ac.uk/v3/data-providers/1529 | Code: 200 | RateLimit: 7 / 10 | Retry: 2022-05-11T10:11:56+0000
Provider: https://api.core.ac.uk/v3/data-providers/12431 | Code: 200 | RateLimit: 6 / 10 | Retry: 2022-05-11T10:11:57+0000
Provider: https://api.core.ac

In [26]:
# One row per queried provider, one column per key of the result dicts.
df_providers = pd.DataFrame(data=allProviders)

In [29]:
# Persist the collected results next to the notebook (the row index is written too).
df_providers.to_csv(path_or_buf="dataproviders_results.csv")

In [34]:
# Show the collected results; pandas truncates the middle rows of a long frame.
df_providers

Unnamed: 0,id,openDoarId,name,email,valid,error
0,39,463,Edinburgh Research Archive,scholcomms@ed.ac.uk,True,
1,8278,,,,False,404
2,42,162,Enlighten,william.nixon@glasgow.ac.uk,True,
3,4786,,Crossref,,True,
4,645,,Directory of Open Access Journals,,True,
...,...,...,...,...,...,...
415,1009,260,Publikationer från KTH,,True,
416,2104,3431,reposiTUm,,True,
417,531,571,TU Delft Repository,J.deLeeuwe@tudelft.nl,True,
418,2072,,Repositorio Institucional Universidad de Málaga,,True,
