# ROR
ROR is a community-led project to develop an open, sustainable, usable, and unique identifier for every research organization in the world.

https://ror.org/

This JupyterLab notebook fetches the ROR ID, name, and type of all registered organisations for one or more countries, using asynchronous HTTP requests to keep the download fast.

In [1]:
import pandas as pd
import json
import requests
import math
!pip install aiohttp
import asyncio
import aiohttp
from timeit import default_timer



In [2]:
# COUNTRIES lists the countries whose organisations are fetched from ROR; it is required.
# Each entry is appended to the `country.country_code:` filter at the end of ROR_URL.
# Use two-letter ISO 3166-1 alpha-2 codes, e.g.: COUNTRIES = ['NL', 'BE', 'GB']
# NOTE(review): the United Kingdom's ISO code is 'GB', not 'UK' -- confirm against the ROR API docs.
# ROR API: https://ror.readme.io/docs/rest-api
COUNTRIES = ['NL', 'BE']
ROR_URL = 'https://api.ror.org/organizations?filter=country.country_code:'

In [3]:
def nice_list(list):
    """Format the given items as a human-readable enumeration.

    Examples: ['NL'] -> 'NL', ['NL', 'BE'] -> 'NL and BE',
    ['NL', 'BE', 'DE'] -> 'NL, BE and DE', [] -> ''.

    Parameters
    ----------
    list : iterable
        Items to enumerate (the parameter name shadows the builtin and is
        kept only for backward compatibility). Items are converted with str().

    Returns
    -------
    str
        The items joined by ', ' with ' and ' before the last one.
    """
    # Work on the argument itself rather than on the global COUNTRIES.
    items = [str(item) for item in list]
    if not items:
        return ''
    if len(items) == 1:
        return items[0]
    return ', '.join(items[:-1]) + ' and ' + items[-1]

async def getRORpages2(url, countries, page_size=20):
    """Determine how many result pages the API holds for each country.

    One request per country is sent to ``url + country``; the requests run
    concurrently and the page count is derived from the ``number_of_results``
    field of each JSON response.

    Parameters
    ----------
    url : str
        Query URL ending in the country filter, to which the code is appended.
    countries : iterable of str
        Country codes to query.
    page_size : int, optional
        Number of items per result page used to compute the page count
        (default 20, the value the original code hard-coded).

    Returns
    -------
    dict
        Maps each country code to its number of result pages.

    Raises
    ------
    aiohttp.ClientResponseError
        If the API answers with an HTTP error status.
    """
    country_codes = [country for country in countries]

    async def _count_pages(session, country):
        # `async with` releases the connection back to the pool even on errors.
        # NOTE(review): ssl=False disables TLS certificate verification; kept
        # from the original code -- confirm whether it is still required.
        async with session.get(url + country, ssl=False) as response:
            response.raise_for_status()
            payload = await response.json()
        return math.ceil(payload['number_of_results'] / page_size)

    async with aiohttp.ClientSession() as session:
        # gather() returns results in the order the awaitables were passed in.
        page_counts = await asyncio.gather(
            *(_count_pages(session, country) for country in country_codes)
        )
    return dict(zip(country_codes, page_counts))


async def getROR3(url, countries, max_concurrency=10):
    """Download every result page for the given countries.

    The page count per country is obtained from ``getRORpages2``; the pages
    are then requested concurrently, at most ``max_concurrency`` at a time.

    Parameters
    ----------
    url : str
        Query URL ending in the country filter, to which the code is appended.
    countries : iterable of str
        Country codes to query.
    max_concurrency : int, optional
        Upper bound on simultaneous requests (default 10), so the API is not
        flooded when many pages have to be fetched.

    Returns
    -------
    list
        One entry per page (the page's ``items`` value), ordered by country
        and then by page number, exactly as a sequential download would be.

    Raises
    ------
    aiohttp.ClientResponseError
        If the API answers with an HTTP error status.
    """
    country_codes = [country for country in countries]
    country_pages = await getRORpages2(url, country_codes)

    # Build all page URLs up front so the result order is fixed.
    page_urls = [
        url + country + '&page=' + str(page)
        for country in country_codes
        for page in range(1, country_pages[country] + 1)
    ]

    limiter = asyncio.Semaphore(max_concurrency)

    async def _fetch_items(session, page_url):
        async with limiter:
            # `async with` releases the connection back to the pool even on errors.
            # NOTE(review): ssl=False disables TLS certificate verification; kept
            # from the original code -- confirm whether it is still required.
            async with session.get(page_url, ssl=False) as response:
                response.raise_for_status()
                return (await response.json())['items']

    async with aiohttp.ClientSession() as session:
        # gather() returns results in the order the awaitables were passed in.
        results = await asyncio.gather(
            *(_fetch_items(session, page_url) for page_url in page_urls)
        )
    return [page_items for page_items in results]


# Time the complete download: getROR3 first looks up the page counts and then
# fetches every page, so the elapsed time covers both steps.
start_time = default_timer()
print(f'Fetching all the ROR-ids for {nice_list(COUNTRIES)}')
# Top-level `await` is used here; this relies on the notebook's running event loop.
# ror_list4 holds one entry per downloaded page (each entry is that page's 'items').
ror_list4 = await getROR3(ROR_URL, COUNTRIES)
print(f"Completed in: {(default_timer() - start_time):5.2f}s")

Fetching all the ROR-ids for NL and BE
Completed in:  8.93s


In [4]:
# Merge the per-page item lists into one flat list of records,
# then load those records into a single dataframe.
flat_list = []
for page_items in ror_list4:
    flat_list.extend(page_items)
dataframe = pd.DataFrame(flat_list)

In [5]:
# Work on an independent copy restricted to the columns of interest,
# so later changes do not touch `dataframe`.
df_temp = dataframe[['id','name', 'types', 'country', 'aliases', 'addresses', 'email_address']].copy()

# 'id' holds the identifier as a full URL; the bare ROR id is the part after
# the last '/'. Splitting on the separator avoids depending on the exact
# length of the URL prefix (the original sliced at a hard-coded offset of 16).
df_temp['ror_id'] = df_temp['id'].str.rsplit('/', n=1).str[-1]

In [6]:
# Lookup tables in both directions (ROR id -> name and name -> ROR id).
# They are not needed further down in this notebook, but can be copied
# and used in other scripts.
# When a key occurs more than once, the last occurrence wins.
dict_ror_name = dict(zip(df_temp['ror_id'], df_temp['name']))
dict_name_ror = dict(zip(df_temp['name'], df_temp['ror_id']))

In [7]:
# Sanity check: look up one organisation's ROR id by its exact name.
# Raises KeyError when no fetched organisation carries exactly this name,
# e.g. when COUNTRIES does not include the country of this organisation.
# The reverse lookup (id -> name) can be checked the same way:
# print(dict_ror_name['01pmm8272'])
print(dict_name_ror['Erasmus MC'])

018906e22


In [8]:
# Remove the URL-style 'id' column before saving; 'ror_id' carries the bare id.
# Rebinding instead of dropping in place, with errors='ignore', keeps this cell
# safe to re-run: a second execution no longer raises KeyError.
df_temp = df_temp.drop(columns='id', errors='ignore')

In [9]:
# Save to CSV; the file name contains all country codes joined by '_',
# e.g. 'uni_ror_NL_BE.csv'.
COUNTRIES2 = '_'.join(COUNTRIES)
# index=False: the row index is only a running number and would otherwise
# come back as a spurious 'Unnamed: 0' column when the file is read again.
df_temp.to_csv('uni_ror_'+COUNTRIES2+'.csv', index=False)

In [10]:
# Load a saved ROR CSV back into lookup dictionaries.
# The file-name suffix is built from COUNTRIES the same way as when the CSV
# was saved (codes joined by '_'), so this cell reads the file this notebook
# wrote. When copying the cell into another script, set COUNTRY_CODE to the
# suffix of the CSV to load, e.g. 'NL' or 'NL_BE'.
COUNTRY_CODE = '_'.join(COUNTRIES)
# Read ror_id explicitly as text: ids such as '018906e22' look like numbers
# and must not be left to dtype inference.
df_ror = pd.read_csv('uni_ror_'+COUNTRY_CODE+'.csv', dtype={'ror_id': str})
dict_ror_name = {}
dict_name_ror = {}
for name, ror_id in zip(df_ror['name'], df_ror['ror_id']):
    dict_ror_name[ror_id] = name
    dict_name_ror[name] = ror_id
