# ROR
ROR is a community-led project to develop an open, sustainable, usable, and unique identifier for every research organization in the world.

https://ror.org/

This JupyterLab notebook fetches the ROR ID, name, and type of all registered organisations in one country.

In [1]:
import pandas as pd
import json
import requests
import math

In [2]:
# COUNTRY_CODE selects the country whose organisations are listed; it is used
# as the value of the `country.country_code` filter in the ROR API requests.
# ROR API: https://ror.readme.io/docs/rest-api
# COUNTRY_CODE is not optional: this notebook works for one country at a time.
# NOTE(review): presumably a two-letter ISO 3166-1 country code — confirm against the ROR API docs.
COUNTRY_CODE = 'NL'

In [3]:
# Query the ROR API once for COUNTRY_CODE to find out how many result pages
# must be fetched in total.
RESULTS_PER_PAGE = 20  # page size this notebook assumes — TODO confirm against the ROR API docs
r = requests.get(
    'https://api.ror.org/organizations?filter=country.country_code:'+COUNTRY_CODE,
    timeout=30,  # requests has no default timeout; do not hang forever on a stalled connection
)
# Fail loudly on an HTTP error instead of with a confusing KeyError below.
r.raise_for_status()
# Decode the JSON body exactly once.
ror_text = r.json()
pages = math.ceil(ror_text['number_of_results']/RESULTS_PER_PAGE)
print(f'For COUNTRY_CODE={COUNTRY_CODE:}, {pages:} pages must be fetched')


For COUNTRY_CODE=NL, 81 pages must be fetched


In [4]:
# Fetch every result page and collect each page's 'items' list in ror_list.
# ror_list ends up as a list of lists: one sub-list of organisation records
# per page.
ror_list = []
# One shared session lets the requests reuse the same connection.
with requests.Session() as session:
    for page in range(1, pages+1):
        r = session.get(
            'https://api.ror.org/organizations?filter=country.country_code:'+COUNTRY_CODE+'&page='+str(page),
            timeout=30,  # do not hang forever on a stalled connection
        )
        # Stop on an HTTP error rather than failing with a KeyError on 'items'.
        r.raise_for_status()
        # Decode the JSON body exactly once.
        ror_text = r.json()
        ror_list.append(ror_text['items'])

In [5]:
# Merge the per-page lists held in ror_list into a single flat list of
# records, then turn that list into a dataframe
flat_list = []
for page_items in ror_list:
    flat_list.extend(page_items)
dataframe = pd.DataFrame(flat_list)

In [6]:
# Keep only the columns of interest, as an independent copy of the full dataframe
df_temp = dataframe[['id','name', 'types', 'country', 'aliases', 'addresses', 'email_address']].copy()

# 'id' holds the ROR ID as a hyperlink (e.g. https://ror.org/018906e22).
# Take the part after the last '/' as the bare ROR ID, rather than slicing
# at a fixed character offset that silently breaks if the URL prefix changes.
df_temp['ror_id'] = df_temp['id'].str.rsplit('/', n=1).str[-1]

In [7]:
# Lookup tables in both directions: ROR ID -> name and name -> ROR ID.
# They are only used for a quick check in this notebook, but can be copied
# and used in scripts.
dict_ror_name = dict(zip(df_temp['ror_id'], df_temp['name']))
dict_name_ror = dict(zip(df_temp['name'], df_temp['ror_id']))

In [8]:
# Sanity check: look up one organisation's ROR ID by its name
# (the reverse lookup would be: print(dict_ror_name['01pmm8272']))
erasmus_ror_id = dict_name_ror['Erasmus MC']
print(erasmus_ror_id)

018906e22


In [9]:
# Remove the 'id' column (the hyperlink form of the ROR ID) before saving the
# dataframe to CSV. Reassigning the result, with errors='ignore', keeps this
# cell safe to re-run: an in-place drop raises KeyError on the second run
# because the column is already gone.
df_temp = df_temp.drop(columns='id', errors='ignore')

In [10]:
# Write the dataframe to a per-country CSV file in the working directory
output_csv = f'uni_ror_{COUNTRY_CODE}.csv'
df_temp.to_csv(output_csv)

In [11]:
# Load a per-country ROR CSV file into lookup dictionaries
# (ROR ID -> name and name -> ROR ID).
# COUNTRY_CODE is not optional: this script works for one country at a time.
COUNTRY_CODE = 'NL'
# Read ror_id as text: an ID such as 018906e22 is also a valid float literal,
# so it must never be left to pandas' automatic type inference.
df_temp = pd.read_csv('uni_ror_'+COUNTRY_CODE+'.csv', dtype={'ror_id': str})
dict_ror_name = dict(zip(df_temp['ror_id'], df_temp['name']))
dict_name_ror = dict(zip(df_temp['name'], df_temp['ror_id']))
