In [1]:
import csv
from csv import DictReader
import requests
import json


This script gets the counts of relation types for each dataset in a given CSV or TXT file. For reference, the relation types are listed at https://support.datacite.org/docs/eventdata-guide#relation-type-id. 

In [2]:
# Enter name to CSV or text file containing list of dataset PIDs
datasetPIDFile = 'dataset_pids_Harvard Dataverse_2020.09.19_04.21.29.txt'

# Read in list of dataset PIDs from given CSV or text file
datasetPIDs = []

if '.csv' in datasetPIDFile:
    with open(datasetPIDFile, mode='r', encoding='utf-8') as f:
        csvDictReader = DictReader(f, delimiter=',')
        for row in csvDictReader:
            datasetPIDs.append(row['persistent_id'].rstrip())
elif 'txt' in datasetPIDFile:
    for datasetPID in datasetPIDFile:
        datasetPIDs.append(datasetPID.rstrip())

In [4]:
print(datasetPIDs)

['d', 'a', 't', 'a', 's', 'e', 't', '_', 'p', 'i', 'd', 's', '_', 'H', 'a', 'r', 'v', 'a', 'r', 'd', '', 'D', 'a', 't', 'a', 'v', 'e', 'r', 's', 'e', '_', '2', '0', '2', '0', '.', '0', '9', '.', '1', '9', '_', '0', '4', '.', '2', '1', '.', '2', '9', '.', 't', 'x', 't']


In [10]:
# Create CSV file for writing data requested from DataCite API
with open('relationtype_counts.csv', mode='w') as opencsvfile:
    opencsvfile = csv.writer(opencsvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    opencsvfile.writerow(['pid', 'relation-type', 'count'])


In [11]:
# Create variables for keeping count of script's progress
pidTotal = len(datasetPIDs)
print('Saving counts of relation types of %s to CSV file' % (pidTotal))

pidCount = 0

for pid in datasetPIDs:
    pidForDatacite = pid.replace(':', '=')
    dataciteEventsAPI = 'https://api.datacite.org/events?%s&page[size]=0' % (pidForDatacite)
    response = requests.get(dataciteEventsAPI)
    relationData = response.json()
    if len(relationData['meta']['relation-types']) > 0:
        for type in relationData['meta']['relation-types']:
            relationType = type['id']
            count = type['count']
            with open('relationtype_counts.csv', mode='a') as datasets:
                datasets = csv.writer(datasets, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                datasets.writerow([pid, relationType, count])
    pidCount += 1
    print('%s of %s' % (pidCount, pidTotal), end='\r', flush=True)


Saving counts of relation types of 115 to CSV file
doi:10.5064/F6GXXBVD
1 of 115
doi:10.5064/F6WBLX4I
2 of 115
doi:10.5064/F6MBCJ8M
3 of 115
doi:10.5064/F6D0YBEA
4 of 115
doi:10.5064/F6LBYMQO
5 of 115
doi:10.5064/F6AC1QAF
6 of 115
doi:10.5064/F6AGWUJG
7 of 115
doi:10.5064/F6FLYLN6
8 of 115
doi:10.5064/F6W3BY0S
9 of 115
doi:10.5064/F6IJOKZD
10 of 115
doi:10.5064/F6OJNVF1
11 of 115
doi:10.5064/F6BCQ6DM
12 of 115
doi:10.5064/F6R3772G
13 of 115
doi:10.5064/F6ETSBWQ
14 of 115
doi:10.5064/F6QPV2QK
15 of 115
doi:10.5064/F6NJU10I
16 of 115
doi:10.5064/F6Z5ZNWH
17 of 115
doi:10.5064/F6RQA7AQ
18 of 115
doi:10.5064/F630E3UT
19 of 115
doi:10.5064/F6VAZJ2M
20 of 115
doi:10.5064/F69JKYSS
21 of 115
doi:10.5064/F6VOIB8H
22 of 115
doi:10.5064/F6SHNJUG
23 of 115
doi:10.5064/F6EOZGLB
24 of 115
doi:10.5064/F6HY9R1Z
25 of 115
doi:10.5064/F6FHSZGO
26 of 115
doi:10.5064/F6HTXF0H
27 of 115
doi:10.5064/F6K4IVEP
28 of 115
doi:10.5064/F68PQFS8
29 of 115
doi:10.5064/F6AHDRFQ
30 of 115
doi:10.5064/F6NON328
31 of 1