## Imports

In [1]:
import json
import os
from datetime import datetime
import time

import pandas as pd
import requests

## Common elements

In [2]:
# Column schema shared by the Zenodo and Figshare exports, in the same order
# in which the harvesting cells fill each row dictionary.
headers = [
    # identifiers and links
    "doi", "doi_url", "repository_url",
    # access
    "is_open_access",
    # authors
    "authors_count", "authors_affiliation_count", "authors_familyname_count", "authors_givennames_count",
    "authors_gnd_count", "authors_name_count", "authors_orcid_count",
    # funding, journal, keywords, language, license
    # ("has_journal" is the key the harvesting cells actually write)
    "has_funding_info", "has_journal", "keywords_freetext_count", "keywords_controlled_vocabularies_count",
    "has_language", "has_license", "license_name",
    # conference
    "has_conference_acronym", "has_conference_dates", "has_conference_place", "has_conference_session",
    "has_conference_session_part", "has_conference_title", "has_conference_website",
    # dates
    "has_creation_date", "creation_date", "has_other_dates", "other_dates",
    # references, title, version
    "references_no_identifiers_count", "references_with_identifiers_count", "has_title", "has_version",
    # attached files
    "files",
]

## Get posters metadata from Zenodo

In [4]:
# Zenodo personal access token, read from the environment (None when the variable is unset)
zenodo_access_token = os.environ.get("ZENODO_ACCESS_TOKEN")

In [None]:
# Zenodo doesn't return more than 10k results per request, so the harvest is split into
# creation-date ranges that are each expected to hold fewer than 10k posters.
# NOTE(review): confirm that every range (the 2016-2020 ones in particular) returns fewer
# than 10k posters, and split further if not.
date_range_list = ["{* TO 2015-12-31]",
                   "[2016-01-01 TO 2018-12-31]",
                   "[2019-01-01 TO 2020-12-31]",
                   "[2021-01-01 TO 2022-12-31]",
                   "[2023-01-01 TO 2024-11-30]"]
rows_list = []

# How many times a rate-limited (HTTP 429) page request is retried before giving up.
MAX_RATE_LIMIT_RETRIES = 5


def _has_text(value):
    """Return True when `value` is a string containing non-whitespace characters."""
    return isinstance(value, str) and len(value.strip()) > 0


def _entry_count(container, key):
    """Return len(container[key]), or 0 when the key is missing or its value is empty/None."""
    value = container.get(key)
    return len(value) if value else 0


def _zenodo_row(result):
    """Build one output row describing which metadata a Zenodo record provides.

    Parameters
    ----------
    result : dict
        One element of ``hits.hits`` from a Zenodo records search response.
        Record schema:
        https://github.com/zenodo/zenodo/blob/master/zenodo/modules/records/jsonschemas/records/record-v1.0.0.json

    Returns
    -------
    dict
        Row keyed by the export column names; missing information is reported
        as False, 0 or "NA" depending on the column.
    """
    row = {}
    metadata = result.get("metadata", {})

    # id and links
    row["doi"] = result.get("doi", False)
    row["doi_url"] = result.get("doi_url", False)
    row["repository_url"] = result.get("links", {}).get("self_html", False)

    # access right
    row["is_open_access"] = metadata.get("access_right") == "open"

    # authors: total, then how many carry each (non-blank) attribute
    creators = metadata.get("creators") or []
    row["authors_count"] = len(creators)
    for field in ("affiliation", "familyname", "givennames", "gnd", "name", "orcid"):
        row["authors_" + field + "_count"] = sum(
            1 for creator in creators if _has_text(creator.get(field))
        )

    # grants
    row["has_funding_info"] = bool(metadata.get("grants"))

    # journal
    row["has_journal"] = bool(metadata.get("journal"))

    # keywords free text
    row["keywords_freetext_count"] = _entry_count(metadata, "keywords")

    # keywords controlled vocabularies
    # NOTE(review): assumes controlled-vocabulary terms are exposed under "subjects",
    # as in the Zenodo record schema; none of the sampled records shows the key — confirm.
    row["keywords_controlled_vocabularies_count"] = _entry_count(metadata, "subjects")

    # language
    row["has_language"] = bool(metadata.get("language"))

    # license
    license_info = metadata.get("license")
    row["has_license"] = license_info is not None
    row["license_name"] = license_info.get("id", "NA") if isinstance(license_info, dict) else "NA"

    # conference
    meeting = metadata.get("meeting") or {}
    conference_fields = (
        ("has_conference_acronym", "acronym"),
        ("has_conference_dates", "dates"),
        ("has_conference_place", "place"),
        ("has_conference_session", "session"),
        ("has_conference_session_part", "session_part"),
        ("has_conference_title", "title"),
        ("has_conference_website", "url"),
    )
    for column, meeting_key in conference_fields:
        row[column] = _has_text(meeting.get(meeting_key))

    # creation_date
    created = result.get("created")
    row["has_creation_date"] = _has_text(created)
    row["creation_date"] = created if row["has_creation_date"] else "NA"

    # other dates: each entry is stringified and the entries are joined with "; "
    other_dates = metadata.get("dates") or []
    row["has_other_dates"] = len(other_dates) > 0
    row["other_dates"] = "; ".join(str(date) for date in other_dates) if other_dates else "NA"

    # references (free text, no identifiers)
    row["references_no_identifiers_count"] = _entry_count(metadata, "references")

    # related identifiers count
    row["references_with_identifiers_count"] = _entry_count(metadata, "related_identifiers")

    # title
    row["has_title"] = _has_text(metadata.get("title"))

    # version
    row["has_version"] = _has_text(metadata.get("version"))

    # files: names joined with "; " (empty string when the record exposes no files)
    files = result.get("files") or []
    row["files"] = "; ".join(str(file["key"]) for file in files)

    return row


for date_range in date_range_list:

    page = 1
    rate_limit_retries = 0

    while True:
        params = {
            "resource_type": "poster",
            "q": "created:" + date_range,
            "status": "published",
            "sort": "mostrecent",
            "all_versions": "false",
            "size": 200,
            "page": page,
            "access_token": zenodo_access_token,
        }
        response = requests.get(
            "https://zenodo.org/api/records",
            params=params,
            timeout=60,
        )

        # Rate limited: wait for the interval announced by the server, then retry the same page.
        if response.status_code == 429 and rate_limit_retries < MAX_RATE_LIMIT_RETRIES:
            rate_limit_retries += 1
            retry_after = response.headers.get("retry-after", "")
            time.sleep(int(retry_after) if retry_after.isdigit() else 60)
            continue
        rate_limit_retries = 0

        if response.status_code != 200:
            print("Error", response.status_code, response.content, str(page))
            break

        r = response.json()
        if not r["hits"]["hits"]:
            print("No more pages", response.status_code, str(page))
            break

        results = r["hits"]["hits"]

        # One row per record; every record of every page is kept.
        rows_list.extend(_zenodo_row(result) for result in results)
        page += 1

# save
df = pd.DataFrame(rows_list)
df.to_csv("zenodo.csv", index = False)

[{'created': '2015-11-11T14:54:15+00:00', 'modified': '2017-09-06T06:56:04.405830+00:00', 'id': 33634, 'conceptrecid': '619320', 'doi': '10.5281/zenodo.33634', 'conceptdoi': '', 'doi_url': 'https://doi.org/10.5281/zenodo.33634', 'metadata': {'title': 'Arylazopyrazoles: Quantitative, slow-relaxing photoswitches', 'doi': '10.5281/zenodo.33634', 'publication_date': '2015-11-11', 'description': '<p>Poster presented at RSC Organic Division poster symposium 2014</p>', 'access_right': 'restricted', 'creators': [{'name': 'Weston, Claire E.', 'affiliation': 'Imperial College London'}], 'keywords': ['arylazopyrazole', 'azoheteroarene', 'azopyrazole'], 'related_identifiers': [{'identifier': '10.1021/ja505444d', 'relation': 'cites', 'scheme': 'doi'}], 'resource_type': {'title': 'Poster', 'type': 'poster'}, 'relations': {'version': [{'index': 0, 'is_last': True, 'parent': {'pid_type': 'recid', 'pid_value': '619320'}}]}}, 'title': 'Arylazopyrazoles: Quantitative, slow-relaxing photoswitches', 'links

In [13]:
# Show the last record of the Zenodo search response currently held in `r`
# (requires a cell that assigned `r` to have run first).
r["hits"]["hits"][-1]

{'created': '2014-03-10T17:36:39+00:00',
 'modified': '2024-08-06T14:18:01.654204+00:00',
 'id': 1101,
 'conceptrecid': '611120',
 'metadata': {'title': 'ERA-ENVHEALTH',
  'publication_date': '2009-11-30',
  'description': '1. Background and Objectives\nReducing uncertainties about the links between environment and health (E&H) and taking\naction through protection and prevention measures is necessary. For these to be effective,\ncooperation must be improved and research driven by a common set of priorities. ERA-\nENVHEALTH, co-funded by the European Commission under FP7 "Coordination Actions",\nenhances European coordination of environment and health research programming.\n2. Methods\nThe project started in September 2008 with 16 E&H research programmers from 10 countries.\nTo establish sustainable collaboration, an integrated step-by-step approach is implemented to\nevaluate methods, define priority themes and respond to these through joint activities and\ntransnational calls.\nThe o

In [39]:
# Show the HTTP headers of the most recent Zenodo response (the output below includes rate-limit fields).
response.headers

{'server': 'nginx', 'date': 'Tue, 05 Nov 2024 00:48:49 GMT', 'content-type': 'application/json', 'content-length': '61', 'access-control-allow-origin': '*', 'access-control-expose-headers': 'Content-Type, ETag, Link, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Reset', 'x-ratelimit-limit': '1000', 'x-ratelimit-remaining': '997', 'x-ratelimit-reset': '1730767751', 'retry-after': '21', 'permissions-policy': 'interest-cohort=()', 'x-frame-options': 'sameorigin', 'x-xss-protection': '1; mode=block', 'x-content-type-options': 'nosniff', 'content-security-policy': "default-src 'self' fonts.googleapis.com *.gstatic.com data: 'unsafe-inline' 'unsafe-eval' blob: zenodo-broker.web.cern.ch zenodo-broker-qa.web.cern.ch maxcdn.bootstrapcdn.com cdnjs.cloudflare.com ajax.googleapis.com webanalytics.web.cern.ch", 'strict-transport-security': 'max-age=31556926; includeSubDomains', 'referrer-policy': 'strict-origin-when-cross-origin', 'set-cookie': '5569e5a730cade8ff2b54f1e815f3670=5f6844271992

In [8]:
# Show the tenth record of the current `results` list.
results[9]

{'created': '2024-10-30T14:04:35.247745+00:00',
 'modified': '2024-10-30T14:04:35.640913+00:00',
 'id': 14008448,
 'conceptrecid': '14008447',
 'doi': '10.5281/zenodo.14008448',
 'conceptdoi': '10.5281/zenodo.14008447',
 'doi_url': 'https://doi.org/10.5281/zenodo.14008448',
 'metadata': {'title': 'Analysis of the philosophical foundation of futures studies and its implications for prospective LCA',
  'doi': '10.5281/zenodo.14008448',
  'publication_date': '2024-10-29',
  'description': '<p>In recent years, the subfield of prospective life cycle assessment (LCA) has emerged and continues to develop. As a result, it is expected that prospective LCA will become a critical tool to support decision-making processes, among others in the context of the PHOENIX project. Yet, the discipline&rsquo;s philosophical foundations have not yet been clearly established, neither has its boundaries to other future-related academic disciplines, potentially leading to unclear scope definitions. This work a

In [241]:
# Show the first record of the current `results` list.
results[0]

{'created': '2024-10-31T23:59:52.972111+00:00',
 'modified': '2024-10-31T23:59:53.259916+00:00',
 'id': 14020087,
 'conceptrecid': '14020086',
 'doi': '10.5281/zenodo.14020087',
 'conceptdoi': '10.5281/zenodo.14020086',
 'doi_url': 'https://doi.org/10.5281/zenodo.14020087',
 'metadata': {'title': 'Is the jet variability reflected in the BLR emission in FSRQs',
  'doi': '10.5281/zenodo.14020087',
  'publication_date': '2024-10-31',
  'description': '<p>We present an spectroscopic study of a sample of bright gamma-ray blazars. Our dataset includes spectra of a sample of about 10 targets, all of them Flat-Spectrum Radio Quasars (FSRQs). The spectra were obtained nearly periodically during the period from 2008 to 2018. Our aim is to search for variations in the luminosity of the emission lines, &nbsp;related to variations in the continuum flux. We have used a fitting code (PyQSOFit) specifically developed to model the optical spectra of quasars. It permits to decompose the emission in diff

### Notes:  
- Roles of researchers are not shown on the online record or in the API record: why ask for them?
- Additional dates: the dates are shown on the online record but are not included in the API record. Why?
- publication_date: provided by user
- creation_date: date when the record was created (since we are only looking at the latest version, this is the creation date of the latest version)

## Get posters metadata from Figshare

In [198]:
# Figshare personal access token, read from the environment (None when the variable is unset)
figshare_access_token = os.environ.get("FIGSHARE_ACCESS_TOKEN")

In [199]:
# inspired from the example available here https://help.figshare.com/article/how-to-use-the-figshare-api#search-ids
# Collects the search hits for items of type "poster" into `results`.
BASE_URL = "https://api.figshare.com/v2"
search_logic = ":item_type:poster"
query = '{"search_for": "' + search_logic + '"}'
y = json.loads(query)
results = []
# NOTE(review): range(1, 2) requests the first page only (up to 1000 hits) — confirm this is intended.
for j in range(1, 2):
    search_response = requests.post(
        BASE_URL + "/articles/search?page_size=1000&page={}".format(j),
        params=y,
        timeout=60,
    )
    # Fail loudly on an HTTP error: an error payload is a dict, and extending
    # `results` with it would silently add its keys instead of article records.
    search_response.raise_for_status()
    r = json.loads(search_response.content)
    results.extend(r)

In [200]:
# Build one row per Figshare poster, using the same columns as the Zenodo export.
# Conventions for information Figshare does not expose: -1 for counts, None for flags.
rows_list = []

# The authorization header is identical for every article, so it is built once.
api_call_headers = {'Authorization': "token " + str(figshare_access_token)}

# NOTE(review): only the first five search hits are processed — this looks like a
# development limit; confirm before running a full harvest.
for result in results[0:5]:
    figshare_id = result["id"]
    r = requests.get(BASE_URL + '/articles/' + str(figshare_id), headers=api_call_headers, timeout=60)
    # Stop on an HTTP error instead of failing later on a missing key of the error payload.
    r.raise_for_status()
    metadata = json.loads(r.text)

    row_dict = {}

    # id and links
    row_dict["doi"] = metadata["doi"]
    row_dict["doi_url"] = "https://doi.org/" + metadata["doi"]
    row_dict["repository_url"] = metadata["url_public_html"]

    # access right
    row_dict["is_open_access"] = bool(metadata.get("is_public"))

    # authors: only the full name and the ORCID iD are read from the author entries
    authors_list = metadata.get("authors") or []
    row_dict["authors_count"] = len(authors_list)
    row_dict["authors_affiliation_count"] = -1
    row_dict["authors_familyname_count"] = -1
    row_dict["authors_givennames_count"] = -1
    row_dict["authors_gnd_count"] = -1
    row_dict["authors_name_count"] = sum(
        1 for author in authors_list if (author.get("full_name") or "").strip()
    )
    row_dict["authors_orcid_count"] = sum(
        1 for author in authors_list if (author.get("orcid_id") or "").strip()
    )

    # grants
    row_dict["has_funding_info"] = bool(metadata.get("funding_list"))

    # journal
    row_dict["has_journal"] = None

    # keywords free text
    row_dict["keywords_freetext_count"] = len(metadata.get("tags") or [])

    # keywords controlled vocabularies
    row_dict["keywords_controlled_vocabularies_count"] = -1

    # language
    row_dict["has_language"] = None

    # license ("NA" is the placeholder used by every other text column)
    row_dict["has_license"] = False
    row_dict["license_name"] = "NA"
    if "license" in metadata.keys():
        row_dict["has_license"] = True
        row_dict["license_name"] = metadata["license"]["name"]

    # conference
    row_dict["has_conference_acronym"] = None
    row_dict["has_conference_dates"] = None
    row_dict["has_conference_place"] = None
    row_dict["has_conference_session"] = None
    row_dict["has_conference_session_part"] = None
    row_dict["has_conference_title"] = None
    row_dict["has_conference_website"] = None

    # creation date (taken from the record's published_date)
    published_date = metadata.get("published_date")
    row_dict["has_creation_date"] = bool((published_date or "").strip())
    row_dict["creation_date"] = published_date if row_dict["has_creation_date"] else "NA"

    # other dates
    row_dict["has_other_dates"] = None
    row_dict["other_dates"] = "NA"

    # references with no identifiers count
    row_dict["references_no_identifiers_count"] = -1

    # references with identifiers count
    row_dict["references_with_identifiers_count"] = len(metadata.get("related_materials") or [])

    # title
    row_dict["has_title"] = bool((metadata.get("title") or "").strip())

    # version (True as soon as the key is present)
    row_dict["has_version"] = "version" in metadata.keys()

    # files: names joined with "; "
    files = metadata.get("files") or []
    row_dict["files"] = "; ".join(str(file["name"]) for file in files)

    # save
    rows_list.append(row_dict)

In [201]:
# Turn the collected Figshare rows into a table and write it to disk without the index column.
df = pd.DataFrame(data=rows_list)
df.to_csv("figshare.csv", index=False)

# Playground

# Zenodo

In [5]:
# query: fetch a single page (up to 200 records) of Zenodo posters for ad-hoc inspection
response = requests.get(
    "https://zenodo.org/api/records",
    params=dict(
        resource_type="poster",
        size=200,
        access_token=zenodo_access_token,
    ),
)
r = response.json()

# Earlier scratch code, kept for reference only (not executed):
#results = r["hits"]["hits"]
#metadata = results[0]
#if title in metadata:

# print("Found", len(results), "results for", dataset_documentation)

# # save results
# for result in results:
#     title = result["metadata"]["title"]
#     publication_date = result["metadata"]["publication_date"]
#     url = result["doi_url"]
#     df.loc[len(df)] = [title, publication_date, url, dataset_documentation]

In [57]:
# Extract the hit list from the search response and show the metadata of its last record.
results = r["hits"]["hits"]
display(results[-1]["metadata"])

{'title': 'Public policy which eases the personal financial impact of living with multiple long-term conditions',
 'doi': '10.5281/zenodo.13736732',
 'publication_date': '2024-09-09',
 'description': '<p>This poster summarises a policy synthesis about financial policies that support people living with multiple long-term conditions</p>',
 'access_right': 'open',
 'creators': [{'name': 'Welch, Jack', 'affiliation': None},
  {'name': 'McMahon, James', 'affiliation': None},
  {'name': 'Dace, Sally', 'affiliation': None},
  {'name': 'Laidlaw, Lynn', 'affiliation': None},
  {'name': 'Poole, Robin', 'affiliation': None},
  {'name': 'Chandaman, Luke', 'affiliation': None},
  {'name': 'Holland, Emilia', 'affiliation': None},
  {'name': 'Wilkinson, Becky', 'affiliation': None},
  {'name': 'Cheung, Kelly', 'affiliation': None},
  {'name': 'Kaur Gill, Jaskiran', 'affiliation': None},
  {'name': 'Jacob, Chandni', 'affiliation': None},
  {'name': 'Alwan, Nisreen', 'affiliation': None},
  {'name': 'F

In [42]:
# Fetch a single Zenodo record by id (no access token is sent).
r = requests.get("https://zenodo.org/api/records/13867012")

In [44]:
# Show the parsed JSON body of the response held in `r`.
display(r.json())

{'created': '2024-10-01T12:18:56.304014+00:00',
 'modified': '2024-10-01T12:18:57.326668+00:00',
 'id': 13867012,
 'conceptrecid': '13867011',
 'doi': '10.5281/zenodo.13867012',
 'conceptdoi': '10.5281/zenodo.13867011',
 'doi_url': 'https://doi.org/10.5281/zenodo.13867012',
 'metadata': {'title': 'Electron-Nucleus Cross-Section Measurements at MAMI for Neutrino Physics',
  'doi': '10.5281/zenodo.13867012',
  'publication_date': '2024-10-01',
  'description': '<p>Electron scattering experiments are powerful tools to study problems in nuclear physics. Recently, theorists achieved to extend ab initio calculations to medium-mass nuclei. The calculations can be verified with new electron scattering experiments. These experiments can reveal information about nuclear ground state properties (elastic scattering) as well as the dynamics inside the nucleus (quasielastic scattering). Furthermore, neutrino physics experiments need precise nuclear physics input. Due to the similarity between electr

In [46]:
# Request a specific representation of a single Zenodo record.
# - `Accept` is the header that selects the response format; `Content-Type`
#   describes a request body, which a GET does not have.
# - A dedicated name is used so the `headers` column list defined near the top
#   of the notebook is not overwritten.
request_headers = {"Accept": "application/vnd.zenodo.v1+json"}
r = requests.get("https://zenodo.org/api/records/4534262", headers=request_headers)

In [47]:
# Show the parsed JSON body of the response held in `r`.
display(r.json())

{'created': '2021-02-11T12:08:37.383111+00:00',
 'modified': '2024-07-19T08:37:13.597868+00:00',
 'id': 4534262,
 'conceptrecid': '4534261',
 'doi': '10.5281/zenodo.4534262',
 'conceptdoi': '10.5281/zenodo.4534261',
 'doi_url': 'https://doi.org/10.5281/zenodo.4534262',
 'metadata': {'title': 'SimuSafe Project Poster--General Overview',
  'doi': '10.5281/zenodo.4534262',
  'publication_date': '2021-02-11',
  'description': '<p>This upload is the project poster used to disseminate a general overview and description of the SIMUSAFE project.&nbsp;&nbsp;</p>',
  'access_right': 'open',
  'creators': [{'name': 'Allison Duncan',
    'affiliation': 'Coventry University',
    'orcid': '0000-0002-4060-420X'}],
  'keywords': ['Poster', 'dissemination'],
  'language': 'eng',
  'resource_type': {'title': 'Poster', 'type': 'poster'},
  'license': {'id': 'cc-by-4.0'},
  'grants': [{'code': '723386',
    'internal_id': '10.13039/501100000780::723386',
    'funder': {'name': 'European Commission',
    

In [33]:
# Create a Sickle client pointed at Zenodo's OAI endpoint.
from sickle import Sickle
sickle = Sickle('https://zenodo.org/oai2d')

In [34]:
# Start listing records in the oai_datacite4 metadata format (no other selection argument is passed).
records = sickle.ListRecords(metadataPrefix='oai_datacite4')

In [35]:
# Count the records by materializing the whole iterator.
# NOTE: this run ended in a KeyboardInterrupt (see the output below).
len(list(records))

KeyboardInterrupt: 

In [29]:
# Write the raw XML of the next record from the iterator to response.xml for manual inspection.
with open('response.xml', 'wb') as fp:
    fp.write(records.next().raw.encode('utf8'))

In [5]:
# Advance the record iterator and keep the record for inspection.
record = records.next()

In [19]:
# Print the record (the output below is its XML).
print(record)

<record xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><header><identifier>oai:zenodo.org:4562717</identifier><datestamp>2024-07-19T07:54:36Z</datestamp><setSpec>openaire</setSpec><setSpec>user-coolstars20half</setSpec></header><metadata><oai_datacite xmlns="http://schema.datacite.org/oai/oai-1.1/" xsi:schemaLocation="http://schema.datacite.org/oai/oai-1.1/ http://schema.datacite.org/oai/oai-1.1/oai.xsd"><schemaVersion>4.3</schemaVersion><datacentreSymbol>CERN.ZENODO</datacentreSymbol><payload><resource xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd"><identifier identifierType="DOI">10.1051/0004-6361/202038295</identifier><alternateIdentifiers><alternateIdentifier alternateIdentifierType="oai">oai:zenodo.org:4562717</alternateIdentifier></alternateIdentifiers><creators><creator><creatorName nameType="Personal">C. Cifuentes</cre

In [16]:
# Fetch one specific record by its OAI identifier in the oai_datacite4 format.
record = sickle.GetRecord(identifier='oai:zenodo.org:4562717', metadataPrefix='oai_datacite4')

In [17]:
# Print the fetched record (the output below is its XML).
print(record)

<record xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><header><identifier>oai:zenodo.org:4562717</identifier><datestamp>2024-07-19T07:54:36Z</datestamp><setSpec>openaire</setSpec><setSpec>user-coolstars20half</setSpec></header><metadata><oai_datacite xmlns="http://schema.datacite.org/oai/oai-1.1/" xsi:schemaLocation="http://schema.datacite.org/oai/oai-1.1/ http://schema.datacite.org/oai/oai-1.1/oai.xsd"><schemaVersion>4.3</schemaVersion><datacentreSymbol>CERN.ZENODO</datacentreSymbol><payload><resource xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd"><identifier identifierType="DOI">10.1051/0004-6361/202038295</identifier><alternateIdentifiers><alternateIdentifier alternateIdentifierType="oai">oai:zenodo.org:4562717</alternateIdentifier></alternateIdentifiers><creators><creator><creatorName nameType="Personal">C. Cifuentes</cre

In [14]:
# Attempt to select posters through a `resourcetype` argument.
# NOTE(review): this call fails with HTTP 422 (see the error below); unlike the
# GetRecord call that succeeds earlier in this section, it passes no `identifier`.
records_posters = sickle.GetRecord(resourcetype='Poster', metadataPrefix='oai_datacite')

HTTPError: 422 Client Error: UNPROCESSABLE ENTITY for url: https://zenodo.org/oai2d?resourcetype=Poster&metadataPrefix=oai_datacite&verb=GetRecord

In [15]:
# List the metadata formats offered by the endpoint.
metadataFormats = sickle.ListMetadataFormats()
list(metadataFormats)

[<MetadataFormat marcxml>,
 <MetadataFormat oai_dc>,
 <MetadataFormat dcat>,
 <MetadataFormat marc21>,
 <MetadataFormat datacite>,
 <MetadataFormat oai_datacite>,
 <MetadataFormat datacite4>,
 <MetadataFormat oai_datacite4>]

# Figshare

In [16]:
# Figshare personal access token, read from the environment (None when the variable is unset)
figshare_access_token = os.environ.get("FIGSHARE_ACCESS_TOKEN")

In [11]:
# inspired from the example available here https://help.figshare.com/article/how-to-use-the-figshare-api#search-ids
# Collects the search hits for items of type "poster" into `results` and prints each page.
BASE_URL = "https://api.figshare.com/v2"


search_logic = ":item_type:poster"
query = '{"search_for": "' + search_logic + '"}'
y = json.loads(query)
results = []
for j in range(1, 2):
    search_response = requests.post(
        BASE_URL + "/articles/search?page_size=1000&page={}".format(j),
        params=y,
        timeout=60,
    )
    # Fail loudly on an HTTP error: an error payload is a dict, and extending
    # `results` with it would silently add its keys instead of article records.
    search_response.raise_for_status()
    r = json.loads(search_response.content)
    results.extend(r)
    print(r)

[{'project_id': None, 'id': 4522535, 'title': 'Enhancing Virtual Screening Performance of Protein Kinases with Molecular Dynamics Simulations', 'doi': '10.6084/m9.figshare.4522535.v1', 'handle': '', 'url': 'https://api.figshare.com/v2/articles/4522535', 'published_date': '2017-01-05T20:02:03Z', 'thumb': 'https://s3-eu-west-1.amazonaws.com/pfigshare-u-previews/7318262/thumb.png', 'defined_type': 5, 'defined_type_name': 'poster', 'group_id': None, 'url_private_api': 'https://api.figshare.com/v2/account/articles/4522535', 'url_public_api': 'https://api.figshare.com/v2/articles/4522535', 'url_private_html': 'https://figshare.com/account/articles/4522535', 'url_public_html': 'https://figshare.com/articles/poster/Enhancing_Virtual_Screening_Performance_of_Protein_Kinases_with_Molecular_Dynamics_Simulations/4522535', 'timeline': {'posted': '2017-01-05T20:02:03', 'firstOnline': '2017-01-05T20:02:03'}, 'resource_title': '', 'resource_doi': ''}, {'project_id': None, 'id': 17122289, 'title': 'Com

In [17]:
# Show the first search hit.
display(results[0])

{'project_id': None,
 'id': 4522535,
 'title': 'Enhancing Virtual Screening Performance of Protein Kinases with Molecular Dynamics Simulations',
 'doi': '10.6084/m9.figshare.4522535.v1',
 'handle': '',
 'url': 'https://api.figshare.com/v2/articles/4522535',
 'published_date': '2017-01-05T20:02:03Z',
 'thumb': 'https://s3-eu-west-1.amazonaws.com/pfigshare-u-previews/7318262/thumb.png',
 'defined_type': 5,
 'defined_type_name': 'poster',
 'group_id': None,
 'url_private_api': 'https://api.figshare.com/v2/account/articles/4522535',
 'url_public_api': 'https://api.figshare.com/v2/articles/4522535',
 'url_private_html': 'https://figshare.com/account/articles/4522535',
 'url_public_html': 'https://figshare.com/articles/poster/Enhancing_Virtual_Screening_Performance_of_Protein_Kinases_with_Molecular_Dynamics_Simulations/4522535',
 'timeline': {'posted': '2017-01-05T20:02:03',
  'firstOnline': '2017-01-05T20:02:03'},
 'resource_title': '',
 'resource_doi': ''}

In [18]:
# Fetch one article by id, sending the Figshare token in the Authorization header,
# and parse the JSON body into `metadata`.
api_call_headers = {'Authorization': "token " + str(figshare_access_token)}
r=requests.get(BASE_URL + '/articles/' + str(4522535), headers=api_call_headers)
metadata = json.loads(r.text)

In [19]:
# Show the article metadata parsed in the previous cell.
display(metadata)

{'files': [{'id': 7318262,
   'name': 'Enhancing Virtual Screening Performance of Protein Kinases with Molecular Dynamics Simulations.pdf',
   'size': 10124453,
   'is_link_only': False,
   'download_url': 'https://ndownloader.figshare.com/files/7318262',
   'supplied_md5': '441d9a5e225e0326bf3726575c8e4f1f',
   'computed_md5': '441d9a5e225e0326bf3726575c8e4f1f',
   'mimetype': 'application/pdf'}],
 'custom_fields': [],
 'authors': [{'id': 1522795,
   'full_name': 'Tavina L. Offutt',
   'is_active': False,
   'url_name': '_',
   'orcid_id': ''},
  {'id': 82567,
   'full_name': 'Robert V. Swift',
   'is_active': False,
   'url_name': 'Robert_V_Swift',
   'orcid_id': ''},
  {'id': 82566,
   'full_name': 'Rommie E. Amaro',
   'is_active': False,
   'url_name': 'Rommie_E_Amaro',
   'orcid_id': ''}],
 'figshare_url': 'https://figshare.com/articles/poster/Enhancing_Virtual_Screening_Performance_of_Protein_Kinases_with_Molecular_Dynamics_Simulations/4522535',
 'download_disabled': False,
 'de