# Publication references by researcher

This notebook uses the [DataCite GraphQL API](https://api.datacite.org/graphql) to fetch all DataCite references of the DataCite publications authored by a particular researcher, using his/her ORCID ID.

In [1]:
# Prepare the R graphql client.

library("httr")
library("ghql")
library("jsonlite")
library("IRdisplay")
library("dplyr")
library("igraph")

cli <- GraphqlClient$new(
  url = "https://api.datacite.org/graphql"
)
qry <- Query$new()


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Attaching package: 'igraph'

The following objects are masked from 'package:dplyr':

    as_data_frame, groups, union

The following objects are masked from 'package:stats':

    decompose, spectrum

The following object is masked from 'package:base':

    union



In [2]:
# Generate the GraphQL query: find the researcher by ORCID ID, then fetch the first 50 publications with DataCite DOIs linked to that ORCID account.

query1 <- '{
   researcher(id: "https://orcid.org/0000-0003-1419-2405") {
    id
    name
    publications(first: 50) {
      totalCount
      nodes {
        id
        relatedIdentifiers {
          relatedIdentifier
        }
      }
    }
  }
}'

query2 <- '{
  researcher(id: "https://orcid.org/0000-0003-0902-4386") {
    id
    name
    publications(first: 50) {
      totalCount
      nodes {
        id
        relatedIdentifiers {
          relatedIdentifier
        }
      }
    }
  }
}'	

In [3]:
# Run the queries and parse the JSON response

qry$query('getdata1', query1)
data1 <- fromJSON(cli$exec(qry$queries$getdata1))

qry$query('getdata2', query1)
data2 <- fromJSON(cli$exec(qry$queries$getdata2))
data1
data2

id,relatedIdentifiers
<chr>,<list>
https://doi.org/10.6084/m9.figshare.154691,
https://doi.org/10.6084/m9.figshare.154691.v1,10.6084/m9.figshare.154691
https://doi.org/10.6084/m9.figshare.107019.v4,10.6084/m9.figshare.107019
https://doi.org/10.6084/m9.figshare.107019,
https://doi.org/10.5281/zenodo.1297432,10.5281/zenodo.1297431
https://doi.org/10.5281/zenodo.1297431,10.5281/zenodo.1297432
https://doi.org/10.23640/07243.5985004.v1,10.23640/07243.5985004
https://doi.org/10.5281/zenodo.1120249,"10.5281/zenodo.1120275, 10.5281/zenodo.1120248"
https://doi.org/10.5281/zenodo.1120265,"10.5281/zenodo.1120275, 10.5281/zenodo.1120248"
https://doi.org/10.5281/zenodo.1120248,"10.5281/zenodo.1120275, 10.5281/zenodo.1120249, 10.5281/zenodo.1120261, 10.5281/zenodo.1120265"


id,relatedIdentifiers
<chr>,<list>
https://doi.org/10.6084/m9.figshare.154691,
https://doi.org/10.6084/m9.figshare.154691.v1,10.6084/m9.figshare.154691
https://doi.org/10.6084/m9.figshare.107019.v4,10.6084/m9.figshare.107019
https://doi.org/10.6084/m9.figshare.107019,
https://doi.org/10.5281/zenodo.1297432,10.5281/zenodo.1297431
https://doi.org/10.5281/zenodo.1297431,10.5281/zenodo.1297432
https://doi.org/10.23640/07243.5985004.v1,10.23640/07243.5985004
https://doi.org/10.5281/zenodo.1120249,"10.5281/zenodo.1120275, 10.5281/zenodo.1120248"
https://doi.org/10.5281/zenodo.1120265,"10.5281/zenodo.1120275, 10.5281/zenodo.1120248"
https://doi.org/10.5281/zenodo.1120248,"10.5281/zenodo.1120275, 10.5281/zenodo.1120249, 10.5281/zenodo.1120261, 10.5281/zenodo.1120265"


In [4]:
# Get the name of the researcher with ORCID ID https://orcid.org/0000-0003-1419-2405

display_markdown(data$data)

ERROR: Error in data$data: Objekt des Typs 'closure' ist nicht indizierbar


In [None]:
# Get the number of publications

display_json(data$data$researcher$publications$totalCount)

In [None]:
# generate data frame for nodes
researchers <- data.frame(id=data$data$researcher$id, pid_type=c('researcher'))
publications <- data.frame(id=data$data$researcher$publications$nodes$id, pid_type=c('publication'))
references <- data.frame(id=bind_rows(data$data$researcher$publications$nodes$relatedIdentifiers)[,1], pid_type=c('publication'))
nodes <- unique(rbind(researchers, publications, references))

# loop through nodes, as they can have more than one edge
edges <- data.frame(to=publications[,1], from=data$data$researcher$id)
nodes_with_references <- bind_rows(data$data$researcher$publications$nodes) %>% filter(lengths(relatedIdentifiers) != 0)

for (i in 1:nrow(nodes_with_references)) {
  row <- data.frame(to=unlist(nodes_with_references[i,2]), from=nodes_with_references[i,1])
  edges <- unique(rbind(edges, row))
}

# express DOIs as URLs
nodes <- nodes %>% mutate(id = ifelse(startsWith(as.character(id), '10.'), paste('https://doi.org/', id, sep=''), as.character(id)))
edges <- edges %>% mutate(to = ifelse(startsWith(as.character(to), '10.'), paste('https://doi.org/', to, sep=''), as.character(to)))

g <- graph_from_data_frame(d=unique(edges), vertices=unique(nodes))
V(g)$color <- c('#48b1f4', '#47a878')[1+(V(g)$pid_type=="researcher")]
V(g)$size <- 4
E(g)$arrow.mode <- 0
l <- layout_with_dh(g)
plot(g, vertex.label=NA, layout=l, arrow.mode=0)