# Publication references by researcher

This notebook uses the [DataCite GraphQL API](https://api.datacite.org/graphql) to fetch all DataCite references of the DataCite publications authored by a particular researcher, using his/her ORCID ID.

In [1742]:
# Prepare the R graphql client.

library("httr")
library("ghql")
library("jsonlite")
library("IRdisplay")
library("dplyr")
library("igraph")

cli <- GraphqlClient$new(
  url = "https://api.datacite.org/graphql"
)
qry <- Query$new()

In [1743]:
# Generate the GraphQL query: find the researcher by ORCID ID, then fetch the first 50 publications with DataCite DOIs linked to that ORCID account.

query <- '{
   researcher(id: "https://orcid.org/0000-0003-1419-2405") {
    id
    name
    softwares(first: 50) {
      totalCount
      nodes {
        id
        relatedIdentifiers {
          relationType
          relatedIdentifier
          relatedIdentifierType
        }
      }
    }
  }
}'

In [1744]:
# Run the query and parse the JSON response

qry$query('getdata', query)
data <- fromJSON(cli$exec(qry$queries$getdata))

In [1745]:
# Get the name of the researcher with ORCID ID https://orcid.org/0000-0003-1419-2405

display_markdown(data$data$researcher$name)

Martin Fenner

In [1746]:
# Get the number of publications

display_json(data$data$researcher$softwares$totalCount)

ERROR: Error in prepare_content(isbinary, data, file): Either need to specify data or file, but not both


In [None]:
# generate data frame for nodes
researchers <- data.frame(id=data$data$researcher$id, pid_type=c('researcher'))
softwares <- data.frame(id=data$data$researcher$softwares$nodes$id, pid_type=c('dataset'))
references <- data.frame(id=bind_rows(data$data$researcher$softwares$nodes$relatedIdentifiers)[,2], pid_type=c('publication'))
nodes <- unique(rbind(researchers, softwares, references))

# generate data frame for edges
# loop through nodes, as they can have more than one edge
edges <- data.frame(to=datasets[,1], from=data$data$researcher$id)
nodes_with_references <- bind_rows(data$data$researcher$softwares$nodes) %>% filter(lengths(relatedIdentifiers) != 0)
e <- data.frame(to=character(), from=character(),stringsAsFactors=FALSE)
for (i in 1:nrow(nodes_with_references)) {
  # this is a workaround to capture all the edges
  row <- data.frame(to=unlist(nodes_with_references[i,2]), from=nodes_with_references[i,1]) %>% filter(startsWith(as.character(to), '10.'))
  edges <- unique(rbind(edges, row))
}

edges <- edges %>% mutate(to = ifelse(startsWith(as.character(to), '10.'), paste('https://doi.org/', to, sep=''), as.character(to)))

# Some vertex names in edge list are not listed in vertex data frame
# g <- graph_from_data_frame(d=edges, vertices=nodes)
#V(g)$color <- c('#48b1f4', '#47a878')[1+(V(g)$pid_type=="researcher")]
g <- graph_from_data_frame(d=edges)
V(g)$size <- 4
E(g)$arrow.mode <- 0
l <- layout_with_dh(g)
plot(g, vertex.label=NA, layout=l, arrow.mode=0)

In [None]:
# Generate a list of formatted citations in APA format for the publications

ids <- substring(publications[,1], 17)
ids <- paste(ids, collapse = ',')
url <- paste('https://api.datacite.org/dois?style=apa&page[size]=250&sort=created&ids=', ids, sep = '')
response <- GET(url, accept("text/x-bibliography"))
display_markdown('## Publications')
display_markdown(content(response, as = 'text'))

In [None]:
# Generate a list of formatted citations in APA format for the references (if they are DataCite DOIs)

ids <- references[,1]
ids <- paste(ids, collapse = ',')
url <- paste('https://api.datacite.org/dois?style=apa&page[size]=250&sort=created&ids=', ids, sep = '')
response <- GET(url, accept("text/x-bibliography"))
display_markdown('## References')
display_markdown(content(response, as = 'text'))    