In [1]:
library(tidyverse)
# install.packages("rcrossref")
library(rcrossref)

-- [1mAttaching packages[22m --------------------------------------- tidyverse 1.3.2 --
[32mv[39m [34mggplot2[39m 3.4.1     [32mv[39m [34mpurrr  [39m 1.0.1
[32mv[39m [34mtibble [39m 3.1.8     [32mv[39m [34mdplyr  [39m 1.1.0
[32mv[39m [34mtidyr  [39m 1.3.0     [32mv[39m [34mstringr[39m 1.5.0
[32mv[39m [34mreadr  [39m 2.1.4     [32mv[39m [34mforcats[39m 1.0.0
-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


In [2]:
# Make sure all columns of a tibble are being shown
options(repr.matrix.max.rows = Inf, repr.matrix.max.cols = Inf)

In [3]:
grants <- read_delim("2_GrantNumbersWithPublicatons.csv", delim = ";", col_names=TRUE)

[1mRows: [22m[34m4353[39m [1mColumns: [22m[34m1[39m
[36m--[39m [1mColumn specification[22m [36m--------------------------------------------------------[39m
[1mDelimiter:[22m ";"
[32mdbl[39m (1): GrantNumber

[36mi[39m Use `spec()` to retrieve the full column specification for this data.
[36mi[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [4]:
dim(grants)
head(grants)

GrantNumber
<dbl>
149634
146722
150754
157023
172242
156019


In [5]:
publications <- read_delim("../raw/OutputdataScientificPublication.csv", 
                           col_select = c(
                               "GrantNumber",
                               "ScientificPublication_DOI",
                               "ScientificPublication_Year"
                           ), 
                           delim = ";", col_names=TRUE)

[1mRows: [22m[34m173212[39m [1mColumns: [22m[34m3[39m
[36m--[39m [1mColumn specification[22m [36m--------------------------------------------------------[39m
[1mDelimiter:[22m ";"
[31mchr[39m (1): ScientificPublication_DOI
[32mdbl[39m (2): GrantNumber, ScientificPublication_Year

[36mi[39m Use `spec()` to retrieve the full column specification for this data.
[36mi[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [6]:
dim(publications)

In [7]:
# Keep only publications with a DOI
publications <- filter(publications, !is.na(ScientificPublication_DOI))

In [8]:
dim(publications)

In [9]:
# Keep only the publications of the grants we're interested in (i.e. the biomed grants!)
publications <- filter(publications, GrantNumber %in% grants$GrantNumber)

In [10]:
dim(publications)

In [11]:
# Trim the DOIS
publications$ScientificPublication_DOI <- str_trim(publications$ScientificPublication_DOI)

In [12]:
# Apparently, some DOIs in the original data are not well-formed!!!

# substring <- "http"
# result <- publications[grepl(substring, publications$ScientificPublication_DOI), ]
# result

# substring <- "works"
# result <- publications[grepl(substring, publications$ScientificPublication_DOI), ]
# result

In [13]:
# We remove the most obvious mistakes

# http://dx.doi.org/
# https://doi.org/
# http://link.springer.com/article/
# http://onlinelibrary.wiley.com/doi/

sub_strings <- "http://dx.doi.org/|https://doi.org/|http://link.springer.com/article/|http://onlinelibrary.wiley.com/doi/"

# Remove the substrings publications$ScientificPublication_DOI
publications$ScientificPublication_DOI <- sub(sub_strings, "", publications$ScientificPublication_DOI)

In [14]:
# Split the data frame into batches of 50 rows in order to retrieve citations per publication from Crossref in batches of 50
publication_batches <- split(publications, rep(1:ceiling(nrow(publications)/50), each = 50, length.out = nrow(publications)))

In [15]:
# How many batches of 50 do we have?
end_index <- length(publication_batches)
end_index

In [16]:
# Prepare a bucket to append each batch's citation data
bucket <- tibble(doi = character(), is.referenced.by.count = character())

# Write an empty file with today's date in the name to keep the citations results
date_string <- format(Sys.Date(), "%Y%m%d")
file_name <- sprintf("3_PublicationCitations_%s.csv", date_string)
write.table(bucket, file_name, sep=";", fileEncoding="UTF-8", row.names = FALSE)

In [17]:
start_index <- 1

# Loop from 1 to the number of batches of 50
# To avoid timeouts at the Crossref server, there are quite some system delays implemented in this loop.
# ...also, the Crossref API is not too fast. For 600 batches of 50 DOIs, this loop runs probabyl 10 to 11 hours!!!

for (i in start_index:end_index) {
    
    # tryCatch to deal with Crossref server timeouts
    tryCatch({
        
        # The bucket for the current batch of DOIs
        batch_bucket <- tibble(doi = character(), is.referenced.by.count = character())
        
        # The current batch index as a string
        idx <- sprintf("%s", i)
      
        # The DOIs of the current batch of 50
        dois <- publication_batches[i][[idx]]$ScientificPublication_DOI
      
        # Print some progress to the console
        cat(sprintf("-%s", i))
        flush.console()
      
        # Search for records in Crossref
        results <- cr_works(dois)
        citation_counts <- select(results$data, c(doi, is.referenced.by.count))
        
        # Append to current bucket
        batch_bucket <- rbind(batch_bucket, citation_counts)
        
        # Write results so far to a file, in case of a desaster
        write.table(batch_bucket, file_name, sep = ";", append = TRUE, row.names = FALSE, col.names = FALSE)

        # 30 seconds delay before the next request  
        Sys.sleep(30)
      
  }, error = function(e) {
        
        # Print error message
        cat(sprintf("-%s", e$message))
        flush.console()
      
        next_index <- i + 1
      
        if (next_index <= end_index) {
            # If the loop isn't done yet, restart at the last index
            # ...after a 60 seconds delay to avoid running into the next timeout by the Crossref server
            Sys.sleep(60)
            i <- next_index - 1
        } else {
            stop(e)
        }
  })
}

print("Loop done")

-1

"404 (client error): /works/10.1074/jbc.M114.570879. - Resource not found."


-2-3-4-5-6

"404 (client error): /works/10.1371/journal.pone.019785 - Resource not found."


-7-8

"404 (client error): /works/doi:10.1371/journal.%20pone.0163475 - Resource not found."


-9

"404 (client error): /works/10.1016/j.euroneuro.2016.10.009. - Resource not found."


-10-11-12-13

"404 (client error): /works/10.1016/jdiagmicrobio.2020.115280 - Resource not found."


-14-15

"404 (client error): /works/10.1371 - Resource not found."


-16-17-18-19

"404 (client error): /works/10.1111/apt.16145 - Resource not found."
"404 (client error): /works/10.4414/phc-d.2022.20108 - Resource not found."


-20-21-22-23

"404 (client error): /works/10.34763/jmotherandchild - Resource not found."
"404 (client error): /works/10.1186/s13054-015-0751-x. - Resource not found."
"404 (client error): /works/PMID:%2023946633 - Resource not found."


-24-25-26-27-28

"404 (client error): /works/10.1016/j.arthro.2015.12.0 - Resource not found."


-29-30

"404 (client error): /works/10.1038/emi.2017.71 - Resource not found."


-31-32-33-34-35

"404 (client error): /works/10.1152/ajpcell.00027.2014. - Resource not found."


-36-37-38-39

"404 (client error): /works/doi:10.5194/sd-4-1-2016 - Resource not found."
"404 (client error): /works/10.18632/oncotarget8385 - Resource not found."
"404 (client error): /works/10.1111/jne.12342. - Resource not found."


-40

"404 (client error): /works/10.1016/j.semcdb.2016.09.01010.1016/j.semcdb.2016.09.010.epub2016sep28 - Resource not found."
"404 (client error): /works/Nov%201:jc20162449 - Resource not found."
"404 (client error): /works/10.1093/schbul/sbt180. - Resource not found."


-41

"404 (client error): /works/10.1016/j.neuropharm.2013.04.019. - Resource not found."


-42-43

"404 (client error): /works/10.3171/2017.7 - Resource not found."
"404 (client error): /works/10.3897/phytokeys.205.76821 - Resource not found."


-44-45-46-47-48-49

"404 (client error): /works/10.5281/zenodo.7303672 - Resource not found."


-50-51

"404 (client error): /works/PMC4421898 - Resource not found."
"404 (client error): /works/dx.doi.org/10.5167/uzh-110665 - Resource not found."


-52-53

"404 (client error): /works/10.1021/acs.analchem.6b0336510.1021/acs.analchem.6b03365 - Resource not found."


-54

"404 (client error): /works/10.3929/ethz-b-000535274 - Resource not found."


-55

"404 (client error): /works/10.1016/j.schres.2014.01.015. - Resource not found."


-56

"404 (client error): /works/10.3929/ethz-b-000439004 - Resource not found."


-57-58-59-60-61-62-63-64-65-66-67-68

"404 (client error): /works/smw.2016.14349 - Resource not found."


-69

"404 (client error): /works/10.1155/2020/1948602) - Resource not found."


-70-71

"404 (client error): /works/10.21256/zhaw-20440 - Resource not found."


-72-73-74-75-76-77-78-79

"404 (client error): /works/10.13097/archive-ouverte/unige:151796 - Resource not found."


-80-81

"404 (client error): /works/10.1016/j.anbehay.2015.11.022 - Resource not found."
"404 (client error): /works/doi:%2010.1007/978-3-319-46503-6_13. - Resource not found."


-82

"404 (client error): /works/10.1016/j.ab.2015.04.039. - Resource not found."


-83

"404 (client error): /works/10.1007/s11606-02-073017 - Resource not found."
"404 (client error): /works/http:/â€‹/â€‹dx.â€‹doi.â€‹org/â€‹10.â€‹1104/â€‹pp.â€‹112.â€‹213280 - Resource not found."


-84-85

"404 (client error): /works/TBD - Resource not found."
"404 (client error): /works/10.7916/d8xd127w - Resource not found."


-86-87-88

"404 (client error): /works/10.1160/TH13-07-0531. - Resource not found."
"404 (client error): /works/10.1111/eci.12238. - Resource not found."
"404 (client error): /works/10.1155/2012/868251. - Resource not found."
"404 (client error): /works/10.1210/en.2012-1784. - Resource not found."


-89

"404 (client error): /works/36(10):1445-1458 - Resource not found."


-90

"404 (client error): /works/10.5075/epfl-thesis-10014 - Resource not found."


-91-92-93-94-95

"404 (client error): /works/10.1038/s41598-017-16613-w10.1038/s41598-017-16613-w - Resource not found."
"404 (client error): /works/10.1007/978-1-4939-6993-7_1010.1007/978-1-4939-6993-7_10 - Resource not found."
"404 (client error): /works/10.1038/srep4168610.1038/srep41686 - Resource not found."
"404 (client error): /works/10.13128/bae-7671 - Resource not found."


-96-97

"404 (client error): /works/10.1016/j.jaci.2013.12.1082. - Resource not found."
"404 (client error): /works/DOI:%2010.1038 - Resource not found."


-98-99-100-101-102-103

"404 (client error): /works/10.1016/j.ydbio.2014.08.016.epub2014aug27 - Resource not found."
"404 (client error): /works/10.1000/182 - Resource not found."


-104-105

"404 (client error): /works/10.1101/530584v2 - Resource not found."


-106-107

"404 (client error): /works/10.6094/klinpfleg.3.48 - Resource not found."
"404 (client error): /works/10.1016/j.actbio.2016.07.041. - Resource not found."
"404 (client error): /works/10.11588/cipeg.2017.1.44165 - Resource not found."


-108-109-110-111

"404 (client error): /works/http:/â€‹/â€‹dx.â€‹doi.â€‹org/â€‹10.â€‹1105/â€‹tpc.â€‹112.â€‹106898 - Resource not found."
"404 (client error): /works/http:/â€‹/â€‹dx.â€‹doi.â€‹org/â€‹10.â€‹1105/â€‹tpc.â€‹111.â€‹095083 - Resource not found."
"404 (client error): /works/http:/â€‹/â€‹dx.â€‹doi.â€‹org/â€‹10.â€‹1105/â€‹tpc.â€‹113.â€‹111484 - Resource not found."


-112

"404 (client error): /works/10.1128/genomea.00143-16.copyright - Resource not found."


-113

"404 (client error): /works/10.1371/journal.pone.0125603.eCollection%202015. - Resource not found."
"404 (client error): /works/10.1523/JNEUROSCI.5212-13.2014. - Resource not found."


-114-115-116-117

"404 (client error): /works/10.1038/gim.2014.166. - Resource not found."
"404 (client error): /works/10.3929/ethz-b-000399277 - Resource not found."


-118

"404 (client error): /works/10.1002 - Resource not found."


-119

"404 (client error): /works/10.1371 - Resource not found."


-120-121-122-123

"404 (client error): /works/10.4172 - Resource not found."


-124

"404 (client error): /works/10.17590/asr.0000201 - Resource not found."


-125-126

"404 (client error): /works/doi:%2010.21037/tau.2016.02.01. - Resource not found."


-127-128-129-130

"404 (client error): /works/10.1016/j.juro.2016.04.061. - Resource not found."


-131

"404 (client error): /works/10.29392//001c.11943 - Resource not found."
"404 (client error): /works/doi:%2010.1038/srep42108. - Resource not found."


-132

"404 (client error): /works/10.1111/12362 - Resource not found."


-133-134-135-136-137

"404 (client error): /works/doi:%2010.1074/jbc.M114.589002. - Resource not found."
"404 (client error): /works/10.1016/j.jbiomech.2013.03.024. - Resource not found."
"404 (client error): /works/10.3174/ajnr.A3662. - Resource not found."
"404 (client error): /works/10.1158/2159-8290 - Resource not found."


-138-139

"404 (client error): /works/10.15496/publikation-20836 - Resource not found."


-140

"404 (client error): /works/smw.2017.14411 - Resource not found."


-141

"404 (client error): /works/10.â€‹1007/â€‹s00035-015-0152-4 - Resource not found."
"404 (client error): /works/10.1007/s11655-017-2974-y10.1007/s11655-017-2974-y.epub2017oct8 - Resource not found."


-142

"404 (client error): /works/urn:nbn:de:bsz:16-heidok-184445 - Resource not found."


-143-144-145-146-147

"404 (client error): /works/doi:%2010.1158/1078-0432.CCR-15-1054. - Resource not found."
"404 (client error): /works/doi:%2010.1016/S2352-3026(15)00148-9. - Resource not found."
"404 (client error): /works/doi:%2010.1200/JCO.2015.61.3968. - Resource not found."
"404 (client error): /works/doi:%2010.1182/blood-2015-05-647172. - Resource not found."


-148-149-150-151-152-153

"404 (client error): /works/10.11014/pp.114.239137 - Resource not found."


-154-155

"404 (client error): /works/10.3389/fnana.2019.00050_rouiller - Resource not found."
"404 (client error): /works/10.1186/s13059-016-1006- - Resource not found."


-156-157

"404 (client error): /works/10.1007/978-3-319-55050-33 - Resource not found."
"404 (client error): /works/10.1038/nprot.2017.07210.1038/nprot.2017.072.epub2017aug24 - Resource not found."
"404 (client error): /works/10.1007/978-1-4939-6993-7_1110.1007/978-1-4939-6993-7_11 - Resource not found."
"404 (client error): /works/10.5281/zenodo.4699482 - Resource not found."


-158-159-160-161-162

"404 (client error): /works/10.1021/142 - Resource not found."


-163-164-165-166-167-168

"404 (client error): /works/10.4436/jass.98020 - Resource not found."


-169-170

"404 (client error): /works/ISBN%20978-954-2961-75-8 - Resource not found."


-171-172-173-174-175-176-177-178-179-180-181

"404 (client error): /works/10.16910/12 - Resource not found."


-182

"404 (client error): /works/10.1890/15-1110.1/abstract - Resource not found."


-183-184-185-186

"404 (client error): /works/10.3390/ijms/19040932 - Resource not found."
"404 (client error): /works/10.1172/jci7160 - Resource not found."
"404 (client error): /works/10.1164/ajrccm-conference.2015.191.1_meetingabstracts.a5298 - Resource not found."


-187-188-189-190

"404 (client error): /works/10.3929/ethz-a-010881600 - Resource not found."


-191-192-193-194-195-196-197

"404 (client error): /works/0.1101/gad.221374.113 - Resource not found."
"400 (client error): /works/http://proceedings.spiedigitallibrary.org/proceeding.aspx - Parameter articleid specified but there is no such parameter available on any route"


-198-199

"404 (client error): /works/CD002768 - Resource not found."


-200-201

"404 (client error): /works/10.1038/ki.2013.199.%20Epub%202013%20May%2029. - Resource not found."
"404 (client error): /works/10.1111/joim.12214.%20Epub%202014%20Mar%202 - Resource not found."


-202-203-204-205

"404 (client error): /works/10.12691/ajmcr-4-12-4 - Resource not found."


-206

"404 (client error): /works/10.1007/978 - Resource not found."
"404 (client error): /works/10.30486/ijrowa.2021.1899111.1071 - Resource not found."


-207

"404 (client error): /works/10.14601/phytopathol_mediterr-20292 - Resource not found."


-208-209-210

"404 (client error): /works/10.1016/j.ab.2013.11.013. - Resource not found."


-211

"404 (client error): /works/10.3945/â€‹ajcn.115.111732 - Resource not found."


-212

"404 (client error): /works/doi:%2010.1038/534185a. - Resource not found."


-213-214

"404 (client error): /works/10.1000/182_ei_106477 - Resource not found."


-215-216

"404 (client error): /works/10.1159 - Resource not found."


-217-218

"404 (client error): /works/10.3978/j.issn.2218-6751.2014.02.03 - Resource not found."
"404 (client error): /works/.%20PloS%20one%209:e86379 - Resource not found."
"404 (client error): /works/10.1371 - Resource not found."


-219-220-221

"404 (client error): /works/http://bioconductor.org/packages/release/bioc/html/cosmiq.html - Resource not found."


-222

"404 (client error): /works/doi:%2010.1038/ncomms4695. - Resource not found."
"404 (client error): /works/10.1155/2012/868251. - Resource not found."
"404 (client error): /works/10.4330/wjc.v6.i5.260. - Resource not found."
"404 (client error): /works/10.1111/j.1744-9987.2012.01102.x. - Resource not found."
"404 (client error): /works/10.1111/jre.12014. - Resource not found."


-223-224-225

"404 (client error): /works/10.1523 - Resource not found."


-226-227-228

"404 (client error): /works/NA - Resource not found."
"404 (client error): /works/10.7892/boris.143354 - Resource not found."


-229-230

"404 (client error): /works/10.5075/epfl-thesis-9395 - Resource not found."


-231

"404 (client error): /works/10.1684/nrp.2022.0700 - Resource not found."


-232-233-234-235

"404 (client error): /works/10.1158/1535-7163 - Resource not found."


-236-237

"404 (client error): /works/10.4414/fms.2019.08072 - Resource not found."
"404 (client error): /works/10.1038/nmeth.457 - Resource not found."
"404 (client error): /works/10.1146/annurev-genet-050720-12291 - Resource not found."


-238

"404 (client error): /works/doi:10.1371 - Resource not found."


-239

"404 (client error): /works/10.26124/mitofit:190001.v6 - Resource not found."


-240

"404 (client error): /works/10.5281/zenodo.7131289 - Resource not found."


-241

"404 (client error): /works/10.1038/srep21559(2016) - Resource not found."


-242-243

"404 (client error): /works/smw.2017.14410 - Resource not found."


-244

"404 (client error): /works/10.1017/s0140525x16000959,e195 - Resource not found."


-245-246-247-248

"404 (client error): /works/10.1080/216240x.2016.1234565 - Resource not found."
"404 (client error): /works/10.1111/all.13041.[epubaheadofprint] - Resource not found."


-249-250

"404 (client error): /works/doi:%2010.1134/S000629791406008X. - Resource not found."


-251

"404 (client error): /works/10.1016/j.cell - Resource not found."


-252-253-254-255-256-257

"404 (client error): /works/10.1002/humu.2319 - Resource not found."
"404 (client error): /works/10.1097/01.mib.0000512757.76105.34 - Resource not found."


-258-259-260

"404 (client error): /works/http:/â€‹/â€‹dx.â€‹doi.â€‹org/â€‹10.â€‹1105/â€‹tpc.â€‹114.â€‹129031 - Resource not found."
"404 (client error): /works/10.1101/gad.350140.122 - Resource not found."
"404 (client error): /works/doi:%2010.3324/haematol.2015.135780. - Resource not found."
"404 (client error): /works/doi:%2010.4414/smw.2015.14100.%20eCollection%202015. - Resource not found."
"404 (client error): /works/doi:%2010.1016/j.leukres.2015.03.004. - Resource not found."
"404 (client error): /works/doi:%2010.1080/23744235.2016.1205216. - Resource not found."


-261-262

"404 (client error): /works/10.1002/biof.1278 - Resource not found."


-263-264-265-266

"404 (client error): /works/doi:%2010.3389/fgene.2013.00060. - Resource not found."
"404 (client error): /works/10.1002/prca.201400125. - Resource not found."
"404 (client error): /works/10.3174/ajnr.A3322. - Resource not found."
"404 (client error): /works/10.4414/smw.2018.14637 - Resource not found."
"404 (client error): /works/10.1160/TH13-08-0712. - Resource not found."
"404 (client error): /works/10.4330/wjc.v6.i5.314. - Resource not found."


-267-268

"404 (client error): /works/10.6084/m9 - Resource not found."
"404 (client error): /works/10.4414/smw.2015.14138.%20eCollection%202015. - Resource not found."
"404 (client error): /works/10.1371/journal.pone.0105210.ecollection2014 - Resource not found."


-269-270

"404 (client error): /works/10.1371/journal.pgen.1006354. - Resource not found."


-271-272

"404 (client error): /works/Doi:10.1038 - Resource not found."


-273-274

"404 (client error): /works/10.1371/journal.pone.0154699.%20eCollection%202016. - Resource not found."
"404 (client error): /works/10.5061/dryad.b4mr5/1 - Resource not found."


-275-276-277-278-279

"404 (client error): /works/10.1016/j.neuron.2017 - Resource not found."


-280-281-282-283

"404 (client error): /works/10.1016/j.clnu.2016.04.004. - Resource not found."


-284-285-286-287-288

"404 (client error): /works/10.1016/j.neuron.2015.11.033. - Resource not found."


-289

"404 (client error): /works/10.15502/9783958570825 - Resource not found."


-290-291-292-293

"404 (client error): /works/doi:%2010.3324/haematol.2015.130013. - Resource not found."
"404 (client error): /works/doi:%2010.1002/hon.2307. - Resource not found."
"404 (client error): /works/doi:%2010.1016/j.leukres.2015.03.015. - Resource not found."


-294-295-296-297

"404 (client error): /works/10.5281/zenodo.4748914 - Resource not found."
"404 (client error): /works/10.4414/smw.2018.14637 - Resource not found."


-298

"404 (client error): /works/10.1109/tpami - Resource not found."


-299-300-301

"404 (client error): /works/10.1038 - Resource not found."


-302-303-304-305

"404 (client error): /works/10.1016/j.jaci.2012.12.1562. - Resource not found."


-306

"404 (client error): /works/10.1101/2022.03.30 - Resource not found."


-307-308

"404 (client error): /works/10.1016/j.ejmp.2015.09.01 - Resource not found."


-309-310-311

"404 (client error): /works/10.1101/690495v2 - Resource not found."


-312-313-314-315

"404 (client error): /works/1016/j.jmb.2013.09.025 - Resource not found."


-316-317

"404 (client error): /works/10.26049/vz69-3-2019-03 - Resource not found."


-318-319

"404 (client error): /works/10.1155/2013/159124. - Resource not found."


-320-321-322-323

"404 (client error): /works/10.ann/rscb-2016-0004:rscb - Resource not found."
"404 (client error): /works/10.3929/ethz-a-010611136 - Resource not found."


-324-325

"404 (client error): /works/10.3897/neobiota.65.58380 - Resource not found."


-326-327

"404 (client error): /works/10.1371/journal.pone.0177555.ecollection2017 - Resource not found."


-328-329-330

"404 (client error): /works/10.4467/16890027ap.16.009.4944 - Resource not found."
"404 (client error): /works/10.1517/14656566.2014.903923.%20Epub%202014%20Mar%2028 - Resource not found."
"404 (client error): /works/10.1016/j.freeradbiomed - Resource not found."


-331-332

"404 (client error): /works/10.4414/smw.2019.20004 - Resource not found."


-333-334-335

"404 (client error): /works/10.1093/ecco-jcc/jjw013. - Resource not found."


-336-337

"404 (client error): /works/ISBN%20978-954-2961-75-8 - Resource not found."


-338-339

"404 (client error): /works/j.copbio.2013.08.017 - Resource not found."
"404 (client error): /works/10.4207/pa.2019.art134 - Resource not found."


-340-341-342-343

"404 (client error): /works/10.13097/archive-ouverte/unige:94660 - Resource not found."
"404 (client error): /works/10.1007/978-1-0716-2541-5_10 - Resource not found."
"404 (client error): /works/10.5167/uzh-150772 - Resource not found."


-344

"404 (client error): /works/doi:%2010.3978/j.issn.2305-5839.2015.09.44 - Resource not found."


-345-346-347-348-349

"404 (client error): /works/10.1126/4298 - Resource not found."


-350-351-352-353

"404 (client error): /works/10.3929/ethz-b-000535274 - Resource not found."
"404 (client error): /works/10.1016/j.jbiomech.2014.06.017. - Resource not found."


-354-355-356-357

"404 (client error): /works/10.1016/j.mce.2015.12.014. - Resource not found."


-358-359

"404 (client error): /works/10.48550/arxiv.2107.06407 - Resource not found."


-360-361

"404 (client error): /works/doi:%2010.1200/JCO.2014.58.9846. - Resource not found."
"404 (client error): /works/doi:%2010.1016/j.bbagrm.2015.07.003. - Resource not found."
"404 (client error): /works/doi:%2010.3389/fimmu.2015.00588. - Resource not found."
"404 (client error): /works/doi:%2010.1111/bjh.14049. - Resource not found."


-362

"404 (client error): /works/10.1000/182 - Resource not found."


-363-364-365-366

"404 (client error): /works/10.1080/15476286.2014.996494. - Resource not found."
"404 (client error): /works/10.1016/j.jgar - Resource not found."


-367-368-369-370

"404 (client error): /works/10.1016/j.immuni - Resource not found."
"404 (client error): /works/10.1136/medethics-2014-102091.%20Epub%202014%20Aug%2020 - Resource not found."


-371

"404 (client error): /works/n.a. - Resource not found."


-372-373

"404 (client error): /works/10.1152/ajpgi.00297 - Resource not found."


-374

"404 (client error): /works/10.10007/s000-012-0983-8 - Resource not found."


-375

"404 (client error): /works/10.1007/978-3-319-47157-02 - Resource not found."


-376

"404 (client error): /works/10.1073/pnas.1516546113. - Resource not found."


-377

"404 (client error): /works/10.1016/j.thromres.2013.08.015. - Resource not found."


-378

"404 (client error): /works/10.1101/202 - Resource not found."


-379-380-381-382-383-384-385-386

"404 (client error): /works/10.2390/biecoll-jib-2014-240 - Resource not found."


-387

"404 (client error): /works/10.5281/zenodo.33990 - Resource not found."


-388

"404 (client error): /works/10.17170/kobra-202107134319 - Resource not found."
"404 (client error): /works/10.17170/kobra-202102113200 - Resource not found."


-389

"404 (client error): /works/10.106/j.celrep.2015.12.095 - Resource not found."
"404 (client error): /works/10.1101/816 - Resource not found."


-390

"404 (client error): /works/10.1016/j.virusres.2014 - Resource not found."


-391

"404 (client error): /works/10.1097/txd.00000000000000959 - Resource not found."


-392

"404 (client error): /works/10.11111/ctr.12896 - Resource not found."


-393-394

"404 (client error): /works/doi:%2010.1002/hon.2348. - Resource not found."


-395-396-397-398-399-400-401-402-403-404-405

"404 (client error): /works/doi:%2010.1146/annurev-genet-112414-055214. - Resource not found."


-406-407-408-409-410-411

"404 (client error): /works/10.1016/j.exer.2015 - Resource not found."


-412

"404 (client error): /works/10.1680/ecsmge.60678 - Resource not found."


-413

"404 (client error): /works/10.3929/ethz-b-000314013 - Resource not found."


-414-415-416

"404 (client error): /works/10.1523/0484 - Resource not found."
"404 (client error): /works/10.4414/smw.2013.13781. - Resource not found."
"404 (client error): /works/10.1155/2012/515692. - Resource not found."
"404 (client error): /works/10.1160/TH13-02-0121. - Resource not found."
"404 (client error): /works/10.1160/TH13-03-0211. - Resource not found."


-417-418-419-420-421-422

"404 (client error): /works/10.1097/tp.00000000001700 - Resource not found."
"404 (client error): /works/10.1016/j.hep - Resource not found."


-423

"404 (client error): /works/10.4455/eu.2020.002 - Resource not found."
"404 (client error): /works/10.4455/eu.2016.046 - Resource not found."


-424

"404 (client error): /works/10.1016/j.radonc.2013.03.020. - Resource not found."


-425-426-427

"404 (client error): /works/10.1038/ncomms3848. - Resource not found."


-428-429

"404 (client error): /works/10.1016/j - Resource not found."


-430-431-432-433-434-435-436-437-438

"404 (client error): /works/10.1038/tp.2016.176. - Resource not found."
"404 (client error): /works/10.1016 - Resource not found."


-439

"404 (client error): /works/10.5281/zenodo.34002 - Resource not found."


-440-441-442-443-444-445-446-447

"404 (client error): /works/doi:%2010.1016/j.ccell.2014. - Resource not found."


-448

"404 (client error): /works/10.1038/ng.3103. - Resource not found."


-449-450

"404 (client error): /works/10.3929/ethz-b-000421901 - Resource not found."


-451-452

"404 (client error): /works/10.2312/bzpm_0716_2018 - Resource not found."
"404 (client error): /works/under%20revision - Resource not found."
"404 (client error): /works/doi:%2010.1016/j.neuron.2014.11.027.%20Epub%202014%20Dec%2018. - Resource not found."


-453-454-455-456-457-458-459

"404 (client error): /works/10.1371/journal.pone.0155546.%20eCollection%202016. - Resource not found."


-460-461

"404 (client error): /works/gkw1059 - Resource not found."
"404 (client error): /works/doi:%2010.1093/hmg/ddu202.%20Epub%202014%20May%202. - Resource not found."


-462-463

"404 (client error): /works/10.1093/databasse/bav056 - Resource not found."


-464-465

"404 (client error): /works/10.1371 - Resource not found."


-466

"404 (client error): /works/10.1261/rna.047209.114. - Resource not found."


-467

"404 (client error): /works/10.1101/gad.277665 - Resource not found."


-468-469-470-471-472-473

"404 (client error): /works/10.1371/journal.pone.0150219.ecollection2016 - Resource not found."


-474-475-476-477-478-479-480-481-482

"404 (client error): /works/10.5281/zenodo.14985 - Resource not found."


-483-484

"404 (client error): /works/10.4414 - Resource not found."


-485-486-487-488-489-490-491

"404 (client error): /works/10.2105/AJPH.2016.303629a. - Resource not found."


-492-493

"404 (client error): /works/FRONTIERS%20IN%20PLANT%20SCIENCE - Resource not found."


-494-495

"404 (client error): /works/10.1101/966119 - Resource not found."


-496

"404 (client error): /works/10.1093/ijnp/pyv020. - Resource not found."


-497-498

"404 (client error): /works/10.1016/j.cell.2015.07.022. - Resource not found."


-499

"404 (client error): /works/NA - Resource not found."


-500-501-502

"404 (client error): /works/10.13140/rg.2.2.17947.72487 - Resource not found."


-503-504-505-506-507-508

"404 (client error): /works/10.1371/journal.pone.0080743. - Resource not found."
"404 (client error): /works/10.1111/jth.13257. - Resource not found."


-509-510-511

"404 (client error): /works/doi:%2010.1158/0008-5472 - Resource not found."
"404 (client error): /works/10.5281/zenodo.7131289 - Resource not found."


-512-513

"404 (client error): /works/10.21256/zhaw-18865 - Resource not found."


-514

"404 (client error): /works/10.1080/08941939 - Resource not found."
"404 (client error): /works/10.1097/tp.00000000000001723 - Resource not found."


-515

"404 (client error): /works/10.1136/bmjopen-2017-016972 - Resource not found."
"404 (client error): /works/10.12688/f1000research.19423 - Resource not found."


-516-517-518-519-520

"404 (client error): /works/doi:%2010.1038/gt.2014 - Resource not found."


-521

"404 (client error): /works/10.15502/9783958570023 - Resource not found."


-522

"404 (client error): /works/10.1038/019 - Resource not found."


-523-524-525

"404 (client error): /works/10.1007/s00467-016-3419-0. - Resource not found."


-526-527-528

"404 (client error): /works/10.1073/pnas.131862811 - Resource not found."


-529-530-531-532

"404 (client error): /works/10.1038/s41598 - Resource not found."


-533-534-535

"404 (client error): /works/10.3174/ajnr.A4063. - Resource not found."


-536-537

"404 (client error): /works/10.1016/%20j.cub.2017.01.027 - Resource not found."


-538-539-540-541

"404 (client error): /works/10.1074/jbc.M114.622274. - Resource not found."
"404 (client error): /works/10.1101/2021.09.21.461167v1 - Resource not found."


-542-543

"404 (client error): /works/10.4414/smw.2014 - Resource not found."


-544-545-546-547-548-549

"404 (client error): /works/10.1002/bbb.1548/abstract - Resource not found."


-550

"404 (client error): /works/10.1081/E-ESS-120001617 - Resource not found."


-551-552-553-554-555-556

"404 (client error): /works/10.4455/eu.2019.049 - Resource not found."


-557-558

"404 (client error): /works/10.1038/nn - Resource not found."


-559

"404 (client error): /works/doi:%2010.1016/j.radonc.2013.04.019. - Resource not found."


-560

"404 (client error): /works/10.1111/ele.12490/full - Resource not found."
"404 (client error): /works/10.4414/smw.2012.13694. - Resource not found."
"404 (client error): /works/10.16908/issn.1660-7104/291 - Resource not found."


-561

"404 (client error): /works/10.17458/per.vol15.2017.l.humansexdevelopment - Resource not found."


-562

"404 (client error): /works/Targeted%20delivery%20of%20interleukin-10%20to%20chronic%20cardiac%20allograft%20rejection%20using%20a%20human%20antibody%20s - Resource not found."


-563-564-565-566-567-568-569-570-571-572-573

"404 (client error): /works/10.3390/12010176 - Resource not found."


-574

"404 (client error): /works/10.25849/myrmecol.news_031:181 - Resource not found."


-575-576

"404 (client error): /works/10.1007/s11695 - Resource not found."


-577-578-579

"404 (client error): /works/doi:%2010.3324/haematol.2015. - Resource not found."
"404 (client error): /works/doi:%2010.3109/10428194.2015.1079315. - Resource not found."


-580

"404 (client error): /works/10.1159/00 - Resource not found."


-581-582-583-584-585-586-587

"404 (client error): /works/10.2312/bzpm_0700_2016 - Resource not found."


-588

"404 (client error): /works/25261198 - Resource not found."


-589-590-591

"404 (client error): /works/10.1016/j.anbehav.%202013.04 - Resource not found."


-592-593-594-595

"404 (client error): /works/10.1111/jth.12536. - Resource not found."


-596-597-598

"404 (client error): /works/10.1111/jeb.1245 - Resource not found."
"404 (client error): /works/10.5281/zenodo.192634 - Resource not found."


-599

"404 (client error): /works/10.1016/j.tins.2013 - Resource not found."
"404 (client error): /works/10.1016/j.mcn.2015.11.00710.1016/j.mcn.2015.11.007 - Resource not found."


-600-601-602-603

"404 (client error): /works/10.1111/ait.14192 - Resource not found."
"404 (client error): /works/10.111/tri.12904 - Resource not found."


-604

"404 (client error): /works/10.1016/j.ejso - Resource not found."


-605-606

"404 (client error): /works/116.308807 - Resource not found."


-607-608-609

"404 (client error): /works/10.1016/j - Resource not found."


-610-611-612-613-614

"404 (client error): /works/10.1074/mcp.m116.062273.epub2017jan4 - Resource not found."


-615-616

"404 (client error): /works/DOI:%2010.1128 - Resource not found."


-617-618-619-620

"404 (client error): /works/10.1101/gad.287094.116.%20Epub%202016%20Sep%2015. - Resource not found."


-621-622-623-624-625-626-627-628-629

"404 (client error): /works/10.1038 - Resource not found."


-630-631-632

"404 (client error): /works/10.ann/rscb-2015-0044:rscb - Resource not found."


-633[1] "Loop done"


In [18]:
# Work with the final bucket
file_name.2 <- "3_PublicationCitations_20230516.csv"
publication_citations <- read_delim(file_name.2, delim = ";", col_names=TRUE)

[1mRows: [22m[34m123591[39m [1mColumns: [22m[34m2[39m
[36m--[39m [1mColumn specification[22m [36m--------------------------------------------------------[39m
[1mDelimiter:[22m ";"
[31mchr[39m (1): doi
[32mdbl[39m (1): is.referenced.by.count

[36mi[39m Use `spec()` to retrieve the full column specification for this data.
[36mi[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [19]:
# Make unique
publication_citations <- unique(publication_citations)

In [20]:
head(publication_citations)

doi,is.referenced.by.count
<chr>,<dbl>
10.1016/j.sbi.2018.03.017,30
10.3791/58109,5
10.1371/journal.pcbi.1004848,48
10.1007/s00204-017-1994-x,22
10.1016/j.neuron.2014.11.024,56
10.1016/j.celrep.2017.10.004,30


In [21]:
head(publications)

GrantNumber,ScientificPublication_DOI,ScientificPublication_Year
<dbl>,<chr>,<dbl>
173211,10.1007/s40262-019-00821-w,2020
179247,10.1038/s41598-020-60709-9,2020
150758,10.1038/mi.2014.57,2015
150758,10.1016/j.immuni.2018.12.020,2019
150758,10.1038/mi.2017.2,2017
150758,10.4414/smw.2016.14350,2016


In [22]:
# Merge publications dataframe with the citation data, order by GrantNumber, and reorder columns
citation_data <- merge(publications, publication_citations, by.x="ScientificPublication_DOI", by.y="doi", all=TRUE)
citation_data <- arrange(citation_data, GrantNumber)
citation_data <- select(citation_data, GrantNumber, ScientificPublication_DOI, ScientificPublication_Year, is.referenced.by.count)

In [23]:
head(citation_data)

Unnamed: 0_level_0,GrantNumber,ScientificPublication_DOI,ScientificPublication_Year,is.referenced.by.count
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<dbl>
1,138726,10.1016/j.jsv.2014.05.024,2015,8
2,139011,10.1016/j.theriogenology.2014.11.030,2015,11
3,139011,10.1242/jeb.133843,2016,8
4,139011,10.3389/fevo.2016.00067,2016,6
5,139013,10.1016/j.cub.2014.01.056,2014,271
6,139013,10.1016/j.cub.2015.05.058,2015,62


In [24]:
# Group the citation_data by GrantNumber and calculate the total number of citations per grant as well as the unique number of cited publications
summarized_citations <- summarise(group_by(citation_data, GrantNumber), 
                                  NrCitablePublications=n_distinct(ScientificPublication_DOI),
                                  NrCitationsTotal=sum(is.referenced.by.count),
                                  YearFirstCitablePublication=min(ScientificPublication_Year)
                                 )

In [25]:
head(summarized_citations)

GrantNumber,NrCitablePublications,NrCitationsTotal,YearFirstCitablePublication
<dbl>,<int>,<dbl>,<dbl>
138726,1,8.0,2015.0
139011,3,25.0,2015.0
139013,14,1025.0,2013.0
139021,94,,2013.0
139093,23,1488.0,2015.0
139094,9,904.0,


In [26]:
# Load the prepared subset
biomed_grants <- read_delim("2_BiomedGrantsSubset.csv", delim = ";", col_names=TRUE)

[1mRows: [22m[34m7609[39m [1mColumns: [22m[34m44[39m
[36m--[39m [1mColumn specification[22m [36m--------------------------------------------------------[39m
[1mDelimiter:[22m ";"
[31mchr[39m   (7): State, FundingInstrumentLevel1, ResearchInstitution, InstituteCou...
[32mdbl[39m  (32): GrantNumber, CallDecisionYear, GrantDurationDays, GrantDurationYe...
[34mdate[39m  (5): EffectiveGrantStartDate, EffectiveGrantEndDate, DateFirstAcademic...

[36mi[39m Use `spec()` to retrieve the full column specification for this data.
[36mi[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [27]:
# Merge summarized citation data with the grant data of interest
biomed_grant_data <- left_join(biomed_grants, summarized_citations, by="GrantNumber")

In [28]:
biomed_grant_data <- arrange(biomed_grant_data, CallDecisionYear)

In [29]:
head(biomed_grant_data)

GrantNumber,CallDecisionYear,EffectiveGrantStartDate,GrantDurationDays,GrantDurationYears,EffectiveGrantEndDate,State,FundingInstrumentLevel1,AmountGrantedAllSets,AmountPerYear,AmountPerEmployee,AmountPerEmployeeAndYear,ResearchInstitution,InstituteCountry,MainDiscipline_Level1,MainDiscipline_Level2,MainDiscipline,NrEmployees,NrEmployeesPerYear,NrCollaborations,NrCollaborationsPerYear,NrAcademicEvents,NrAcademicEventsPerYear,DateFirstAcademicEvent,NrAwards,NrAwardsPerYear,YearFirstAward,NrDatasets,NrDatasetsPerYear,DateFirstDataset,NrKnowledgeTransfer,NrKnowledgeTransferPerYear,DateFirstKnowledgeTransfer,NrCommunication,NrCommunicationPerYear,YearFirstCommunication,NrTotalPublications,NrTotalPublicationsPerYear,YearFirstPublication,NrUses,NrUsesPerYear,YearFirstUse,NrTotalOutputs,NrTotalOutputsPerYear,NrCitablePublications,NrCitationsTotal,YearFirstCitablePublication
<dbl>,<dbl>,<date>,<dbl>,<dbl>,<date>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<date>,<dbl>,<dbl>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<dbl>
142628,2012,2013-09-01,1490,4.0821918,2017-09-30,Completed,Careers,578812,141789.52,192937.33,47263.17,University of Lausanne - LA,Switzerland,Biology and Medicine,General Biology,Ecology,3,0.7348993,2,0.4899329,4,0.9798658,2015-03-10,1,0.2449664,2013.0,0,0,,0,0,,0,0.0,,8,1.9597315,2014.0,1,0.2449664,2015.0,14,3.42953,7.0,293.0,2014.0
142615,2012,2013-04-01,1095,3.0,2016-03-31,Completed,Careers,534863,178287.67,267431.5,89143.83,ETH Zurich - ETHZ,Switzerland,Biology and Medicine,Basic Biological Research,"Cellular Biology, Cytology",2,0.6666667,9,3.0,16,5.3333333,2013-05-16,0,0.0,,0,0,,0,0,,1,0.3333333,2013.0,8,2.6666667,,0,0.0,,25,8.333333,6.0,,
144100,2012,2013-09-01,1641,4.4958904,2018-02-28,Completed,Projects,291191,64768.26,72797.75,16192.07,University of Berne - BE,Switzerland,Biology and Medicine,Preventive Medicine (Epidemiology/Early Diagnosis/Prevention),"Mental Disorders, Psychosomatic Diseases",4,0.8897014,6,1.3345521,43,9.5642901,2014-03-01,0,0.0,,0,0,,0,0,,0,0.0,,4,0.8897014,,0,0.0,,47,10.453991,3.0,145.0,2015.0
147291,2012,2013-02-01,364,0.9972603,2014-01-31,Completed,Careers,60400,60565.93,0.0,0.0,Institution abroad - United States of America,United States of America,Biology and Medicine,Basic Biological Research,Biochemistry,0,0.0,1,1.0027473,1,1.0027473,2013-05-06,0,0.0,,0,0,,0,0,,0,0.0,,0,0.0,,0,0.0,,1,1.002747,,,
142408,2012,2013-01-01,545,1.4931507,2014-06-30,Completed,Careers,584267,391298.08,584267.0,391298.08,University of Zurich - ZH,Switzerland,Biology and Medicine,Basic Biological Research,Experimental Microbiology,1,0.6697248,4,2.6788991,4,2.6788991,2013-05-07,0,0.0,,0,0,,0,0,,0,0.0,,2,1.3394495,2013.0,0,0.0,,6,4.018349,1.0,,2013.0
143733,2012,2013-01-01,1276,3.4958904,2016-06-30,Completed,Projects,500000,143025.08,125000.0,35756.27,University of Geneva - GE,Switzerland,Biology and Medicine,Basic Biological Research,Biochemistry,4,1.1442006,6,1.7163009,18,5.1489028,2013-05-16,0,0.0,,0,0,,0,0,,6,1.7163009,2013.0,9,2.5744514,,0,0.0,,33,9.439655,8.0,216.0,


In [30]:
# Add number of uncitable publications (no DOI)
biomed_grant_data <- mutate(biomed_grant_data, NrUncitablePublications=NrTotalPublications-NrCitablePublications, .after=YearFirstPublication)

# Add number of uncitable publications (no DOI) per year
biomed_grant_data <- mutate(biomed_grant_data, NrUncitablePublicationsPerYear=NrUncitablePublications/GrantDurationYears, .after=NrUncitablePublications)

# Add number of citable publications per year
biomed_grant_data <- mutate(biomed_grant_data, NrCitablePublicationsPerYear=NrCitablePublications/GrantDurationYears, .after=NrCitablePublications)

# Add number of citations per citable publication
biomed_grant_data <- mutate(biomed_grant_data, NrCitationsPerPublication=NrCitationsTotal/NrCitablePublications, .after=NrCitationsTotal)

In [31]:
head(biomed_grant_data)

GrantNumber,CallDecisionYear,EffectiveGrantStartDate,GrantDurationDays,GrantDurationYears,EffectiveGrantEndDate,State,FundingInstrumentLevel1,AmountGrantedAllSets,AmountPerYear,AmountPerEmployee,AmountPerEmployeeAndYear,ResearchInstitution,InstituteCountry,MainDiscipline_Level1,MainDiscipline_Level2,MainDiscipline,NrEmployees,NrEmployeesPerYear,NrCollaborations,NrCollaborationsPerYear,NrAcademicEvents,NrAcademicEventsPerYear,DateFirstAcademicEvent,NrAwards,NrAwardsPerYear,YearFirstAward,NrDatasets,NrDatasetsPerYear,DateFirstDataset,NrKnowledgeTransfer,NrKnowledgeTransferPerYear,DateFirstKnowledgeTransfer,NrCommunication,NrCommunicationPerYear,YearFirstCommunication,NrTotalPublications,NrTotalPublicationsPerYear,YearFirstPublication,NrUncitablePublications,NrUncitablePublicationsPerYear,NrUses,NrUsesPerYear,YearFirstUse,NrTotalOutputs,NrTotalOutputsPerYear,NrCitablePublications,NrCitablePublicationsPerYear,NrCitationsTotal,NrCitationsPerPublication,YearFirstCitablePublication
<dbl>,<dbl>,<date>,<dbl>,<dbl>,<date>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<date>,<dbl>,<dbl>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
142628,2012,2013-09-01,1490,4.0821918,2017-09-30,Completed,Careers,578812,141789.52,192937.33,47263.17,University of Lausanne - LA,Switzerland,Biology and Medicine,General Biology,Ecology,3,0.7348993,2,0.4899329,4,0.9798658,2015-03-10,1,0.2449664,2013.0,0,0,,0,0,,0,0.0,,8,1.9597315,2014.0,1.0,0.2449664,1,0.2449664,2015.0,14,3.42953,7.0,1.7147651,293.0,41.85714,2014.0
142615,2012,2013-04-01,1095,3.0,2016-03-31,Completed,Careers,534863,178287.67,267431.5,89143.83,ETH Zurich - ETHZ,Switzerland,Biology and Medicine,Basic Biological Research,"Cellular Biology, Cytology",2,0.6666667,9,3.0,16,5.3333333,2013-05-16,0,0.0,,0,0,,0,0,,1,0.3333333,2013.0,8,2.6666667,,2.0,0.6666667,0,0.0,,25,8.333333,6.0,2.0,,,
144100,2012,2013-09-01,1641,4.4958904,2018-02-28,Completed,Projects,291191,64768.26,72797.75,16192.07,University of Berne - BE,Switzerland,Biology and Medicine,Preventive Medicine (Epidemiology/Early Diagnosis/Prevention),"Mental Disorders, Psychosomatic Diseases",4,0.8897014,6,1.3345521,43,9.5642901,2014-03-01,0,0.0,,0,0,,0,0,,0,0.0,,4,0.8897014,,1.0,0.2224254,0,0.0,,47,10.453991,3.0,0.6672761,145.0,48.33333,2015.0
147291,2012,2013-02-01,364,0.9972603,2014-01-31,Completed,Careers,60400,60565.93,0.0,0.0,Institution abroad - United States of America,United States of America,Biology and Medicine,Basic Biological Research,Biochemistry,0,0.0,1,1.0027473,1,1.0027473,2013-05-06,0,0.0,,0,0,,0,0,,0,0.0,,0,0.0,,,,0,0.0,,1,1.002747,,,,,
142408,2012,2013-01-01,545,1.4931507,2014-06-30,Completed,Careers,584267,391298.08,584267.0,391298.08,University of Zurich - ZH,Switzerland,Biology and Medicine,Basic Biological Research,Experimental Microbiology,1,0.6697248,4,2.6788991,4,2.6788991,2013-05-07,0,0.0,,0,0,,0,0,,0,0.0,,2,1.3394495,2013.0,1.0,0.6697248,0,0.0,,6,4.018349,1.0,0.6697248,,,2013.0
143733,2012,2013-01-01,1276,3.4958904,2016-06-30,Completed,Projects,500000,143025.08,125000.0,35756.27,University of Geneva - GE,Switzerland,Biology and Medicine,Basic Biological Research,Biochemistry,4,1.1442006,6,1.7163009,18,5.1489028,2013-05-16,0,0.0,,0,0,,0,0,,6,1.7163009,2013.0,9,2.5744514,,1.0,0.2860502,0,0.0,,33,9.439655,8.0,2.2884013,216.0,27.0,


In [32]:
# NA --> 0 in certain columns
columns_to_mutate <- c(
  "NrCitablePublications",
  "NrCitablePublicationsPerYear",
  "NrUncitablePublications",
  "NrUncitablePublicationsPerYear"
)

biomed_grant_data <- mutate(biomed_grant_data, across(all_of(columns_to_mutate), ~ ifelse(is.na(.), 0, .)))

In [33]:
# Add the total number of uncitable outputs (outputs other than peer-reviewed articles with a DOI)
biomed_grant_data <- mutate(biomed_grant_data, NrTotalUncitableOutputs=NrTotalOutputs - NrCitablePublications, .after=NrTotalOutputsPerYear)

# Total number of uncitable outputs per year
biomed_grant_data <- mutate(biomed_grant_data, NrTotalUncitableOutputsPerYear=NrTotalUncitableOutputs/GrantDurationYears, .after=NrTotalUncitableOutputs)

In [34]:
dim(biomed_grant_data)
head(biomed_grant_data)

GrantNumber,CallDecisionYear,EffectiveGrantStartDate,GrantDurationDays,GrantDurationYears,EffectiveGrantEndDate,State,FundingInstrumentLevel1,AmountGrantedAllSets,AmountPerYear,AmountPerEmployee,AmountPerEmployeeAndYear,ResearchInstitution,InstituteCountry,MainDiscipline_Level1,MainDiscipline_Level2,MainDiscipline,NrEmployees,NrEmployeesPerYear,NrCollaborations,NrCollaborationsPerYear,NrAcademicEvents,NrAcademicEventsPerYear,DateFirstAcademicEvent,NrAwards,NrAwardsPerYear,YearFirstAward,NrDatasets,NrDatasetsPerYear,DateFirstDataset,NrKnowledgeTransfer,NrKnowledgeTransferPerYear,DateFirstKnowledgeTransfer,NrCommunication,NrCommunicationPerYear,YearFirstCommunication,NrTotalPublications,NrTotalPublicationsPerYear,YearFirstPublication,NrUncitablePublications,NrUncitablePublicationsPerYear,NrUses,NrUsesPerYear,YearFirstUse,NrTotalOutputs,NrTotalOutputsPerYear,NrTotalUncitableOutputs,NrTotalUncitableOutputsPerYear,NrCitablePublications,NrCitablePublicationsPerYear,NrCitationsTotal,NrCitationsPerPublication,YearFirstCitablePublication
<dbl>,<dbl>,<date>,<dbl>,<dbl>,<date>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<date>,<dbl>,<dbl>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
142628,2012,2013-09-01,1490,4.0821918,2017-09-30,Completed,Careers,578812,141789.52,192937.33,47263.17,University of Lausanne - LA,Switzerland,Biology and Medicine,General Biology,Ecology,3,0.7348993,2,0.4899329,4,0.9798658,2015-03-10,1,0.2449664,2013.0,0,0,,0,0,,0,0.0,,8,1.9597315,2014.0,1,0.2449664,1,0.2449664,2015.0,14,3.42953,7,1.714765,7,1.7147651,293.0,41.85714,2014.0
142615,2012,2013-04-01,1095,3.0,2016-03-31,Completed,Careers,534863,178287.67,267431.5,89143.83,ETH Zurich - ETHZ,Switzerland,Biology and Medicine,Basic Biological Research,"Cellular Biology, Cytology",2,0.6666667,9,3.0,16,5.3333333,2013-05-16,0,0.0,,0,0,,0,0,,1,0.3333333,2013.0,8,2.6666667,,2,0.6666667,0,0.0,,25,8.333333,19,6.333333,6,2.0,,,
144100,2012,2013-09-01,1641,4.4958904,2018-02-28,Completed,Projects,291191,64768.26,72797.75,16192.07,University of Berne - BE,Switzerland,Biology and Medicine,Preventive Medicine (Epidemiology/Early Diagnosis/Prevention),"Mental Disorders, Psychosomatic Diseases",4,0.8897014,6,1.3345521,43,9.5642901,2014-03-01,0,0.0,,0,0,,0,0,,0,0.0,,4,0.8897014,,1,0.2224254,0,0.0,,47,10.453991,44,9.786715,3,0.6672761,145.0,48.33333,2015.0
147291,2012,2013-02-01,364,0.9972603,2014-01-31,Completed,Careers,60400,60565.93,0.0,0.0,Institution abroad - United States of America,United States of America,Biology and Medicine,Basic Biological Research,Biochemistry,0,0.0,1,1.0027473,1,1.0027473,2013-05-06,0,0.0,,0,0,,0,0,,0,0.0,,0,0.0,,0,0.0,0,0.0,,1,1.002747,1,1.002747,0,0.0,,,
142408,2012,2013-01-01,545,1.4931507,2014-06-30,Completed,Careers,584267,391298.08,584267.0,391298.08,University of Zurich - ZH,Switzerland,Biology and Medicine,Basic Biological Research,Experimental Microbiology,1,0.6697248,4,2.6788991,4,2.6788991,2013-05-07,0,0.0,,0,0,,0,0,,0,0.0,,2,1.3394495,2013.0,1,0.6697248,0,0.0,,6,4.018349,5,3.348624,1,0.6697248,,,2013.0
143733,2012,2013-01-01,1276,3.4958904,2016-06-30,Completed,Projects,500000,143025.08,125000.0,35756.27,University of Geneva - GE,Switzerland,Biology and Medicine,Basic Biological Research,Biochemistry,4,1.1442006,6,1.7163009,18,5.1489028,2013-05-16,0,0.0,,0,0,,0,0,,6,1.7163009,2013.0,9,2.5744514,,1,0.2860502,0,0.0,,33,9.439655,25,7.151254,8,2.2884013,216.0,27.0,


In [35]:
# Write  data to file
date_string <- format(Sys.Date(), "%Y%m%d")
file_name.3 <- sprintf("3_BiomedGrantData_%s.csv", date_string)

# Into tidy
write.table(biomed_grant_data, file_name.3, sep=";", fileEncoding="UTF-8", row.names = FALSE)