In [1]:
using<-function(...) {
    libs<-unlist(list(...))
    req<-unlist(lapply(libs,require,character.only=TRUE))
    need<-libs[req==FALSE]
    if(length(need)>0){ 
        install.packages(need)
        lapply(need,require,character.only=TRUE)
    }
}

In [2]:
using("data.table", "tidyverse", "naniar", "stringr", "readr",  "dplyr", "magrittr", "readxl", "writexl", "sjmisc", "tidyr", "rdflib"
                  )


Loading required package: data.table

Loading required package: tidyverse

-- [1mAttaching core tidyverse packages[22m ------------------------ tidyverse 2.0.0 --
[32mv[39m [34mdplyr    [39m 1.1.0     [32mv[39m [34mreadr    [39m 2.1.4
[32mv[39m [34mforcats  [39m 1.0.0     [32mv[39m [34mstringr  [39m 1.5.0
[32mv[39m [34mggplot2  [39m 3.4.1     [32mv[39m [34mtibble   [39m 3.2.0
[32mv[39m [34mlubridate[39m 1.9.2     [32mv[39m [34mtidyr    [39m 1.3.0
[32mv[39m [34mpurrr    [39m 1.0.1     
-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mbetween()[39m     masks [34mdata.table[39m::between()
[31mx[39m [34mdplyr[39m::[32mfilter()[39m      masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mfirst()[39m       masks [34mdata.table[39m::first()
[31mx[39m [34mlubridate[39m::[32mhour()[39m    masks [34mdata.table[39m::hour()
[31mx[39m [34mlubridate[3

In [4]:
##Function to apply SHA-256 hashing

sha256_hash <- function(data) {
  openssl::sha256(data)
}


In [7]:
## Data loading


barometer_dt_raw <- readxl::read_excel("Data/GD/221122_data_RGD_DECIDE_nw (2).xlsx")


In [8]:
## Data manipulation


barometer_dt <- barometer_dt_raw %>% 
  dplyr::rename(
    Filenumber = Dossier_ID,
    Samplenumber = sample_id,
    Farm_ID = farm_ID,
    Project = project,
    Date = date
    ) %>%
  dplyr::mutate(
    Country ='The Netherlands',
    Lab_reference ='2', 
    Sample_type = case_when(
      reason_of_sampling == 'Autopsy' ~ 'Autopsy',
      sample == 'BAL' ~ 'BAL',
      sample == 'SWABS' ~ 'Swab', 
      sample == 'OTHER' ~ 'Unknown',
      TRUE ~ 'Missing' 
    ),
    Diagnostic_test = case_when(
      test == 'PCR' ~ 'PCR',
      test == 'Kweek' ~ "Culture",
      TRUE ~ 'Missing' 
    ),
    Breed = case_when(
      breed == "beef" ~ 'Beef', 
      breed == "dairy" ~'Dairy',
      breed == "mixed" ~ 'Mixed',
      breed == "veal" ~ 'Veal',
      breed %in% c("other", "rearing", "unknown") ~ 'Unknown',
      TRUE ~ 'Unknown'
    ),
    Province = case_when(
      provincie == "DR" ~ 'Drenthe', 
      provincie == "FL" ~'Flevoland',
      provincie == "FR" ~'Friesland',
      provincie == "GL" ~'Gelderland',
      provincie == "GR" ~'Groningen',
      provincie == "LB" ~'Limburg',
      provincie == "NB" ~'North Brabant',
      provincie == "NH" ~'North Holland',
      provincie == "OV" ~'Overijssel',
      provincie == "UT" ~'Utrecht',
      provincie == "ZH" ~'South Holland',
      provincie == "ZL" ~'Zeeland',
      TRUE ~ 'Missing'
    )
  
  )%>%
  dplyr::select(
    Filenumber,
    Diagnostic_test,
    Samplenumber,
    Country,
    Lab_reference,
    Sample_type,
    Breed,
    PM,
    MH,
    HS,
    MB,
    BRSV,
    PI3,
    BCV,
    Date,
    Province,
    Project,
    Farm_ID
  ) %>%
  dplyr::distinct() %>%
dplyr::mutate(
    Filenumber = sha256_hash(as.character(Filenumber)),
    Samplenumber = sha256_hash(as.character(Samplenumber)),
    Farm_ID = sha256_hash(as.character(Farm_ID))
    )

In [9]:
## Filter data for 'monitoring' and 'no projects'


barometer_dt_filtered <- filter(barometer_dt, Project == 'monitoring' | Project == 'no project')


## Floor date to 1st of the month


barometer_dt_filtered$Floored_date <- lubridate::floor_date(barometer_dt_filtered$Date, "month")

## Aggregate data based on farm_ID and month (WIDE)


barometer_groupby <- barometer_dt_filtered %>%
  group_by(Lab_reference, Country, Breed, Floored_date, Province, Farm_ID, Diagnostic_test, Sample_type) %>%
  summarise(across(c(PM, MH, HS, MB, BRSV, PI3, BCV), max))

[1m[22m`summarise()` has grouped output by 'Lab_reference', 'Country', 'Breed',
'Floored_date', 'Province', 'Farm_ID', 'Diagnostic_test'. You can override
using the `.groups` argument.


In [10]:
## Convert to LONG



barometer_long <- barometer_groupby %>%
  tidyr::pivot_longer(
    cols = c('PM', 'MH', 'HS', 'MB', 'BRSV', 'PI3', 'BCV'),
    names_to = 'Pathogen',
    values_to = 'Result',
  )







In [29]:
library(magrittr)
library(rdflib)
library(XML)

In [None]:
# create an RDF object
Literal <- function(value, datatype="xsd:string") {
  rdf_literal(value, datatype=datatype)
}
rdf <- rdf()
 
# create a namespace prefix for the ontology
onto <- "http://www.purl.org/decide/LivestockHealthOnto"
 
# iterate through each row in the barometer DataFrame
for (i in 1:nrow(barometer_long)) {
  row <- barometer_long[i,] 
# create a URI for the CattleSample individual based on the row ID
CattleSample <- paste0(prefix, "CattleSample", i)

# add triples to the RDF graph


  #rdf_add(rdf, CattleSample, paste0(prefix, "hasBreed"), as.character(row[3]), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasLabReference"), ifelse(is.na(row$Lab_reference), "", as.character(row$Lab_reference)))
  rdf_add(rdf, CattleSample, paste0(onto, "hasCountry"), ifelse(is.na(row$Country), "", as.character(row$Country)))
  rdf_add(rdf, CattleSample, paste0(onto, "hasBreed"), ifelse(is.na(row$Breed), "", as.character(row$Breed)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasDate"), ifelse(is.na(row$Floored_date), "", as.character(row$Floored_date)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasProvince"), ifelse(is.na(row$Province), "", as.character(row$Province)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasFarmIdentification"), ifelse(is.na(row$Farm_ID), "", as.character(row$Farm_ID)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasDiagnosticTest"), ifelse(is.na(row$Diagnostic_test), "", as.character(row$Diagnostic_test)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasSampleType"), ifelse(is.na(row$Sample_type), "", as.character(row$Sample_type)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasPathogen"), ifelse(is.na(row$Pathogen), "", as.character(row$Pathogen)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasResult"), ifelse(is.na(row$Result), "Missing", as.character(row$Result)), datatype="xsd:string")

}
 
# print the RDF graph (for testing)
cat(rdf_serialize(rdf, format="turtle"))
 
# output the RDF graph to a file
## Save file to RDF (Long Version) instead of CSV
rdf_serialize(rdf, "output/RDFoutputCattleSampleGD.ttl", format="turtle")