In [1]:
using<-function(...) {
    libs<-unlist(list(...))
    req<-unlist(lapply(libs,require,character.only=TRUE))
    need<-libs[req==FALSE]
    if(length(need)>0){ 
        install.packages(need)
        lapply(need,require,character.only=TRUE)
    }
}

In [2]:
using("data.table", "tidyverse", "naniar", "stringr", "readr",  "dplyr", "magrittr", "readxl", "writexl", "sjmisc", "tidyr", "rdflib"
                  )

Loading required package: data.table

Loading required package: tidyverse

-- [1mAttaching core tidyverse packages[22m ------------------------ tidyverse 2.0.0 --
[32mv[39m [34mdplyr    [39m 1.1.0     [32mv[39m [34mreadr    [39m 2.1.4
[32mv[39m [34mforcats  [39m 1.0.0     [32mv[39m [34mstringr  [39m 1.5.0
[32mv[39m [34mggplot2  [39m 3.4.1     [32mv[39m [34mtibble   [39m 3.2.0
[32mv[39m [34mlubridate[39m 1.9.2     [32mv[39m [34mtidyr    [39m 1.3.0
[32mv[39m [34mpurrr    [39m 1.0.1     
-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mbetween()[39m     masks [34mdata.table[39m::between()
[31mx[39m [34mdplyr[39m::[32mfilter()[39m      masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mfirst()[39m       masks [34mdata.table[39m::first()
[31mx[39m [34mlubridate[39m::[32mhour()[39m    masks [34mdata.table[39m::hour()
[31mx[39m [34mlubridate[3

In [4]:
##Function to apply SHA-256 hashing

sha256_hash <- function(data) {
  openssl::sha256(data)
}

In [6]:
## Data loading



barometer_dt_raw <- read.csv("Data/ARSIA/ARSIA_DECIDE_20221201.xlsx")


In [7]:
## Adding of rows for pathogens

barometer_dt <- barometer_dt_raw %>%
  dplyr::mutate(
    HS = ifelse(str_detect(pathogens, "Histophilus somni"), 1, 0),
    MH = ifelse(str_detect(pathogens, "Mannheimia haemolytica"), 1, 0),
    PM = ifelse(str_detect(pathogens, "Pasteurella multocida"), 1, 0),
    BCV = ifelse(str_detect(pathogens, "Bovine coronavirus"), 1, 0),
    MB = ifelse(str_detect(pathogens, "Mycoplasmopsis bovis"), 1, 0),
    PI3 = ifelse(str_detect(pathogens, "Bovine respirovirus 3"), 1, 0),
    BRSV = ifelse(str_detect(pathogens, "Bovine orthopneumovirus"), 1, 0),
  )


In [8]:
## Data manipulation
barometer_dt <- barometer_dt_raw %>% 
  dplyr::rename(
    Filenumber = sample_id,
    #Samplenumber = sample_id,
    Farm_ID = farm_id,
    #Project = project,
    Date = created
    ) %>%
  dplyr::mutate(
    Lab_reference ='4',
    Diagnostic_test = 'NPS',
    Breed = 'Unknown',
    Province = NA, 
    Country = case_when(
      country == 'BE' ~ 'Belgium',
      country == 'NL' ~ 'The Netherlands'
    ),
    Sample_type = case_when(
      type == 'balFluid' ~ 'BAL',
      type == 'noseSwab' ~ 'Swab', 
      TRUE ~ 'Other' 
    ),
    HS = ifelse(str_detect(pathogens, "Histophilus somni"), 1, 0),
    MH = ifelse(str_detect(pathogens, "Mannheimia haemolytica"), 1, 0),
    PM = ifelse(str_detect(pathogens, "Pasteurella multocida"), 1, 0),
    BCV = ifelse(str_detect(pathogens, "Bovine coronavirus"), 1, 0),
    MB = ifelse(str_detect(pathogens, "Mycoplasmopsis bovis"), 1, 0),
    PI3 = ifelse(str_detect(pathogens, "Bovine respirovirus 3"), 1, 0),
    BRSV = ifelse(str_detect(pathogens, "Bovine orthopneumovirus"), 1, 0)
    
  )%>%
  dplyr::select(
    Filenumber,
    Lab_reference,
    Country,
    Breed,
    Province,
    Farm_ID,
    Diagnostic_test,
    Sample_type,
    PM,
    MH,
    HS,
    MB,
    BRSV,
    PI3,
    BCV,
    Date
  ) %>%
  dplyr::distinct()%>%
dplyr::mutate(
    Filenumber = sha256_hash(as.character(Filenumber)),
    Farm_ID = sha256_hash(as.character(Farm_ID))
    )


In [9]:
## Floor date to 1st of month



barometer_dt$Date <- lubridate::ymd_hms(barometer_dt$Date)
barometer_dt$Floored_date <- lubridate::floor_date(barometer_dt$Date, "month")

## Aggregate data based on farm_ID & month


barometer_groupby <- barometer_dt %>%
  group_by(Lab_reference, Country, Breed, Floored_date, Province, Farm_ID, Diagnostic_test, Sample_type) %>%
  summarise(across(c(PM, MH, HS, MB, BRSV, PI3, BCV), max))

# If all are NA, than NA, if not (else): max in group, while ignoring NA 


## Convert to long

barometer_long <- barometer_groupby %>%
  tidyr::pivot_longer(
    cols = c('PM', 'MH', 'HS', 'MB', 'BRSV', 'PI3', 'BCV'),
    names_to = 'Pathogen',
    values_to = 'Result',
  )



[1m[22m`summarise()` has grouped output by 'Lab_reference', 'Country', 'Breed',
'Floored_date', 'Province', 'Farm_ID', 'Diagnostic_test'. You can override
using the `.groups` argument.


In [10]:
library(magrittr)
library(rdflib)
library(XML)

In [14]:
# create an RDF object
Literal <- function(value, datatype="xsd:string") {
  rdf_literal(value, datatype=datatype)
}
rdf <- rdf()
 
# create a namespace prefix for the ontology
onto <- "http://www.purl.org/decide/LivestockHealthOnto"
 
# iterate through each row in the barometer DataFrame
for (i in 1:nrow(barometer_long)) {
  row <- barometer_long[i,] 
# create a URI for the CattleSample individual based on the row ID
CattleSample <- paste0(onto, "CattleSample", i)

# add triples to the RDF graph


  #rdf_add(rdf, CattleSample, paste0(prefix, "hasBreed"), as.character(row[3]), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasLabReference"), ifelse(is.na(row$Lab_reference), "", as.character(row$Lab_reference)))
  rdf_add(rdf, CattleSample, paste0(onto, "hasCountry"), ifelse(is.na(row$Country), "", as.character(row$Country)))
  rdf_add(rdf, CattleSample, paste0(onto, "hasBreed"), ifelse(is.na(row$Breed), "", as.character(row$Breed)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasDate"), ifelse(is.na(row$Floored_date), "", as.character(row$Floored_date)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasProvince"), ifelse(is.na(row$Province), "", as.character(row$Province)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasFarmIdentification"), ifelse(is.na(row$Farm_ID), "", as.character(row$Farm_ID)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasDiagnosticTest"), ifelse(is.na(row$Diagnostic_test), "", as.character(row$Diagnostic_test)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasSampleType"), ifelse(is.na(row$Sample_type), "", as.character(row$Sample_type)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasPathogen"), ifelse(is.na(row$Pathogen), "", as.character(row$Pathogen)), datatype="xsd:string")
  rdf_add(rdf, CattleSample, paste0(onto, "hasResult"), ifelse(is.na(row$Result), "Missing", as.character(row$Result)), datatype="xsd:string")

}
 
# print the RDF graph (for testing)
cat(rdf_serialize(rdf, format="turtle"))
 
# output the RDF graph to a file
## Save file to RDF (Long Version) instead of CSV
rdf_serialize(rdf, "output/RDFoutputCattleSamplePathosen.ttl", format="turtle")

@base <localhost://> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

<http://www.purl.org/decide/LivestockHealthOntoCattleSample1>
    <http://www.purl.org/decide/LivestockHealthOntohasBreed> "Unknown"^^<xsd:string> ;
    <http://www.purl.org/decide/LivestockHealthOntohasCountry> "Belgium" ;
    <http://www.purl.org/decide/LivestockHealthOntohasDate> "2020-09-01"^^<xsd:string> ;
    <http://www.purl.org/decide/LivestockHealthOntohasDiagnosticTest> "NPS"^^<xsd:string> ;
    <http://www.purl.org/decide/LivestockHealthOntohasFarmIdentification> "1cdabc0d91a051251a6e7b1bd972f6cb63daafcee9a8256d9e1a1fa8f95ee086"^^<xsd:string> ;
    <http://www.purl.org/decide/LivestockHealthOntohasLabReference> "4" ;
    <http://www.purl.org/decide/LivestockHealthOntohasPathogen> "PM"^^<xsd:string> ;
    <http://www.purl.org/decide/LivestockHealthOntohasProvince> _:r1691677094r29936r1 ;
    <http://www.purl.org/decide/LivestockHealthOntohasResult> "0"^^<xsd:string> ;
    <http://www.purl.org