In [24]:
! pip install pandas
! pip install openpyxl

import pandas as pd
import rdflib
import hashlib
import numpy as np
from datetime import datetime
from pandas.api.types import CategoricalDtype
from rdflib import Literal, Namespace, RDF, URIRef
from rdflib.namespace import FOAF, XSD
from rdflib import Graph, Namespace, RDF, RDFS, OWL
from rdflib.plugins.sparql import prepareQuery
from pyspark.sql.functions import when, col, lit



### Step01: Load RDF data from files


In [43]:
# Load RDF data
rdf_files = [
    "output/RDFoutputCattleSampleDGZ.ttl",
    "output/RDFoutputCattleSampleGD.ttl",
    "output/RDFoutputCattleSampleIreland.ttl",
    "output/RDFoutputCattleSamplePathosen.ttl",
    "output/RDFoutputCattleSampleArsia.ttl"
]

g = Graph()
for rdf_file in rdf_files:
    g.parse(rdf_file, format="turtle")


In [21]:

# Initialize lists to store RDF subjects
rdf_subjects = []

# Iterate through RDF triples and extract subjects
for subject, _, _ in g:
    rdf_subjects.append(subject)

# Convert RDF subjects to DataFrames
data_frames = []
for subject in rdf_subjects:
    rdf_data = g.triples((subject, None, None))
    data = {}
    for _, predicate, obj in rdf_data:
        column_name = predicate.split("#")[-1]
        data[column_name] = [obj.toPython() if isinstance(obj, Literal) else obj]
    df = pd.DataFrame(data)
    data_frames.append(df)

# Concatenate DataFrames
barometer_Tableau = pd.concat(data_frames, ignore_index=True)

# Convert to datetime and add Month and Year columns
barometer_Tableau['http://www.purl.org/decide/LivestockHealthOntohasDate'] = pd.to_datetime(barometer_Tableau['http://www.purl.org/decide/LivestockHealthOntohasDate'])
barometer_Tableau['Month'] = barometer_Tableau['http://www.purl.org/decide/LivestockHealthOntohasDate'].dt.month
barometer_Tableau['Year'] = barometer_Tableau['http://www.purl.org/decide/LivestockHealthOntohasDate'].dt.year


In [22]:
barometer_Tableau

Unnamed: 0,type,http://www.purl.org/decide/LivestockHealthOntohasBreed,http://www.purl.org/decide/LivestockHealthOntohasCountry,http://www.purl.org/decide/LivestockHealthOntohasDate,http://www.purl.org/decide/LivestockHealthOntohasDiagnosticTest,http://www.purl.org/decide/LivestockHealthOntohasFarmIdentification,http://www.purl.org/decide/LivestockHealthOntohasLabReference,http://www.purl.org/decide/LivestockHealthOntohasPathogen,http://www.purl.org/decide/LivestockHealthOntohasProvince,http://www.purl.org/decide/LivestockHealthOntohasResult,...,http://www.purl.org/decide/LivestockHealthOntohasParameterCode,http://www.purl.org/decide/LivestockHealthOntohasPathogenIdentification,http://www.purl.org/decide/LivestockHealthOntohasPathogenResult,http://www.purl.org/decide/LivestockHealthOntohasPostalCode,http://www.purl.org/decide/LivestockHealthOntohasSampleNumber,http://www.purl.org/decide/LivestockHealthOntohasLabreference,domain,range,Month,Year
0,http://www.purl.org/decide/LivestockHealthOnto...,Dairy,The Netherlands,2020-08-01 00:00:00,PCR,2956e9c51dc5b47a558a54b83846f9fd11443baaa5d33e...,2,P_M,North Brabant,Missing,...,,,,,,,,,8.0,2020.0
1,http://www.purl.org/decide/LivestockHealthOnto...,Beef,,2018-05-10 10:19:13,PCR,,1,RU PI3 Ag (PCR),West Flanders,1,...,BO_VIR_RSB_PCR,,,8630,9e8c38c290da3fdc7a8d52ef8484f159168b9dcee2e33e...,,,,5.0,2018.0
2,http://www.purl.org/decide/LivestockHealthOnto...,Dairy,,2019-10-10 10:55:37,PCR,,1,RU PI3 Ag (PCR),Antwerp,1,...,BO_VIR_RSB_PCR,,,3300,ee26e9a40528121d86534fbc57cfe014e827dd16c013c1...,,,,10.0,2019.0
3,http://www.purl.org/decide/LivestockHealthOnto...,Dairy,,2018-10-06 09:14:28,PCR,,1,RU PI3 Ag (PCR),Limburg,1,...,BO_VIR_RSB_PCR,,,2330,b6357b7a9fc90636a6f4cb397b606d9ff57548df0833cb...,,,,10.0,2018.0
4,http://www.purl.org/decide/LivestockHealthOnto...,Beef,The Netherlands,2022-11-01 00:00:00,Culture,92484b903eda64821e5f465743bac7f74693862af5fa0e...,2,P_M,Gelderland,1.0,...,,,,,,,,,11.0,2022.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254430,http://www.purl.org/decide/LivestockHealthOnto...,Beef,The Netherlands,2019-07-01 00:00:00,Culture,776829230338a04c49e3d4ebe6c6c2dddb7ebcf2bd86de...,2,B_C,Gelderland,Missing,...,,,,,,,,,7.0,2019.0
254431,http://www.purl.org/decide/LivestockHealthOnto...,Dairy,,2019-10-03 08:55:10,PCR,,1,RU PI3 Ag (PCR),Antwerp,1,...,BO_VIR_RSB_PCR,,,3300,b625d1fc29f2a257af3588119669ae1dc075882ef5e93d...,,,,10.0,2019.0
254432,http://www.purl.org/decide/LivestockHealthOnto...,Dairy,The Netherlands,2021-11-01 00:00:00,PCR,d2b95e8a635cbe08def8f74d99a1f164e49f489b08f26f...,2,B_R,Friesland,0.0,...,,,,,,,,,11.0,2021.0
254433,http://www.purl.org/decide/LivestockHealthOnto...,Dairy,The Netherlands,2020-01-01 00:00:00,PCR,561895b5eb08d5dea2e144be4c799643f557dcea321b6b...,2,P_M,Gelderland,Missing,...,,,,,,,,,1.0,2020.0


In [41]:
# Define filename and folder path for RDF
folder_path_rdf = "output/"
base_filename   = "barometer_combined"
current_date    = datetime.now().strftime("%Y-%m-%d")
new_filename_rdf = f"{folder_path_rdf}{base_filename}_{current_date}.rdf"

# Save RDF graph to file
g.serialize(new_filename_rdf, format="turtle")

<Graph identifier=N44d0b201dde446ccbfed2d738bec3e7f (<class 'rdflib.graph.Graph'>)>