### Install and Import the necessary classes from the RDFlib library:

In [63]:
! pip install pandas
! pip install openpyxl
!pip install rdflib

import pandas as pd
import rdflib
import hashlib
import time
import numpy as np
import urllib.parse
from rdflib import Literal, Namespace, RDF, URIRef
from rdflib.namespace import FOAF, XSD
from rdflib import Graph, Namespace, RDF, RDFS, OWL
from rdflib.plugins.sparql import prepareQuery
from pyspark.sql.functions import when, col, lit



Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


### Step 01: Define the file paths and Load the data into Pandas dataframes and Clean

In [38]:
# Define the file paths
files = ['Data/Lab1/DECIDE_MTA_UGENT_14nov2022.xlsx', 
         'Data/Lab1/DECIDE_MTA_UGENT_BAC_AERO_14nov2022.xlsx', 
         'Data/Lab1/DECIDE_MTA_UGENTBAC_MYCO_14nov2022.xlsx']

# Load the data into Pandas dataframes
dfs = []
for file in files:
    df = pd.read_excel(file, engine='openpyxl')
    dfs.append(df)

barometer_dt_raw = dfs[0]
barometer_aero_cult_raw = dfs[1]
barometer_myco_cult_raw = dfs[2]


### Clean the Data 

In [39]:
import pandas as pd

# Data manipulation AEROBIC CULTURE results
barometer_aero_cult = barometer_aero_cult_raw \
    .rename(columns={"Dossiernummer": "Filenumber", 
                     "KIEMSTAAL IDENTIFICATIE": "Pathogen_identification", 
                     "KIEMSTAAL RESULTAAT": "Pathogen_result", 
                     "Staalnummer": "Samplenumber"}) \
    .assign(Parameter_code = "BAC_AERO", Result = "OK") \
    .filter(items=["Filenumber", "Pathogen_identification", "Pathogen_result", "Parameter_code", "Samplenumber", "Result"]) \
    .query('Pathogen_identification in ["Pasteurella multocida", "Mannheimia haemolytica", "Histophilus somni", "Mycoplasma bovis"]') \
    .drop_duplicates()

df_samples = pd.DataFrame([
    ("OK", "BAC_AERO", "Culture", "Pasteurella multocida"),
    ("OK", "BAC_AERO", "Culture", "Mannheimia haemolytica"),
    ("OK", "BAC_AERO", "Culture", "Histophilus somni"),
    ("OK", "BAC_MYCOPLASMA", "Culture", "Mycoplasma bovis")
], columns=["Result", "Parameter_code", "Diagnostic_test", "Pathogen_identification"])

In [40]:
# Data manipulation MYCOPLASMA CULTURE results
barometer_myco_cult = barometer_myco_cult_raw \
    .rename(columns={"Dossiernummer": "Filenumber", "KIEMSTAAL IDENTIFICATIE": "Pathogen_identification", 
                     "KIEMSTAAL RESULTAAT": "Mycoplasma_result", "Staalnummer": "Samplenumber"}) \
    .assign(Parameter_code="BAC_MYCOPLASMA", Result="OK") \
    .loc[barometer_myco_cult_raw["KIEMSTAAL IDENTIFICATIE"] == "Mycoplasma bovis"] \
    .drop_duplicates(subset=["Filenumber", "Pathogen_identification", "Mycoplasma_result", "Parameter_code", "Samplenumber", "Result"]) \
    [["Filenumber", "Pathogen_identification", "Mycoplasma_result", "Parameter_code", "Samplenumber", "Result"]]


#print(barometer_myco_cult)




In [41]:
# Data manipulation PCR results
barometer_dtt = barometer_dt_raw \
    .rename(columns={"Dossiernummer": "Filenumber", "Staalnummer": "Samplenumber", 
                     "Staaltype": "Sample_type", "PARAMETER_CODE": "Parameter_code",
                     "Onderzoek": "Pathogen", "Resultaat": "Result",
                     "Creatiedatum": "Date", "Postcode": "Postal_code",
                     "ANON_ID": "Farm_ID"}) \
    .assign(Country=np.where(barometer_dt_raw["PARAMETER_CODE"].isin(["BAC_AERO", "BAC_MYCOPLASMA"]), "Belgium", np.nan)) \
    .assign(Diagnostic_test=np.where(barometer_dt_raw["PARAMETER_CODE"].isin(["BAC_AERO", "BAC_MYCOPLASMA"]), "Culture", "PCR")) \
    .assign(Lab_reference="1") \
    .replace({"RU Broncho-alveolar lavage (BAL)": "BAL",
              "RU Anderen": "Unknown",
              "RU Swabs": "Swab",
              "RU Swab": "Swab",
              "RU Neusswab": "Swab",
              "RU Neusswabs": "Swab",
              "RU Kadaver": "Autopsy",
              "RU Organen": "Autopsy",
              np.nan: "Missing"}) \
    .assign(Breed=np.where(barometer_dt_raw["Bedrijfstype"] == "VCALF", "Veal",
                          np.where(barometer_dt_raw["MEAT"].isnull(), "Unknown",
                          np.where((barometer_dt_raw["MEAT"] / barometer_dt_raw["TOTAL"]) > 0.9, "Beef",
                          np.where((barometer_dt_raw["MILK"] / barometer_dt_raw["TOTAL"]) > 0.9, "Dairy",
                          "Mixed"))))) \
    [["Filenumber", "Pathogen", "Result", "Parameter_code", "Samplenumber", "Result", "Country", "Diagnostic_test", "Lab_reference", "Sample_type", "Postal_code", "Farm_ID", "Breed"]]

In [42]:
# Data manipulation PCR results
barometer_dtt = barometer_dt_raw.rename(columns={"Dossiernummer": "Filenumber",
                                                  "Staalnummer": "Samplenumber",
                                                  "Staaltype": "Sample_type",
                                                  "PARAMETER_CODE": "Parameter_code",
                                                  "Onderzoek": "Pathogen",
                                                  "Resultaat": "Result",
                                                  "Creatiedatum": "Date",
                                                  "Postcode": "Postal_code",
                                                  "ANON_ID": "Farm_ID"})
    
barometer_dtt["Country"] = np.where(barometer_dtt["Parameter_code"].isin(["BAC_AERO", "BAC_MYCOPLASMA"]), "Belgium", np.nan)
barometer_dtt["Diagnostic_test"] = np.where(barometer_dtt["Parameter_code"].isin(["BAC_AERO", "BAC_MYCOPLASMA"]), "Culture", "PCR")
barometer_dtt["Lab_reference"] = "1"

sample_type_mapping = {"RU Broncho-alveolar lavage (BAL)": "BAL",
                       "RU Anderen": "Unknown",
                       "RU Swabs": "Swab",
                       "RU Swab": "Swab",
                       "RU Neusswab": "Swab",
                       "RU Neusswabs": "Swab",
                       "RU Kadaver": "Autopsy",
                       "RU Organen": "Autopsy"}

barometer_dtt["Sample_type"] = barometer_dtt["Sample_type"].map(sample_type_mapping).fillna("Missing")

breed_mapping = {"VCALF": "Veal",
                 "MEAT": np.nan}
barometer_dtt["Breed"] = np.select([(barometer_dtt["Bedrijfstype"] == "VCALF"),
                                     (barometer_dtt["MEAT"].isnull()),
                                     ((barometer_dtt["MEAT"] / barometer_dtt["TOTAL"]) > 0.9),
                                     ((barometer_dtt["MILK"] / barometer_dtt["TOTAL"]) > 0.9)],
                                    ["Veal", "Unknown", "Beef", "Dairy"],
                                    default="Mixed")

pathogen_mapping = {"AD Pasteurella multocida Ag (PCR)": "Pasteurella multocida",
                    "AD Pasteurella multocida Ag pool (PCR)": "Pasteurella multocida",
                    "AD P. multocida Ag (PCR)": "Pasteurella multocida",
                    "AD P. multocida Ag pool (PCR)": "Pasteurella multocida",
                    "AD Mannheimia haemolytica Ag (PCR)": "Mannheimia haemolytica",
                    "AD Mannheimia haemolytica Ag pool (PCR)": "Mannheimia haemolytica",
                    "RU PI3 Ag (PCR)": "PI3",
                    "RU PI3 Ag pool (PCR)": "PI3",
                    "RU BRSV Ag (PCR)": "BRSV",
                    "RU BRSV Ag pool (PCR)": "BRSV",
                    "AD Histophilus somnus (PCR)": "Histophilus somni",
                    "AD Histophilus somnus Ag (PCR)": "Histophilus somni",
                    "AD Histophilus somnus Ag pool (PCR)": "Histophilus somni",
                    "AD Histophilus somni Ag (PCR)": "Histophilus somni",
                    "AD Histophilus somni Ag pool (PCR)": "Histophilus somni",
                    "RU Mycoplasma bovis (PCR)": "Mycoplasma bovis",
                    "RU Mycoplasma bovis Ag pool (PCR)": "Mycoplasma bovis",
                    "RU Mycoplasma bovis Ag (PCR)": "Mycoplasma bovis",
                    "AD Corona Ag (PCR)": "BCV",
                    "AD Corona Ag pool (PCR)": "BCV"}

# Create a new column 'Disease' based on the mapping between Pathogen and Disease
barometer_dtt["Disease"] = barometer_dtt["Pathogen"].replace(pathogen_mapping)

# Create a mapping between postal codes and provinces
province_map = [(1000, 1299, "Brussels"),
                (1300, 1499, "Walloon Brabant"),
                (1500, 1999, "Flemish Brabant"),
                (3000, 3499, "Antwerp"),
                (2000, 2999, "Limburg"),
                (5000, 5999, "Namur"),
                (6000, 6599, "Hainaut"),
                (7000, 7999, "Hainaut"),
                (6600, 6999, "Luxembourg"),
                (8000, 8999, "West Flanders")]

# Sort the province_map list by the first element of each tuple
province_map.sort(key=lambda x: x[0])

# Create a new column 'Province' based on the mapping between Postal_code and Province
barometer_dtt["Province"] = pd.cut(barometer_dtt["Postal_code"], 
                               bins=[p[0]-1 for p in province_map] + [max([p[1] for p in province_map])+1],
                               labels=[p[2] for p in province_map],
                               ordered=False)


# Select columns of interest and drop duplicates
barometer_dtt = barometer_dtt.loc[:, ["Filenumber", "Diagnostic_test", "Samplenumber", "Country", 
                                      "Lab_reference", "Sample_type", "Breed", "Parameter_code", 
                                      "Result", "Pathogen", "Date", "Postal_code", "Province", "Farm_ID"]]\
                             .drop_duplicates()

# Show the resulting dataframe
#print(barometer_dtt.head())


In [43]:
 #Join dataframes
barometer = pd.merge(barometer_dtt, df_samples, on=['Diagnostic_test', 'Result', 'Parameter_code'], how='left')
barometer = pd.merge(barometer, barometer_aero_cult, on=['Filenumber', 'Samplenumber', 'Result', 'Parameter_code', 'Pathogen_identification'], how='left')
barometer = pd.merge(barometer, barometer_myco_cult, on=['Filenumber', 'Samplenumber', 'Result', 'Parameter_code', 'Pathogen_identification'], how='left')

# Replace values in Pathogen column
barometer['Pathogen'] = np.where(barometer['Pathogen'] == 'Pasteurella multocida', 'PM', 
                                 np.where(barometer['Pathogen'] == 'Histophilus somni', 'HS',
                                          np.where(barometer['Pathogen'] == 'Mannheimia haemolytica', 'MH', 
                                                   np.where(barometer['Pathogen'] == 'Mycoplasma bovis', 'MB', barometer['Pathogen']))))

barometer['Pathogen'] = np.where(barometer['Pathogen_identification'] == 'Pasteurella multocida', 'PM', 
                                 np.where(barometer['Pathogen_identification'] == 'Histophilus somni', 'HS',
                                          np.where(barometer['Pathogen_identification'] == 'Mannheimia haemolytica', 'MH', 
                                                   np.where(barometer['Pathogen_identification'] == 'Mycoplasma bovis', 'MB', barometer['Pathogen']))))

# Replace values in Result column
conditions = [barometer['Result'].isin(["Twijfelachtig (PCR)", "POSITIEF", "GEDETECTEERD", "GEDETECTEERD (sterk)", "GEDETECTEERD (zwak)", "GEDETECTEERD (matig)", "GEDETECTEERD (zeer sterk)", "GEDETECTEERD (zeer zwak)"]),
              barometer['Result'].isin(["negatief", "Niet gedetecteerd"]),
              barometer['Result'].isin(["NI", "niet interpreteerbaar", "Inhibitie"]),
              (barometer['Parameter_code'] == 'BAC_AERO') & (barometer['Pathogen_result'].isnull()),
              (barometer['Parameter_code'] == 'BAC_AERO') & (barometer['Pathogen_result'].notnull()),
              (barometer['Parameter_code'] == 'BAC_MYCOPLASMA') & (barometer['Mycoplasma_result'].isnull()),
              (barometer['Parameter_code'] == 'BAC_MYCOPLASMA') & (barometer['Mycoplasma_result'] == 'neg'),
              (barometer['Parameter_code'] == 'BAC_MYCOPLASMA') & (barometer['Mycoplasma_result'].str.contains('POS'))]

choices = [1, 0, None, 0, 1, None, 0, 1]

barometer['Result'] = np.select(conditions, choices, default=None)
#print(barometer.head())

Unnamed: 0,LabReference,Country,Breed,Date,Province,DiagnosticTest,SampleType,Pathogen,Result
0,1,Belgium,Beef,2016-12-01,Antwerp,PCR,Autopsy,BCV,0.0
1,1,Belgium,Beef,2016-12-01,Antwerp,PCR,Autopsy,BRSV,
2,1,Belgium,Beef,2016-12-01,Antwerp,PCR,Autopsy,HS,1.0
3,1,Belgium,Beef,2016-12-01,Antwerp,PCR,Autopsy,MB,1.0
4,1,Belgium,Beef,2016-12-01,Antwerp,PCR,Autopsy,MH,1.0
...,...,...,...,...,...,...,...,...,...
24443,1,Belgium,Veal,2021-11-01,WestFlanders,PCR,Swab,HS,0.0
24444,1,Belgium,Veal,2021-11-01,WestFlanders,PCR,Swab,MB,1.0
24445,1,Belgium,Veal,2021-11-01,WestFlanders,PCR,Swab,MH,0.0
24446,1,Belgium,Veal,2021-11-01,WestFlanders,PCR,Swab,PI3,1.0


### Step 02: Create an RDF graph and namespaces.

In [91]:
g = rdflib.Graph()
xsd = Namespace('http://www.w3.org/2001/XMLSchema#')
g.bind('xsd', xsd)

# Define your custom namespace for your ontology's properties
LHO = Namespace("http://www.purl.org/decide/LiveStockHealthOnto/LHO#")
g.bind('LHO', LHO)

SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
g.bind('skos', SKOS)

decide=  Namespace("http://www.purl.org/decide#")
g.bind('decide', decide)
ncit = Namespace("http://purl.obolibrary.org/obo/NCIT_C25464")
g.bind('ncit', ncit)
agrovoc = Namespace ("http://aims.fao.org/aos/agrovoc")
g.bind('agrovoc', agrovoc)


###  Step 03: Iterate over the Panda DataFrame and map to ontology properties:

In [92]:
#for index, row in barometer.iterrows():
for index, row in barometer.head(500).iterrows():
    PathogenCode = row["Pathogen"]
    Breed = row["Breed"]
    Country = row["Country"]
    Province = row["Province"]
    SampleType = row["SampleType"]
    Result = row["Result"]
    LabReference = row["LabReference"]
    DiagnosticTest = row["DiagnosticTest"]
    Date = row["Date"]
    

    
    # Create a unique URI for each sample based on the row index
    CattleSample_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#Lab1CattleSample_{index}")
    
    # Add sample type assertion
    g.add((CattleSample_uri, RDF.type, LHO.CattleSample))
    description = "An individual representing a sample from a cattle."
    g.add((CattleSample_uri, RDFS.comment, Literal(description, lang="en")))
    
    if "Pathogen" in row and row["Pathogen"]:
        Pathogen = row["Pathogen"]
        Pathogen_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#{Pathogen}")
        g.add((CattleSample_uri, LHO.hasPathogen, Pathogen_uri))
        g.add((Pathogen_uri, RDF.type, decide.Pathogen))
        description = "An individual representing a cattle breed of mixed meat and dairy type."
        g.add((Pathogen_uri, RDFS.comment, Literal(description, lang="en")))
        
            
    if "Breed" in row and row["Breed"]:
        Breed = row["Breed"]
        Breed_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#{Breed}")
        g.add((CattleSample_uri, LHO.hasBreed, Breed_uri))
        g.add((Breed_uri, RDF.type, LHO.Breed))
        description = "An individual representing a cattle breed of mixed meat and dairy type."
        g.add((Breed_uri, RDFS.comment, Literal(description, lang="en")))
        
        
    if "DiagnosticTest" in row and row["DiagnosticTest"]:
        DiagnosticTest= row["DiagnosticTest"]
        DiagnosticTest_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#{DiagnosticTest}")
        g.add((CattleSample_uri, LHO.hasDiagnosticTest, DiagnosticTest_uri))
        g.add((DiagnosticTest_uri, RDF.type, LHO.DiagnosticTest))

    if "Country" in row and row["Country"]:
        Country = row["Country"].strip()  # Use strip() to remove leading and trailing spaces
        # Encode the Country value to create a valid URI
        encoded_country = urllib.parse.quote(Country)
        Country_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#{encoded_country}")
    
        # Map to the specific class URI in your ontology and add the label
        g.add((CattleSample_uri, LHO.hasCountry, Country_uri))
        g.add((Country_uri, RDF.type, URIRef("http://purl.obolibrary.org/obo/NCIT_C25464")))  # Use the specific class URI
        g.add((Country_uri, RDFS.label, Literal("The Netherland")))
        description = "An individual representing different Countries."
        g.add((Country_uri, RDFS.comment, Literal(description, lang="en")))

    
        
    if "Province" in row and row["Province"]:
        Province = row["Province"].strip()
        encoded_province = urllib.parse.quote(Province)
        Province_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#{encoded_province}")
        g.add((CattleSample_uri, LHO.hasProvince, Province_uri))
        g.add((Province_uri, RDF.type, LHO.Province))
        description = "An individual representing different Province."
        g.add((Province_uri, RDFS.comment, Literal(description, lang="en")))
    
    
    if "SampleType" in row and row["SampleType"]:
        SampleType = row["SampleType"]
        SampleType_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#{SampleType}")
        # Add statements for SampleType
        g.add((CattleSample_uri, LHO.hasSampleType, SampleType_uri))
        g.add((SampleType_uri, RDF.type, URIRef("http://www.purl.org/decide#SampleType")))
           
    if "Result" in row and row["Result"]:
        SampleResult = row["Result"]
    # Create a unique URI for the Result based on the value
        Result_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#{SampleResult}")
        g.add((CattleSample_uri, LHO.hasResult, Result_uri))
        g.add((Result_uri, RDF.type, decide.SampleResult))
        
    if "LabReference" in row and row["LabReference"]:
        LabReference = row["LabReference"]
    # Create a unique URI for the LabReference based on the value
        LabReference_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#{LabReference}")
        g.add((CattleSample_uri, LHO.hasLabReference, LabReference_uri))
        g.add((LabReference_uri, RDF.type, LHO.LabReference))

    if "Floored_date" in row and not pd.isnull(row["Floored_date"]):  # Check for NaN or None values
        Date = str(row["Floored_date"]).strip()
    
    # Split the datetime string and take the date part
        Date = Date.split()[0]
    
    # Create a unique URI for the Date based on the value (use the Date directly)
        Date_uri = URIRef(f"http://www.purl.org/decide/LiveStockHealthOnto/LHO#{Date}")
    
    # Add Date assertion
        g.add((CattleSample_uri, LHO.hasDate, Date_uri))
        g.add((Date_uri, RDF.type, LHO.Date))
    
    # Add any additional properties related to Date as needed


# Serialize the RDF graph to a file
rdf_output_file = "output/RDFoutputCattleSampleLab1.ttl"
g.serialize(rdf_output_file, format="xml")

#Serialize the RDF graph to Turtle format and print it
turtle_data = g.serialize(format="turtle")
print(turtle_data)


@prefix LHO: <http://www.purl.org/decide/LiveStockHealthOnto/LHO#> .
@prefix decide: <http://www.purl.org/decide#> .
@prefix ncit: <http://purl.obolibrary.org/obo/NCIT_C25464> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

LHO:Lab1CattleSample_0 a LHO:CattleSample ;
    LHO:hasBreed LHO:Beef ;
    LHO:hasCountry LHO:Belgium ;
    LHO:hasDiagnosticTest LHO:PCR ;
    LHO:hasLabReference LHO:1 ;
    LHO:hasPathogen LHO:BCV ;
    LHO:hasProvince LHO:Antwerp ;
    LHO:hasSampleType LHO:Autopsy ;
    rdfs:comment "An individual representing a sample from a cattle."@en .

LHO:Lab1CattleSample_1 a LHO:CattleSample ;
    LHO:hasBreed LHO:Beef ;
    LHO:hasCountry LHO:Belgium ;
    LHO:hasDiagnosticTest LHO:PCR ;
    LHO:hasLabReference LHO:1 ;
    LHO:hasPathogen LHO:BRSV ;
    LHO:hasProvince LHO:Antwerp ;
    LHO:hasResult LHO:nan ;
    LHO:hasSampleType LHO:Autopsy ;
    rdfs:comment "An individual representing a sample from a cattle."@en .

LHO:Lab1CattleSample_10 a LHO:CattleSa

### Step 4: Load the RDF data and ontology into a Panda DataFrame: 

In [97]:
# Parse the ontology file in OWL format and add it to the graph
path_to_ontology = "Ontology/LivestockHealthOnto1.0.owl"
g.parse(path_to_ontology, format="xml")
    


<Graph identifier=N7b3f24e727c245138fcd00ea171f027c (<class 'rdflib.graph.Graph'>)>

In [95]:
#Use RDFS or OWL reasoning to infer additional knowledge
g.bind('rdfs', RDFS)
g.bind('owl', OWL)
g.bind('onto', Namespace("http://www.purl.org/decide/LivestockHealthOnto"))

### Step 05: Query the data from updated ontology 

In [98]:
# define the SPARQL query 01 1 for Filtering Positive Cattle Samples for MB Pathogen via Swab Tests

query = """
PREFIX decide: <http://www.purl.org/decide#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX LHO: <http://www.purl.org/decide/LiveStockHealthOnto/LHO#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT
  (strafter(str(?Sample), "#") AS ?SampleName)
  (strafter(str(?Pathogen), "#") AS ?PathogenName)
  (strafter(str(?Breed), "#") AS ?BreedName)
  (strafter(str(?LivestockProductionStages), "#") AS ?StagesName)
  (strafter(str(?SampleType), "#") AS ?SampleTypeName)
  (strafter(str(?DiagnosticTest), "#") AS ?DiagnosticTestName)
  (strafter(str(?Country), "#") AS ?CountryName)
  (strafter(str(?Province), "#") AS ?ProvinceName)
  (strafter(str(?SampleResult), "#") AS ?SampleResultName)

WHERE {
  
  {
    ?Sample rdf:type LHO:CattleSample .
    ?Sample LHO:hasPathogen ?Pathogen .
    FILTER (?Pathogen = LHO:MB)
    ?Sample LHO:hasSampleType ?SampleType .
    FILTER (?SampleType = LHO:Swab)
    ?Sample LHO:hasCountry ?Country .
    ?Sample LHO:hasProvince ?Province .
    ?Sample LHO:hasBreed ?Breed .
    ?Sample LHO:hasResult ?SampleResult .
    FILTER (?SampleResult = LHO:1.0)
    ?Sample LHO:hasDiagnosticTest ?DiagnosticTest .
  }
}
"""
# Execute the query and retrieve the results
results = g.query(query)

# Convert the results to a Pandas dataframe
data = []
for row in results:
    data.append(list(row))
df = pd.DataFrame(data, columns=["Sample", "Pathogen", "Breed","ProductionStages", "SampleType",  "DiagnosticTest", "Country","Province", "SampleResult"])

# Display the dataframe
df

Unnamed: 0,Sample,Pathogen,Breed,ProductionStages,SampleType,DiagnosticTest,Country,Province,SampleResult
0,Lab1CattleSample_59,MB,Beef,,Swab,PCR,Belgium,EastFlanders,1.0
1,Lab1CattleSample_346,MB,Beef,,Swab,PCR,Belgium,Limburg,1.0
2,Lab1CattleSample_367,MB,Beef,,Swab,PCR,Belgium,Limburg,1.0
3,Lab1CattleSample_465,MB,Beef,,Swab,PCR,Belgium,Limburg,1.0


In [99]:
# define the SPARQL query 02


query = """
PREFIX decide: <http://www.purl.org/decide#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX LHO: <http://www.purl.org/decide/LiveStockHealthOnto/LHO#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT
  (strafter(str(?Sample), "#") AS ?SampleName)
  (strafter(str(?Pathogen), "#") AS ?PathogenName)
  (strafter(str(?Breed), "#") AS ?BreedName)
  (strafter(str(?LivestockProductionStages), "#") AS ?StagesName)
  (strafter(str(?SampleType), "#") AS ?SampleTypeName)
  (strafter(str(?DiagnosticTest), "#") AS ?DiagnosticTestName)
  (strafter(str(?Country), "#") AS ?CountryName)
  (strafter(str(?SampleResult), "#") AS ?SampleResultName)

WHERE {
  
  {
    ?Sample rdf:type LHO:CattleSample .
    ?Sample LHO:hasPathogen ?Pathogen .
    FILTER (?Pathogen = LHO:BCV)
    ?Sample LHO:hasSampleType ?SampleType .
    ?Sample LHO:hasCountry ?Country .
    ?Sample LHO:hasBreed ?Breed .
    ?Sample LHO:hasResult ?SampleResult .
    ?Sample LHO:hasDiagnosticTest ?DiagnosticTest .
    FILTER (?DiagnosticTest = LHO:PCR)
  }
}
"""
# Execute the query and retrieve the results
results = g.query(query)

# Convert the results to a Pandas dataframe
data = []
for row in results:
    data.append(list(row))
df = pd.DataFrame(data, columns=["Sample", "Pathogen", "Breed","ProductionStages", "SampleType",  "DiagnosticTest", "Country", "SampleResult"])

# Display the dataframe
df

Unnamed: 0,Sample,Pathogen,Breed,ProductionStages,SampleType,DiagnosticTest,Country,SampleResult
0,Lab1CattleSample_21,BCV,Beef,,BAL,PCR,Belgium,1.0
1,Lab1CattleSample_28,BCV,Beef,,BAL,PCR,Belgium,1.0
2,Lab1CattleSample_35,BCV,Beef,,BAL,PCR,Belgium,1.0
3,Lab1CattleSample_49,BCV,Beef,,BAL,PCR,Belgium,1.0
4,Lab1CattleSample_56,BCV,Beef,,Swab,PCR,Belgium,1.0
5,Lab1CattleSample_63,BCV,Beef,,BAL,PCR,Belgium,1.0
6,Lab1CattleSample_70,BCV,Beef,,Autopsy,PCR,Belgium,1.0
7,Lab1CattleSample_77,BCV,Beef,,BAL,PCR,Belgium,1.0
8,Lab1CattleSample_84,BCV,Beef,,BAL,PCR,Belgium,1.0
9,Lab1CattleSample_147,BCV,Beef,,Swab,PCR,Belgium,1.0
