In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from rdflib import Graph, Namespace, URIRef, Literal, XSD
from rdflib.namespace import RDF, OWL, FOAF,RDFS

In [None]:
# Namespaces: "clbr" identifies generated resources, "clbo" the ontology terms.
CLBR = Namespace("https://climatebowl.data.dice-research.org/resource/")
CLBO = Namespace("https://climatebowl.data.dice-research.org/ontology/")

# Shared graph with both prefixes registered for serialisation.
g = Graph()
g.bind("clbr", CLBR)
g.bind("clbo", CLBO)

# Load the ontology (Turtle) into the graph before any instance data is added.
ontology_file = "ontology.ttl"
g.parse(ontology_file, format="turtle")

In [None]:

def _is_blank(value):
    """Return True when a worksheet cell carries no content.

    Empty cells arrive as "" because NaN is replaced via ``fillna("")``;
    strings made up only of whitespace are treated the same way.
    """
    return isinstance(value, str) and not value.strip()


def process_excel_to_rdf(pcf_sheet, Process_worksheet_name, company, product):
    """Build an RDF graph from one worksheet of a PCF Excel workbook.

    A company node and a product node are created once and linked with
    ``hasProduct``. Every data row then yields a process (linked to the
    product via ``hasProcess``), a flow attached to the process via
    ``hatInput``, an emission-factor node and a material node.

    Args:
        pcf_sheet: Path or file-like object handed to ``pd.read_excel``.
        Process_worksheet_name: Worksheet to read; column names are taken
            from the fourth row (``header=3``).
        company: Company name; used as label and embedded in resource URIs.
        product: Product name; used as label and embedded in resource URIs.

    Returns:
        A new ``rdflib.Graph`` holding only the triples generated here.

    Raises:
        KeyError: If an expected column is missing from the worksheet.
        ValueError: If an amount or emission-factor cell holds text that
            cannot be converted to ``float``.
    """
    CLBO = Namespace("https://climatebowl.data.dice-research.org/ontology/")
    resource_base = "https://climatebowl.data.dice-research.org/resource/"
    factor_column = "Emissionsfactor (GWP100 pro Einheit nach IPCC AR6)"

    # Predicates that do not depend on the row are built once.
    hat_io_ontology_uri = CLBO["hatInput"]
    # NOTE(review): "resuling" spelling is kept verbatim; confirm it matches ontology.ttl.
    quantity_predicate = CLBO["resulingQuantityPerReferenceFlow"]
    quantity_unit_predicate = CLBO["resulingQuantityPerReferenceFlowUnit"]

    # Blank cells become "" so that the truthiness checks below skip them.
    df = pd.read_excel(pcf_sheet, header=3, sheet_name=Process_worksheet_name)
    df = df.fillna("")

    g = Graph()

    # NOTE(review): company/product are interpolated into URIs verbatim;
    # names containing spaces or reserved characters are not escaped here.
    companyURI = URIRef(f"{resource_base}company_{company}")
    g.add((companyURI, RDF.type, CLBO.Company))
    g.add((companyURI, RDFS.label, Literal(company)))

    productURI = URIRef(f"{resource_base}product_{company}_{product}")
    g.add((productURI, RDF.type, CLBO.Product))
    g.add((productURI, RDFS.label, Literal(product)))
    g.add((companyURI, CLBO.hasProduct, productURI))

    # Category and material nodes share one counter, so it can advance twice per row.
    material_no = 1

    # Process, flow and emission-factor numbering all follow the 1-based row position.
    for row_no, (_, row) in enumerate(df.iterrows(), start=1):
        prozess_uri = URIRef(f"{resource_base}Process_{company}_{product}_{row_no}")
        # The flow URI combines the zero-based process index with the one-based flow index.
        fluzz_resouce_uri = URIRef(
            f"{resource_base}{company}_{product}_InputProcess_{row_no - 1}Fluzz{row_no}"
        )

        # Process node
        g.add((prozess_uri, RDF.type, CLBO.Process))
        g.add((productURI, CLBO.hasProcess, prozess_uri))
        g.add((prozess_uri, CLBO.processmodule, Literal(row["ProcessModule"])))
        g.add((prozess_uri, CLBO.allocationPartNumber, Literal(row["AllocationPartNumber"])))
        g.add((prozess_uri, hat_io_ontology_uri, fluzz_resouce_uri))

        # Flow node; a cell equal to "x" marks the flow as PCF-relevant.
        g.add((fluzz_resouce_uri, RDF.type, CLBO.Flow))
        relevantPCF = row["relevant for PCF"] == "x"
        g.add((fluzz_resouce_uri, CLBO.relevantPCF, Literal(relevantPCF, datatype=XSD.boolean)))

        if row["Allocation Value Chain"]:
            g.add((fluzz_resouce_uri, CLBO.allocationValueChain, Literal(row["Allocation Value Chain"])))
        if row["Scope"]:
            g.add((fluzz_resouce_uri, CLBO.scope, Literal(row["Scope"])))
        if row["Life Cycle Phase"]:
            g.add((fluzz_resouce_uri, CLBO.lifecyclePhase, Literal(row["Life Cycle Phase"])))

        # Flow category: the cell text (spaces removed) names the ontology class.
        kategoryFluss = str(row['CategoryFlow']).replace(" ", "")
        if kategoryFluss:
            kategorie_fluss_uri = URIRef(f"{resource_base}{company}_{product}_{kategoryFluss}{material_no}")
            g.add((fluzz_resouce_uri, CLBO.hasCategory, kategorie_fluss_uri))
            # Item access is used because attribute access on a Namespace (a str
            # subclass) resolves names such as "format" to string methods.
            g.add((kategorie_fluss_uri, RDF.type, CLBO[kategoryFluss]))
            g.add((kategorie_fluss_uri, RDFS.label, Literal(row["Flow"])))
            material_no += 1

        # Resulting quantity per reference flow and its unit
        if row["amount"]:
            g.add((fluzz_resouce_uri, quantity_predicate, Literal(float(row["amount"]), datatype=XSD.float)))
        if row["Unit"]:
            g.add((fluzz_resouce_uri, quantity_unit_predicate, Literal(row["Unit"])))

        # A blank factor cell cannot be converted to float, so its numeric
        # triples are omitted instead of aborting the whole worksheet.
        factor_cell = row[factor_column]
        has_factor = not _is_blank(factor_cell)

        # Emission factor node
        emissionFactorURI = URIRef(f"{resource_base}THGEmissionen{row_no}_{company}_{product}")
        g.add((fluzz_resouce_uri, CLBO.hasEmissionsfactor, emissionFactorURI))
        g.add((emissionFactorURI, RDF.type, CLBO.THGEmissions))
        g.add((emissionFactorURI, CLBO.item, Literal(row["Item Emissionsfactor"])))
        if has_factor:
            g.add((emissionFactorURI, CLBO.emissionfactor, Literal(float(factor_cell), datatype=XSD.float)))
        g.add((emissionFactorURI, CLBO.unit, Literal(row["Unit Emissionsfactor"])))
        g.add((emissionFactorURI, CLBO.land, Literal(row["Region"])))
        g.add((emissionFactorURI, CLBO.year, Literal(row["year"])))
        g.add((emissionFactorURI, CLBO.datasource, Literal(row["Datesource"])))

        # Material node; it is typed only when the row names a flow group.
        materialURI = URIRef(f"{resource_base}material{material_no}_{company}_{product}")
        group_flow = str(row["Group Flow"]).replace(", ", "_")
        if not _is_blank(group_flow):
            g.add((materialURI, RDF.type, CLBO[group_flow]))
        g.add((materialURI, CLBO.itemName, Literal(row["Flow"])))
        if has_factor:
            g.add((materialURI, CLBO.emissionsValue, Literal(float(factor_cell), datatype=XSD.float)))
        # The unit gets its own predicate so it is not mixed into the numeric values.
        g.add((materialURI, CLBO.emissionsUnit, Literal(row["Unit Emissionsfactor"])))
        g.add((materialURI, CLBO.region, Literal(row["Region"])))
        g.add((materialURI, CLBO.year, Literal(row["year"])))
        material_no += 1

    return g

# Example usage
# g = process_excel_to_rdf('path_to_file.xlsx', 'Sheet1', 'company1', 'product1')


In [None]:

def process_ef_to_rdf(fileName, sheetName, dataName):
    """Build an RDF graph of material emission factors from one worksheet.

    Each row of the sheet becomes one material resource whose URI is
    ``.../resource/material<N>_<dataName>`` with ``N`` counting rows from 1.

    Args:
        fileName: Path or file-like object handed to ``pd.read_excel``.
        sheetName: Worksheet to read.
        dataName: Suffix that distinguishes this data set in the material URIs.

    Returns:
        A new ``rdflib.Graph`` holding only the triples generated here.

    Raises:
        KeyError: If an expected column is missing from the worksheet.
        ValueError: If an "EmissionsFactor" cell holds non-numeric text.
    """
    g = Graph()
    print(fileName, sheetName, dataName)

    emissiondf = pd.read_excel(fileName, sheet_name=sheetName)

    # Plain literal properties as (predicate, source column) pairs.
    literal_columns = (
        (CLBO.itemName, "Standardflow"),
        (CLBO.emissionsUnit, "Unit"),
        (CLBO.region, "Region"),
        (CLBO.year, "year"),
    )

    for material_no, (_, row) in enumerate(emissiondf.iterrows(), start=1):
        materialURI = URIRef(f"https://climatebowl.data.dice-research.org/resource/material{material_no}_{dataName}")

        if not pd.isna(row["Group Flow"]):
            # Item access is used because attribute access on a Namespace (a str
            # subclass) resolves names such as "format" to string methods.
            group_flow = str(row["Group Flow"]).replace(", ", "_")
            g.add((materialURI, RDF.type, CLBO[group_flow]))

        # Missing cells are read as NaN; they are skipped so that no "nan"
        # literals end up in the graph.
        for predicate, column in literal_columns:
            if not pd.isna(row[column]):
                g.add((materialURI, predicate, Literal(row[column])))

        if not pd.isna(row["EmissionsFactor"]):
            g.add((materialURI, CLBO.emissionsValue, Literal(float(row["EmissionsFactor"]), datatype=XSD.float)))

    return g
  
  

In [None]:
import json

# Read the product manifest. UTF-8 is requested explicitly so that non-ASCII
# company/product names decode identically on every platform instead of
# depending on the locale's default encoding.
with open("data.json", 'r', encoding="utf-8") as file:
    data = json.load(file)

# Each manifest entry names one workbook sheet plus the company/product it describes.
for item in data:
    product_name = item['productName']
    company_name = item['companyName']
    file_name = item['fileName']
    sheet_name = item['sheetName']

    # Convert the sheet and merge its triples into the shared graph.
    g += process_excel_to_rdf(f"excel_data/{file_name}", sheet_name, company_name, product_name)

# Read the emission-factor manifest with the same explicit encoding.
with open("emissions_data.json", "r", encoding="utf-8") as file:
    emissions_data = json.load(file)

# Each manifest entry names one emission-factor sheet and the data-set label for its URIs.
for item in emissions_data:
    data_name = item['dataName']
    file_name = item['fileName']
    sheet_name = item['sheetName']

    # Convert the sheet and merge its triples into the shared graph.
    g += process_ef_to_rdf(f"excel_data/{file_name}", sheet_name, data_name)


In [None]:

# Export the graph to a Turtle file. The output folder is created on demand so
# the write does not fail with FileNotFoundError when it does not exist yet.
output_path = Path('generatedRDF/knowledgeGraph.ttl')
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, 'w', encoding='utf8') as f:
    f.write(g.serialize(format='turtle'))