In [10]:
import pandas as pd
from rdflib import Graph, Namespace, Literal, RDF, RDFS, XSD, URIRef
from urllib.parse import quote

In [11]:
# Read the NEVO demo dataset (a comma-separated CSV file) into a DataFrame.
df = pd.read_csv("final_dataset_demo.csv", sep=",")

In [12]:
# RDF namespaces.
# FN is the project vocabulary. XSD is already imported from rdflib (see the
# import cell) and denotes http://www.w3.org/2001/XMLSchema#, so it is used
# as-is instead of being rebound to a second Namespace object here.
FN = Namespace("http://example.org/food-nutrition#")

# Graph that collects every triple produced by this notebook.
g = Graph()
g.bind("fn", FN)
g.bind("xsd", XSD)

# Declare the classes as rdfs:Class (they already exist in the ontology).
for class_name in (
    "Food",
    "Food_Wheel_Group",
    "Food_Wheel_Subgroup",
    "Diet",
    "Benefit",
):
    # FN[name] builds the same term as attribute access (FN.Food, ...).
    g.add((FN[class_name], RDF.type, RDFS.Class))

# Nutrient columns of the CSV, mapped to the property that stores each value.
# Values are written as xsd:float literals.
NUTRIENT_PROPERTIES = {
    "Calories (kcal)": FN.hasCalories,
    "PROT (g)": FN.hasProtein,
    "FAT (g)": FN.hasFat,
    "VITB12 (µg)": FN.hasVitaminB12,
    "VITC (mg)": FN.hasVitaminC,
    "VITD (µg)": FN.hasVitaminD,
    "Iron (mg)": FN.hasIron,
}

# Categorical columns, mapped to the property that links a food to the
# resource named by the cell value (food wheel group/subgroup, diet, benefit).
LINK_PROPERTIES = {
    "food_wheel_group": FN.belongsToFoodWheelGroup,
    "food_wheel_subgroup": FN.belongsToFoodWheelSubGroup,
    "diet_label": FN.belongsToDiet,
    "benefits": FN.hasBenefit,
}

# Loop through the DataFrame and add the triples describing each food row.
for idx, row in df.iterrows():
    food_name = row["food_name"]
    # A row without a name cannot be given a URI, and quote() raises
    # TypeError on NaN, so such rows are skipped instead of aborting the run.
    if pd.isnull(food_name):
        continue
    food_name = str(food_name)  # quote() only accepts str/bytes

    # Percent-encode the whole name; safe="" means "/" is encoded as well.
    food_uri = URIRef(FN + quote(food_name, safe=""))

    g.add((food_uri, RDF.type, FN.Food))
    g.add((food_uri, FN.foodName, Literal(food_name, datatype=XSD.string)))

    # Only record the NEVO code when present; str(NaN) would otherwise be
    # stored as the literal "nan".
    nevo_code = row["NEVO-code"]
    if not pd.isnull(nevo_code):
        g.add((food_uri, FN.NEVOCode, Literal(str(nevo_code), datatype=XSD.string)))

    # Nutritional data: row.get() returns None for an absent column, which
    # pd.isnull() treats as missing, so absent columns are simply skipped.
    for column, nutrient_property in NUTRIENT_PROPERTIES.items():
        value = row.get(column)
        if not pd.isnull(value):
            g.add((food_uri, nutrient_property, Literal(value, datatype=XSD.float)))

    # Food wheel group/subgroup, diet label and benefits: link the food to a
    # resource whose URI is the percent-encoded cell value.
    for column, link_property in LINK_PROPERTIES.items():
        value = row.get(column)
        if not pd.isnull(value):
            # str() keeps quote() from failing on non-string cell values.
            target_uri = URIRef(FN + quote(str(value), safe=""))
            g.add((food_uri, link_property, target_uri))

# Write the graph to disk twice: once as RDF/XML and once as Turtle,
# printing a confirmation line after each file is written.
for destination, rdf_format, message in (
    ("food_nutrition.rdf", "xml", "RDF file generated: food_nutrition.rdf"),
    ("food_nutrition.ttl", "turtle", "RDF Turtle file generated: food_nutrition.ttl"),
):
    g.serialize(destination=destination, format=rdf_format)
    print(message)

RDF file generated: food_nutrition.rdf
RDF Turtle file generated: food_nutrition.ttl
