In [7]:
import ast
from urllib.parse import quote

import pandas as pd
from rdflib import Graph, Namespace, Literal, RDF, RDFS, XSD, URIRef

In [8]:
# Read the NEVO dataset (comma-separated CSV) into a DataFrame
df = pd.read_csv(
    "final_dataset_demo.csv",
    sep=",",
)

In [9]:
# RDF namespaces.
# FN is the project vocabulary. XSD is deliberately NOT redefined here: the
# rdflib import already provides it, and reassigning the name only shadowed
# that import with an equivalent hand-built Namespace.
FN = Namespace("http://example.org/food-nutrition#")

# Graph that collects every triple produced by this notebook
g = Graph()
g.bind("fn", FN)
g.bind("xsd", XSD)

# Declare the ontology classes (already in ontology) as rdfs:Class
for class_name in ["Food", "Food_Wheel_Group", "Food_Wheel_Subgroup", "Diet", "Benefit"]:
    g.add((FN[class_name], RDF.type, RDFS.Class))

# Nutrient lookup: CSV column -> (value property, unit property, unit).
# Each row below is (column, property stem, unit); the two property names are
# derived from the stem as "has<Stem>" and "has<Stem>Unit".
nutrients = {
    column: (f"has{stem}", f"has{stem}Unit", unit)
    for column, stem, unit in (
        ("Calories (kcal)", "Calories", "kcal"),
        ("PROT (g)", "Protein", "g"),
        ("FAT (g)", "Fat", "g"),
        ("VITB12 (µg)", "VitaminB12", "µg"),
        ("VITC (mg)", "VitaminC", "mg"),
        ("VITD (µg)", "VitaminD", "µg"),
        ("Iron (mg)", "Iron", "mg"),
    )
}

# Loop through DataFrame and add triples


def _resource(label):
    """Return the URIRef in the FN namespace for *label*.

    The label is percent-encoded with no safe characters. ``str()`` is applied
    first because ``urllib.parse.quote`` raises TypeError on non-string input
    (for example a numeric cell).
    """
    return FN[quote(str(label), safe="")]


def _parse_tags(raw):
    """Turn one cell of the ``tag`` column into a list of tag values.

    * list / tuple / set / frozenset -> its items
    * missing value (None / NaN)     -> empty list
    * string holding a Python literal collection (e.g. "['a', 'b']") -> its items
    * string holding a quoted string (e.g. "'vegan'") -> that single string
    * any other string or scalar     -> the cell itself as a single tag
    """
    if isinstance(raw, (list, tuple, set, frozenset)):
        return list(raw)
    if pd.isnull(raw):
        return []
    if not isinstance(raw, str):
        # A bare scalar (e.g. a number) is not iterable; keep it as one tag.
        return [raw]
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Not a Python literal: treat the whole cell as a single tag.
        return [raw]
    if isinstance(parsed, (list, tuple, set, frozenset, dict)):
        return list(parsed)
    if isinstance(parsed, str):
        # Iterating a str would emit one tag per character.
        return [parsed]
    # Literal scalar such as "5" or "None": keep the original cell text.
    return [raw]


# CSV column -> predicate linking a food to the resource named in that column
resource_links = {
    "food_wheel_group": FN.belongsToFoodWheelGroup,
    "food_wheel_subgroup": FN.belongsToFoodWheelSubGroup,
    "diet_label": FN.belongsToDiet,
}

skipped_rows = 0
for _idx, row in df.iterrows():
    food_name = row["food_name"]
    if pd.isnull(food_name):
        # Without a name no URI can be minted (quote() would raise on NaN).
        skipped_rows += 1
        continue
    food_name = str(food_name)
    food_uri = _resource(food_name)

    g.add((food_uri, RDF.type, FN.Food))
    g.add((food_uri, FN.foodName, Literal(food_name, datatype=XSD.string)))

    nevo_code = row["NEVO-code"]
    if pd.notnull(nevo_code):
        # An integer column containing gaps is read as float; avoid "12.0".
        if isinstance(nevo_code, float) and nevo_code.is_integer():
            nevo_code = int(nevo_code)
        g.add((food_uri, FN.NEVOCode, Literal(str(nevo_code), datatype=XSD.string)))

    # Add nutrients (value + unit)
    for col, (val_prop, unit_prop, unit) in nutrients.items():
        val = row.get(col)
        if pd.notnull(val):
            g.add((food_uri, FN[val_prop], Literal(float(val), datatype=XSD.float)))
            g.add((food_uri, FN[unit_prop], Literal(unit, datatype=XSD.string)))

    # Food wheel group / subgroup and diet
    for column, predicate in resource_links.items():
        target = row.get(column)
        if pd.notnull(target):
            g.add((food_uri, predicate, _resource(target)))

    # Tags
    for tag in _parse_tags(row.get("tag")):
        g.add((food_uri, FN.hasTag, Literal(tag, datatype=XSD.string)))

if skipped_rows:
    print(f"Skipped {skipped_rows} row(s) without a food_name")

# Write the graph to disk, once as RDF/XML and once as Turtle.
# Each entry is (output path, rdflib format name, label used in the message).
for out_path, rdf_format, label in (
    ("food_nutrition.rdf", "xml", "RDF"),
    ("food_nutrition.ttl", "turtle", "RDF Turtle"),
):
    g.serialize(out_path, format=rdf_format)
    print(f"{label} file generated: {out_path}")

RDF file generated: food_nutrition.rdf
RDF Turtle file generated: food_nutrition.ttl
