In [6]:
!pip install rdflib



In [7]:

from rdflib import Graph, Literal, RDF, RDFS, Namespace, URIRef

SCHEMA = Namespace("http://schema.org/")
KG = Namespace("http://kg-course.io/food-nutrition/")

g = Graph()
g.bind("schema", SCHEMA)
g.bind("ex", KG)

 
classes = [
    (KG.Recipe, SCHEMA.Recipe, "A food recipe"),
    (KG.Restaurant, SCHEMA.FoodEstablishment, "An establishment that serves food"),
    (KG.Nutrition, SCHEMA.NutritionInformation, "Nutritional facts about a recipe"),
    (KG.Review, SCHEMA.Review, "A user review")
]

for uri, s_type, comment in classes:
    g.add((uri, RDF.type, RDFS.Class))
    g.add((uri, RDFS.subClassOf, s_type))
    g.add((uri, RDFS.comment, Literal(comment)))

 

properties = [
    # object properties
    (KG.hasNutrition, RDF.Property, KG.Recipe, KG.Nutrition,
     "Links a recipe to its nutrition information"),

    (KG.itemReviewed, RDF.Property, KG.Review, KG.Recipe,
     "Links a review to the recipe being reviewed"),

    # datatype properties
    (KG.recipeIngredient, RDF.Property, KG.Recipe, RDFS.Literal,
     "Ingredient used in a recipe"),

    (KG.recipeCategory, RDF.Property, KG.Recipe, RDFS.Literal,
     "Category of the recipe"),

    (KG.cookTime, RDF.Property, KG.Recipe, RDFS.Literal,
     "Cooking time of the recipe"),

    (KG.calories, RDF.Property, KG.Nutrition, RDFS.Literal,
     "Calories in the recipe"),

    (KG.proteinContent, RDF.Property, KG.Nutrition, RDFS.Literal,
     "Protein content of the recipe"),

    (KG.servesCuisine, RDF.Property, KG.Restaurant, RDFS.Literal,
     "Cuisine served by the restaurant"),

    (KG.aggregateRating, RDF.Property, KG.Restaurant, RDFS.Literal,
     "Aggregate rating of the restaurant"),

    (KG.reviewBody, RDF.Property, KG.Review, RDFS.Literal,
     "Text content of the review")
]

for prop, p_type, domain, range_, comment in properties:
    g.add((prop, RDF.type, p_type))
    g.add((prop, RDFS.domain, domain))
    g.add((prop, RDFS.range, range_))
    g.add((prop, RDFS.comment, Literal(comment)))

 
with open("vocabulary.ttl", "w") as f:
    f.write(g.serialize(format="turtle"))

print("Vocabulary defined successfully")
print(g.serialize(format="turtle"))


Vocabulary defined successfully
@prefix ex: <http://kg-course.io/food-nutrition/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema1: <http://schema.org/> .

ex:Nutrition a rdfs:Class ;
    rdfs:comment "Nutritional facts about a recipe" ;
    rdfs:subClassOf schema1:NutritionInformation .

ex:Recipe a rdfs:Class ;
    rdfs:comment "A food recipe" ;
    rdfs:subClassOf schema1:Recipe .

ex:Restaurant a rdfs:Class ;
    rdfs:comment "An establishment that serves food" ;
    rdfs:subClassOf schema1:FoodEstablishment .

ex:Review a rdfs:Class ;
    rdfs:comment "A user review" ;
    rdfs:subClassOf schema1:Review .

ex:aggregateRating a rdf:Property ;
    rdfs:comment "Aggregate rating of the restaurant" ;
    rdfs:domain ex:Restaurant ;
    rdfs:range rdfs:Literal .

ex:calories a rdf:Property ;
    rdfs:comment "Calories in the recipe" ;
    rdfs:domain ex:Nutrition ;
    rdfs:range rdfs:Literal .

ex:co

In [8]:
import pandas as pd
from rdflib import Graph, Literal, RDF, Namespace, URIRef
from rdflib.namespace import XSD

# graph and namespaces
g = Graph()
SCHEMA = Namespace("http://schema.org/")
KG = Namespace("http://kg-course.io/food-nutrition/")
g.bind("schema", SCHEMA)
g.bind("ex", KG)

recipes_df = pd.read_csv("data/cleaned_recipes.csv", sep=',', engine='python', on_bad_lines='warn').head(10000)
restaurants_df = pd.read_csv("data/cleaned_restaurants.csv", sep=',', engine='python', on_bad_lines='warn').head(10000)
nutrition_df = pd.read_csv("data/cleaned_nutrition.csv", sep=',', engine='python', on_bad_lines='warn').head(10000)
reviews_df = pd.read_csv("data/cleaned_reviews.csv", sep=',', engine='python', on_bad_lines='warn').head(1000)


print("Data loaded successfully ")

def create_kg():
    for index, row in recipes_df.iterrows():
        recipe_uri = KG[f"recipe_{row['RecipeId']}"]
        g.add((recipe_uri, RDF.type, SCHEMA.Recipe))
        g.add((recipe_uri, SCHEMA.name, Literal(row['Name'])))
        g.add((recipe_uri, SCHEMA.recipeCategory, Literal(row['RecipeCategory'])))
        g.add((recipe_uri, SCHEMA.cookTime, Literal(row['CookTime'])))

        nutri_uri = KG[f"nutrition_{row['RecipeId']}"]
        g.add((recipe_uri, SCHEMA.nutrition, nutri_uri))
        g.add((nutri_uri, RDF.type, SCHEMA.NutritionInformation))

    for index, row in restaurants_df.iterrows():
        rest_uri = KG[f"restaurant_{row['Restaurant ID']}"]
        g.add((rest_uri, RDF.type, SCHEMA.FoodEstablishment))
        g.add((rest_uri, SCHEMA.name, Literal(row['Restaurant Name'])))
        g.add((rest_uri, SCHEMA.address, Literal(row['Address'])))
        g.add((rest_uri, SCHEMA.servesCuisine, Literal(row['Cuisines'])))




    g.serialize(destination='KEN4256-structured-KG-Team6.ttl', format='turtle')


create_kg()

Data loaded successfully 


In [9]:
print(g.serialize(format="turtle"))

@prefix ex: <http://kg-course.io/food-nutrition/> .
@prefix schema1: <http://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ex:recipe_100 a schema1:Recipe ;
    schema1:cookTime 1.12e+02 ;
    schema1:name "Bread Pudding with Jack Daniels Sauce" ;
    schema1:nutrition ex:nutrition_100 ;
    schema1:recipeCategory "Dessert" .

ex:recipe_10000 a schema1:Recipe ;
    schema1:cookTime 1.8e+02 ;
    schema1:name "Tomato Paste" ;
    schema1:nutrition ex:nutrition_10000 ;
    schema1:recipeCategory "Low Protein" .

ex:recipe_10002 a schema1:Recipe ;
    schema1:cookTime 5e+00 ;
    schema1:name "Caulinana salad" ;
    schema1:nutrition ex:nutrition_10002 ;
    schema1:recipeCategory "Cauliflower" .

ex:recipe_10003 a schema1:Recipe ;
    schema1:cookTime 2e+00 ;
    schema1:name "Spicy Corn Salad With Avocado Dressing" ;
    schema1:nutrition ex:nutrition_10003 ;
    schema1:recipeCategory "Corn" .

ex:recipe_10004 a schema1:Recipe ;
    schema1:cookTime 1.872e+04 ;
    