# Create RDF data from books data

Run Notebook in VS Code https://code.visualstudio.com/docs/datascience/jupyter-notebooks

In [52]:
import kglab

namespaces = {
    "wtm": "http://purl.org/heals/food/",
    "ind": "http://purl.org/heals/ingredient/",
    }

kg = kglab.KnowledgeGraph(
    name = "A recipe KG example based on Food.com",
    namespaces = namespaces,
)

kg
namespaces

{'wtm': 'http://purl.org/heals/food/',
 'ind': 'http://purl.org/heals/ingredient/'}

In [10]:
import rdflib

node = rdflib.URIRef("https://www.food.com/recipe/327593")

kg.add(node, kg.get_ns("rdf").type, kg.get_ns("wtm").Recipe)
kg.add(node, kg.get_ns("wtm").hasCookTime, rdflib.Literal("PT8M", datatype=kg.get_ns("xsd").duration))
kg.add(node, kg.get_ns("wtm").hasIngredient, kg.get_ns("ind").ChickenEgg)
kg.add(node, kg.get_ns("wtm").hasIngredient, kg.get_ns("ind").CowMilk)
kg.add(node, kg.get_ns("wtm").hasIngredient, kg.get_ns("ind").WholeWheatFlour)


In [11]:
for s, p, o in kg.rdf_graph():
    print(s, p, o)


https://www.food.com/recipe/327593 http://purl.org/heals/food/hasCookTime PT8M
https://www.food.com/recipe/327593 http://purl.org/heals/food/hasIngredient http://purl.org/heals/ingredient/CowMilk
https://www.food.com/recipe/327593 http://purl.org/heals/food/hasIngredient http://purl.org/heals/ingredient/WholeWheatFlour
https://www.food.com/recipe/327593 http://purl.org/heals/food/hasIngredient http://purl.org/heals/ingredient/ChickenEgg
https://www.food.com/recipe/327593 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://purl.org/heals/food/Recipe


In [12]:
s = kg.save_rdf_text(format="ttl")
print(s)


@prefix ind: <http://purl.org/heals/ingredient/> .
@prefix wtm: <http://purl.org/heals/food/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<https://www.food.com/recipe/327593> a wtm:Recipe ;
    wtm:hasCookTime "PT8M"^^xsd:duration ;
    wtm:hasIngredient ind:ChickenEgg,
        ind:CowMilk,
        ind:WholeWheatFlour .




In [7]:
namespaces = {
    "wtm": "http://purl.org/dc/terms/",
    "ind": "http://purl.org/heals/ingredient/",
    }

kg = kglab.KnowledgeGraph(
    name = "A book KG example based on Forum books data",
    namespaces = namespaces,
)

kg
namespaces

{'wtm': 'http://purl.org/dc/terms/',
 'ind': 'http://purl.org/heals/ingredient/'}

## Beispiel

Rubinroter Dschungel von Rita Mae Brown

db: book_id = 1094

- findet sich bei Goodreads unter https://www.goodreads.com/book/show/1874541
- tutorial: https://ruthtillman.com/post/introduction-rdf-librarians-metadata/

In [47]:
# example literals

# https://www.dublincore.org/specifications/dublin-core/dc-rdf/


namespaces = {
    "dcterms": "http://purl.org/dc/terms/"
    }

kg = kglab.KnowledgeGraph(
    name = "A book KG based on Forum Citavi data",
    namespaces = namespaces,
)

kg
namespaces

node = rdflib.URIRef("book_uri")
kg.add(node, kg.get_ns("rdf").type, kg.get_ns("dcterms").Recipe)
# kg.add(node, kg.get_ns("dcterms").type, kg.get_ns("dcterms").DCMIType("Text"))
kg.add(node, kg.get_ns("dcterms").title, rdflib.Literal("Rubinroter Dschungel", lang="de"))
kg.add(node, kg.get_ns("dcterms").creator, rdflib.Literal("Rita Mae Brown"))
kg.add(node, kg.get_ns("dcterms").publisher, rdflib.Literal("Rohwolt"))
kg.add(node, kg.get_ns("dcterms").place, rdflib.Literal("Reinbek bei Hamburg")) #kg.get_ns("ind").WholeWheatFlour)

kg.save_rdf("tmp.ttl")

for s, p, o in kg.rdf_graph():
    print(s, p, o)


book_uri http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://purl.org/dc/terms/Recipe
book_uri http://purl.org/dc/terms/title Rubinroter Dschungel
book_uri http://purl.org/dc/terms/creator Rita Mae Brown
book_uri http://purl.org/dc/terms/place Reinbek bei Hamburg
book_uri http://purl.org/dc/terms/publisher Rohwolt


Jetzt mit mehr linked data, indem ich auch die IDs bzw. Namespaces der Library of Congress und Goodreads dazunehme

In [61]:
namespaces = {
    # Dublin Core
    "dcterms": "http://purl.org/dc/terms/",
    # goodreads
    "book": "https://www.goodreads.com/book/show",
    # VIAF 
    "owl": "http://www.w3.org/2002/07/owl#"
    }

kg = kglab.KnowledgeGraph(
    name = "A book KG based on Forum Citavi data",
    namespaces = namespaces,
)

node = rdflib.URIRef("book_uri")
kg.add(node, kg.get_ns("rdf").type, kg.get_ns("dcterms").Recipe)
kg.add(node, kg.get_ns("dcterms").type, rdflib.Literal("Text"))
kg.add(node, kg.get_ns("dcterms").title, rdflib.Literal("Rubinroter Dschungel"))

kg.add(node, kg.get_ns("dcterms").creator, rdflib.Literal("Rita Mae Brown"))
kg.add(node, kg.get_ns("dcterms").publisher, rdflib.Literal("Rohwolt"))
# Link to VIAF
# eigentlich falsch, weil bezieht sich nur auf Autorin NICHT auf Buch!
kg.add(node, kg.get_ns("owl").sameAs, rdflib.URIRef("https://viaf.org/viaf/112278657/"))
kg.add(node, kg.get_ns("dcterms").place, rdflib.Literal("Reinbek bei Hamburg")) #kg.get_ns("ind").WholeWheatFlour)

kg.save_rdf("tmp.ttl")

for s, p, o in kg.rdf_graph():
    print(s, p, o)


book_uri http://purl.org/dc/terms/type Text
book_uri http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://purl.org/dc/terms/Recipe
book_uri http://purl.org/dc/terms/creator Rita Mae Brown
book_uri http://purl.org/dc/terms/title Rubinroter Dschungel
book_uri http://purl.org/dc/terms/place Reinbek bei Hamburg
book_uri http://purl.org/dc/terms/publisher Rohwolt
book_uri http://www.w3.org/2002/07/owl#sameAs https://viaf.org/viaf/112278657/


In [76]:
import os

print(os.path.dirname(os.getcwd()))

/Users/kabr/code/remove-na-lgbtiq-queer-knowledge-graph


Neuer Versuch: csv laden

In [88]:
import os

config = f"""
[CONFIGURATION]
logging_level=DEBUG
[StudentSportDB]
mappings={os.path.dirname(os.getcwd())}/data-modelling/mappings/brown_db.rml.ttl
db_url=mysql+pymysql://root:root@localhost:3306/lgbtiq_kg
         """

config

from icecream import ic
import kglab

namespaces = {
    "ex":  "http://example.com/",
    }

kg = kglab.KnowledgeGraph(
    name = "A KG example with students and sports",
    namespaces = namespaces,
    )

kg.materialize(config);

INFO | 2022-03-08 18:01:04,211 | 10 mapping rules retrieved.
INFO | 2022-03-08 18:01:04,229 | Mapping partition with 1 groups generated.
INFO | 2022-03-08 18:01:04,230 | Maximum number of rules within mapping group: 10.
INFO | 2022-03-08 18:01:04,232 | Mappings processed in 0.579 seconds.
INFO | 2022-03-08 18:01:04,747 | Number of triples generated in total: 0.
