# Graph Builder

Dieses Notebook baut den Graphen auf Basis der erstellen Ontologie und den verarbeiteten Daten.

In [1]:
import pandas as pd
import numpy as np
import rdflib

# Import der Daten

In [45]:
df_metadata = pd.read_csv('metadata/CM_Seiten_Metadaten_aggregated.csv', sep='\t')
df_data = pd.read_csv('final_data_aggregated.csv', sep=';')

# Erstellen des Graphen


## Load the ontology

In [3]:
from rdflib import Graph

# Create an empty RDF graph
g = Graph()

# Load the ontology from the TTL file into the graph
g.parse("ontology.ttl", format="ttl")

# Now, the ontology is loaded into the `g` Graph object


<Graph identifier=Nda89dbbb33d34ab591a8a803d67c6d82 (<class 'rdflib.graph.Graph'>)>

In [50]:
df_metadata['Year_From'] = df_metadata['Year_From'].round(0).astype(int)
df_metadata['Year_To'] = df_metadata['Year_To'].round(0).astype(int)

TypeError: loop of ufunc does not support argument 0 of type str which has no callable rint method

In [51]:
df_metadata

Unnamed: 0,Zs_Caption,Year_From,Year_To,Date_From,Date_To
0,... Bericht der Jüdischen Haushaltungsschule z...,1835.0,1835.0,1821-01-01,1910-01-01
1,... Bericht ueber den Verein für Westfalen und...,1932.0,1932.0,1836-01-01,1836-01-01
2,... Bericht ueber den Verein für die Provinz W...,1871.0,1871.0,1835-01-01,1835-01-01
3,... Jahresbericht der Israelitischen Waisenans...,1836.0,1836.0,1915-01-01,1917-01-01
4,... Jahresbericht der Jüdischen Frauenvereinig...,1870.0,1870.0,1855-01-01,1922-01-01
...,...,...,...,...,...
611,Illustrirter jüdischer Familienkalender,,,1881-01-01,1907-01-01
612,Jahresbericht der Jugendgruppe (begründet von ...,,,1913-01-01,1913-01-01
613,Jüdisches Gemeindeblatt für den Verband der Ku...,,,1937-08-01,1938-11-01
614,Aḥiasaf,,,1893-01-01,1922-01-01


In [68]:
import hashlib
from rdflib import Graph, Namespace, RDF, RDFS, Literal, URIRef
import uuid
import base64

# Define the namespace for the ontology
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
jl = Namespace("http://data.judaicalink.org/ontology/")
foaf = Namespace("http://xmlns.com/foaf/0.1/")
gndo = Namespace("http://d-nb.info/standards/elementset/gnd#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
edm = Namespace("http://www.europeana.eu/schemas/edm/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
dcterms = Namespace("http://purl.org/dc/terms/")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
geo = Namespace("http://www.opengis.net/ont/geosparql#")
cm = Namespace("http://data.judaicalink.org/ontology/cm")

# Initialize the graph
graph = Graph()

graph.bind("skos", skos)
graph.bind("jl", jl)
graph.bind("foaf", foaf)
graph.bind("gndo", gndo)
graph.bind("owl", owl)
graph.bind("edm", edm)
graph.bind("dc", dc)
graph.bind("dcterms", dcterms)
graph.bind("rdfs", rdfs)
graph.bind("geo", geo)
graph.bind("cm", cm)


# Add data from the dataframe
for index, row in df_metadata.iterrows():
    hashed_title =  hashlib.sha256(row['Zs_Caption'].encode()).hexdigest()

    uuid_from_hash = uuid.uuid5(uuid.NAMESPACE_OID, hashed_title)
    uri = URIRef(f"http://data.judaicalink.org/data/cm/{str(uuid_from_hash)}")
    graph.add((uri, RDF.type, cm.journal))
    graph.add((uri, cm.title, Literal(row['Zs_Caption'])))
    if not pd.isna(row['Year_From']):
        graph.add((uri, cm.Year_From, Literal(str(row['Year_From']))))
    if not pd.isna(row['Year_To'] or row['Year_To'] == "nan"):
        graph.add((uri, cm.Year_To, Literal(str(row['Year_To']))))
    if not pd.isnull(row['Date_From'] or row['Date_From'] == "nan"):
        graph.add((uri, cm.Date_From, Literal(row['Date_From'])))
    if not pd.isnull(row['Date_To']):
        graph.add((uri, cm.Date_To, Literal(row['Date_To'])))

# Serialize the graph into Turtle format
#print(graph.serialize(format='turtle'))


1835.0
1932.0
1871.0
1836.0
1870.0
1929.0
1894.0
1889.0
1938.0
1897.0
1919.0
1911.0
1922.0
1922.0
1915.0
1869.0
1920.0
1926.0
1921.0
1937.0
1906.0
1933.0
1938.0
1929.0
1938.0
1930.0
1845.0
1922.0
1851.0
1920.0
1938.0
1938.0
1866.0
1890.0
1890.0
1875.0
1940.0
1925.0
1916.0
1907.0
1907.0
1966.0
1921.0
1902.0
1937.0
1938.0
1928.0
1958.0
1928.0
1936.0
1922.0
1938.0
1906.0
1866.0
1848.0
1893.0
1929.0
1921.0
1923.0
1868.0
1934.0
1934.0
1925.0
1938.0
1929.0
1934.0
1904.0
1938.0
1919.0
1931.0
1938.0
1923.0
1930.0
1937.0
1929.0
1914.0
1919.0
1938.0
1937.0
1937.0
1846.0
1926.0
1926.0
1930.0
1929.0
1938.0
1937.0
1921.0
1919.0
1939.0
1938.0
1920.0
1932.0
1847.0
1823.0
1908.0
1975.0
1897.0
1920.0
1938.0
1913.0
1893.0
1938.0
1938.0
1914.0
1918.0
1922.0
1860.0
1938.0
1841.0
1921.0
1869.0
1924.0
1931.0
1922.0
1914.0
1937.0
1931.0
1922.0
1931.0
1930.0
1933.0
1911.0
1921.0
1939.0
1919.0
1922.0
1935.0
1934.0
1928.0
1929.0
1863.0
1936.0
1840.0
1937.0
1831.0
1873.0
1915.0
1929.0
1898.0
1934.0
1938.0
1923.0