In [146]:
# Check for existing libraries in environment
! pip install rdflib lxml



In [147]:
from lxml import etree
import os

# Shared constants: URIs looked up by key in the cells below.
# NOTE(review): the name `globals` shadows Python's built-in globals() for the
# rest of the notebook; it is kept because other cells index it by this name.
globals = dict(
    # Key used to read the xml:id attribute from an element's .attrib mapping.
    xml_id="{http://www.w3.org/XML/1998/namespace}id",
    # Prefix from which the story URI (and everything derived from it) is built.
    base_data_URI="https://github.com/falaimo99/sam/data/",
)

def extract_tree(path):
    """Parse the XML document at ``path`` and return its root element.

    The document is expected to be well-formed XML; parsing is delegated to
    ``etree.parse``.
    """
    return etree.parse(path).getroot()

# Load the story document; the builder functions below read it through the
# module-level name `root`.
STORY_XML_PATH = "./short_stories/Novellino_II.xml"
root = extract_tree(STORY_XML_PATH)


In [148]:
from rdflib import Graph, Literal, BNode, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, XSD, DCTERMS, OWL

# Namespaces used by SAM
# NOTE(review): Wikidata's canonical RDF namespaces are
# http://www.wikidata.org/entity/ (items) and
# http://www.wikidata.org/prop/direct/ (the usual meaning of the `wdt:` prefix).
# The `wdt` and `wdp` IRIs below are wiki page URLs instead — confirm this is
# intentional before the data is linked against Wikidata.
sam = Namespace("https://purl.org/samcore#")
wdt = Namespace("https://www.wikidata.org/wiki/")
wdp = Namespace("https://www.wikidata.org/wiki/Property:")
cwrc = Namespace("https://sparql.cwrc.ca/ontologies/cwrc.html#")

def setting_the_graph():
    """Create an empty graph with the SAM prefixes bound.

    Binds, in this order, the prefixes ``sam``, ``wdt``, ``wdp`` and ``cwrc``
    to the module-level namespaces of the same names.

    Returns:
        The newly created ``Graph``.
    """
    graph = Graph()

    prefix_bindings = (
        ("sam", sam),
        ("wdt", wdt),
        ("wdp", wdp),
        ("cwrc", cwrc),
    )
    for prefix, namespace in prefix_bindings:
        graph.bind(prefix, namespace)

    return graph

# Registry of URIs shared between the builder functions: set_base_story()
# stores the 'story' and 'CharacterList' entries, set_characters() reads them.
utils_URI = dict()

def set_base_story():
    """Add the story node, its scene links and its character list to ``g``.

    Reads the module-level ``root`` and writes to the module-level graph ``g``.
    The story URI is the base data URI followed by the xml:id of the <Story>
    child of ``root``; it is stored in ``utils_URI['story']``, and the derived
    ``<story URI>_CharacterList`` URI in ``utils_URI['CharacterList']``.

    Every <SequenceOfScenes> child of ``root``, then every <Scene> child, is
    attached to the story through wdp:P527, printing one line per link.
    Finally the story is typed sam:Story and the list sam:CharacterList.

    Returns:
        The module-level graph ``g``.
    """
    story_element = root.find("Story")
    story_URI = URIRef(globals["base_data_URI"] + story_element.attrib[globals["xml_id"]])
    character_list_URI = URIRef(story_URI + "_CharacterList")
    utils_URI.update(story=story_URI, CharacterList=character_list_URI)

    # Sequences of scenes first, then stand-alone scenes; both are looked up
    # among the direct children of root. An absent tag yields no iterations.
    for tag in ("SequenceOfScenes", "Scene"):
        for part in root.findall(tag):
            part_URI = story_URI + "_" + part.attrib[globals["xml_id"]]
            g.add((story_URI, wdp.P527, part_URI))
            print(f"{part_URI} added to Story via wdp:P527")

    for subject, rdf_class in (
        (story_URI, sam.Story),
        (character_list_URI, sam.CharacterList),
    ):
        g.add((subject, RDF.type, rdf_class))

    print(
        f"""{story_URI} added as sam:Story;
{character_list_URI} added as sam:CharacterList;
        """
        )

    return g

def set_characters():
    """Link every character of the story to the character list in ``g``.

    For each <Character> inside the <CharacterList> child of the module-level
    ``root``, builds ``<story URI>_<xml:id>`` and attaches it to the
    character-list node through wdp:P527. Only this membership triple is
    added; no further character information is written.

    Reads ``utils_URI['story']`` and ``utils_URI['CharacterList']``, which
    set_base_story() stores, so that function must have run first.
    """
    members = root.find('CharacterList').findall('Character')
    for member in members:
        prefix = utils_URI['story'] + "_"
        member_URI = prefix + member.attrib[globals['xml_id']]
        g.add((utils_URI['CharacterList'], wdp.P527, member_URI))


In [149]:
# Testing block
# Build a fresh graph and bind it to the global name `g`, which
# set_base_story() and set_characters() write to; it must exist before they run.
g: Graph = setting_the_graph()

# Populates `g` and utils_URI. As the cell's last expression, the returned
# graph is also echoed as the cell output.
set_base_story()

https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene1 added to Story via wdp:P527
https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene2 added to Story via wdp:P527
https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene3 added to Story via wdp:P527
https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene4 added to Story via wdp:P527
https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene5 added to Story via wdp:P527
https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene6 added to Story via wdp:P527
https://github.com/falaimo99/sam/data/novellino_II_anonimo added as sam:Story;
https://github.com/falaimo99/sam/data/novellino_II_anonimo_CharacterList added as sam:CharacterList;
        


<Graph identifier=N99b0619008e745fda6a861d19fb70724 (<class 'rdflib.graph.Graph'>)>

In [157]:
# Depends on the previous cell: set_characters() reads the utils_URI entries
# stored by set_base_story() and adds to the same global graph `g`.
set_characters()
# Print the graph built so far.
g.print()

@prefix sam: <https://purl.org/samcore#> .
@prefix wdp: <https://www.wikidata.org/wiki/Property:> .

<https://github.com/falaimo99/sam/data/novellino_II_anonimo> a sam:Story ;
    wdp:P527 <https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene1>,
        <https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene2>,
        <https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene3>,
        <https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene4>,
        <https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene5>,
        <https://github.com/falaimo99/sam/data/novellino_II_anonimo_scene6> .

<https://github.com/falaimo99/sam/data/novellino_II_anonimo_CharacterList> a sam:CharacterList ;
    wdp:P527 <https://github.com/falaimo99/sam/data/novellino_II_anonimo_federigoImperadore>,
        <https://github.com/falaimo99/sam/data/novellino_II_anonimo_lapidario>,
        <https://github.com/falaimo99/sam/data/novellino_II_anonimo_prestoGiovanni> 