In [118]:
import lxml.etree as ET

In [119]:
file = 'vocabs/data/japbibthesaurus_03.rdf'

In [140]:
class SkosReader(object):
    """
    reads a skos file (RDF/XML) and returns a list of dictionaries containing skos:Concept properties
    concept-id: (URL)
    notation: (derived from concept-id)
    pref_labels: (list of labels)
    skos:broader: (list of broader elements)
    skos:narrower: ...
    skos:closeMatch
    skos:inScheme (list of all conceptSchemes a concept is related to
     """
    
    def __init__(self, skosfile):
        self.ns_skos = "http://www.w3.org/2004/02/skos/core#"
        self.ns_rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
        
        try:
            self.tree = ET.parse(skosfile)
            self.parsed_file = ET.tostring(self.tree, encoding="utf-8")
        except:
            self.parsed_file = "parsing didn't work"

        try:
            self.extractedDescriptions = self.tree.findall('rdf:Description', namespaces={"rdf":self.ns_rdf})
            self.numberOfextractedDescriptions = len(self.extractedDescriptions)
        except:
            self.extractedDescriptions = "rdf:Descriptions could not be extracted."
            self.numberOfextractedDescriptions = 0
    
    def returnConcepts(self):
        concepts = []
        for x in self.extractedDescriptions:
            concept ={}
            concept["id"] = x.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about']
            concept["notation"] = x.find('skos:notation', namespaces={"skos":self.ns_skos})
            skos_pref_labels = []
            for y in x.findall('skos:prefLabel', namespaces={"skos":self.ns_skos}):
                skos_label = {}
                skos_label['text'] = y.text
                skos_label['lang'] = y.attrib['{http://www.w3.org/XML/1998/namespace}lang']
                skos_pref_labels.append(skos_label)
            concept["pref_labels"] = skos_pref_labels
            #concept["broader"] = x.findall('skos:broader', namespaces={"skos":self.ns_skos})
            skos_broader = []
            for y in x.findall('skos:broader', namespaces={"skos":self.ns_skos}):
                broader = {}
                broader['uri'] = y.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource']
                broader['notation'] =broader['uri'].split('/')[-1]
                skos_broader.append(broader)
            concept['broader'] = skos_broader
            skos_narrower = []
            for y in x.findall('skos:narrower', namespaces={"skos":self.ns_skos}):
                narrower = {}
                narrower['uri'] = y.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource']
                narrower['notation'] =narrower['uri'].split('/')[-1]
                skos_narrower.append(narrower)
            concept['narrower'] = skos_narrower
            skos_closeMatch = []
            for y in x.findall('skos:closeMatch', namespaces={"skos":self.ns_skos}):
                closeMatch = {}
                closeMatch['uri'] = y.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource']
                closeMatch['notation'] =closeMatch['uri'].split('/')[-1]
                skos_closeMatch.append(closeMatch)
            concept['closeMatch'] = skos_closeMatch 
            skos_schemes = []
            for y in x.findall('skos:inScheme', namespaces={"skos":self.ns_skos}):
                skos_schemes.append(y.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'])
            concept["schemes"] = skos_schemes
            concepts.append(concept)
        return concepts

In [141]:
hansi = SkosReader(file)

In [142]:
hansi.numberOfextractedDescriptions

1422

In [143]:
for x in hansi.returnConcepts():
    print(x)

{'closeMatch': [], 'narrower': [], 'pref_labels': [], 'notation': None, 'schemes': [], 'id': 'https://acdh.oeaw.ac.at/vocabs/japbibthesaurus/JapBibThesaurus', 'broader': []}
{'closeMatch': [], 'narrower': [], 'pref_labels': [{'text': 'Sachbezug', 'lang': 'de'}], 'notation': None, 'schemes': [], 'id': 'https://acdh.oeaw.ac.at/vocabs/japbibthesaurus/1000', 'broader': []}
{'closeMatch': [], 'narrower': [], 'pref_labels': [{'text': 'Allgemeines und Varia', 'lang': 'de'}], 'notation': None, 'schemes': [], 'id': 'https://acdh.oeaw.ac.at/vocabs/japbibthesaurus/11', 'broader': []}
{'closeMatch': [], 'narrower': [], 'pref_labels': [{'text': 'Einführende allgemeine Werke', 'lang': 'de'}], 'notation': None, 'schemes': [], 'id': 'https://acdh.oeaw.ac.at/vocabs/japbibthesaurus/10101', 'broader': []}
{'closeMatch': [{'notation': 'Kategorie:Reiseführer', 'uri': 'http://de.dbpedia.org/resource/Kategorie:Reiseführer'}], 'narrower': [], 'pref_labels': [{'text': 'Reiseführer', 'lang': 'de'}], 'notation':