In [83]:
import rdflib
from rdflib import Graph, Literal, Namespace, RDF, URIRef, RDFS, XSD
from rdflib.namespace import DC, RDFS, SKOS
from rdflib.util import guess_format
import pprint
import django
django.setup()
from vocabs.models import *
import re

In [84]:
# Namespace objects used to build the class/predicate URIs queried by the importer
# and to register prefixes on the parsed graph.
# NOTE: SKOS, DC and RDFS rebind the names already imported from rdflib in the
# import cell; from here on these Namespace instances are the ones in scope.
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
DC = Namespace("http://purl.org/dc/elements/1.1/")
DCT = Namespace("http://purl.org/dc/terms/")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
# VOCABS is not referenced by any of the visible cells.
VOCABS = Namespace("https://vocabs.acdh.oeaw.ac.at/create-concept-scheme/")

In [179]:
# File names handed to the importer. Only `skosifiedttl` is used by the visible
# cells; the other two are presumably alternative test inputs.
myfilettl = "test.ttl"
myfilerdf = "test.rdf"
skosifiedttl = "output.ttl"

In [194]:
class SkosImporter(object):
    """Read a SKOS vocabulary from an RDF file and load it into the Django models.

    The ``parse*`` methods turn the RDF graph into plain dicts; ``uploadRdf``
    writes those dicts to ``SkosConceptScheme`` / ``SkosConcept``.

    Every ``format`` argument is forwarded unchanged to ``Graph.parse``; with
    ``None`` the choice of parser is left to rdflib.
    """
    # TODO: add main language for a vocab

    # SKOS properties copied into each concept dict as a single string, listed
    # in the order their keys are added. When a concept carries several values
    # for one property, the last one yielded by the graph wins.
    _SKOS_SINGLE_VALUED = (
        "inScheme", "broader", "narrower", "definition", "note",
        "scopeNote", "exactMatch", "source", "notation",
    )

    def __init__(self, file):
        """Remember the path (or other rdflib-parsable source) to import from."""
        self.file = file

    def graph_read(self, format=None):
        """Parse ``self.file`` into a new graph with the common prefixes bound.

        Returns:
            The populated ``rdflib.Graph``.
        """
        g = Graph()
        g.bind('skos', SKOS)
        g.bind('dc', DC)
        g.bind('dct', DCT)
        g.bind('rdfs', RDFS)
        g.bind('owl', OWL)
        g.parse(self.file, format=format)
        return g

    @staticmethod
    def _label_entry(literal):
        """Build ``{"label": ..., "lang": ...}`` from a label literal.

        The language is read from the literal's ``language`` attribute rather
        than scraped out of its ``repr``. ``"lang"`` is only present when the
        literal actually carries a language tag.

        NOTE(review): tags longer than three characters (e.g. ``en-GB``) are
        now passed through in full; confirm the model field accepts them.
        """
        entry = {"label": str(literal)}
        language = getattr(literal, "language", None)
        if language:
            entry["lang"] = language
        return entry

    def parseCs(self, format=None):
        """Describe the concept scheme declared in the file.

        Returns:
            A dict with ``"identifier"`` (scheme URI) and, when a preferred
            label exists, ``"label"``. Empty if the file declares no
            ``skos:ConceptScheme``; if it declares several, the last one
            yielded by the graph wins.
        """
        concept_scheme = {}
        g = self.graph_read(format=format)
        for cs in g.subjects(RDF.type, SKOS.ConceptScheme):
            concept_scheme["identifier"] = str(cs)
            # preferredLabel yields (property, literal) pairs.
            for _, cs_label in g.preferredLabel(cs):
                concept_scheme["label"] = str(cs_label)
        return concept_scheme

    def parseConcepts(self, format=None):
        """Describe every ``skos:Concept`` in the file.

        Returns:
            A list with one dict per concept. Each dict always has
            ``"legacy_id"`` (the concept URI), ``"prefLabel"`` and
            ``"altLabel"`` (lists of ``{"label", "lang"?}`` dicts), plus one
            string entry for each of the single-valued SKOS properties and
            ``"creator"`` that the concept actually has.
        """
        resources = []
        g = self.graph_read(format=format)
        for concept in g.subjects(RDF.type, SKOS.Concept):
            concept_desc = {}
            concept_desc["legacy_id"] = str(concept)

            # Preferred labels: preferredLabel yields (property, literal) pairs.
            concept_desc["prefLabel"] = [
                self._label_entry(literal)
                for _, literal in g.preferredLabel(concept)
            ]

            # Alternative labels
            concept_desc["altLabel"] = [
                self._label_entry(literal)
                for literal in g.objects(concept, SKOS.altLabel)
            ]

            for key in self._SKOS_SINGLE_VALUED:
                for value in g.objects(concept, getattr(SKOS, key)):
                    concept_desc[key] = str(value)
            for creator in g.objects(concept, DC.creator):
                concept_desc["creator"] = str(creator)

            resources.append(concept_desc)

        return resources

    def uploadRdf(self, format=None, username=''):
        """Create the concept scheme and its concepts in the database.

        Concepts are created first and linked to their broader concept in a
        second pass, so a broader concept may appear later in the file than
        the concept that points to it.

        Args:
            format: forwarded to the parser (see class docstring).
            username: ``User.username`` recorded as ``created_by`` on every
                created row. Defaults to the value that was previously
                hard-coded.

        Returns:
            Whatever ``SkosConcept.objects.rebuild()`` returns.

        Raises:
            User.DoesNotExist: no user with ``username``.
            SkosConceptScheme.DoesNotExist: a concept names a scheme other
                than the imported one and that scheme is not in the database.
            SkosConcept.DoesNotExist: a ``broader`` target was not imported.
        """
        cs_data = self.parseCs(format=format)
        concepts_data = self.parseConcepts(format=format)
        cs_uri = cs_data.get("identifier")
        cs_label = cs_data.get("label")
        # One lookup instead of one per created row.
        creator = User.objects.get(username=username)
        # objects.create() already persists the row; no extra save is needed.
        cs = SkosConceptScheme.objects.create(
            identifier=cs_uri, title=cs_label, created_by=creator)

        # First pass: create the concepts.
        for concept in concepts_data:
            # Keep the last preferred label, as before, but start from None for
            # every concept so a concept without labels can never inherit the
            # previous concept's label.
            pref_label = None
            pref_label_lang = None
            for entry in concept.get("prefLabel") or []:
                pref_label = entry.get("label")
                pref_label_lang = entry.get("lang")

            # A concept without skos:inScheme, or one naming the scheme just
            # created, is attached to that scheme directly.
            scheme_uri = concept.get("inScheme")
            if scheme_uri is None or scheme_uri == cs_uri:
                scheme = cs
            else:
                scheme = SkosConceptScheme.objects.get(identifier=scheme_uri)

            SkosConcept.objects.create(
                pref_label=pref_label,
                legacy_id=concept.get("legacy_id"),
                pref_label_lang=pref_label_lang,
                scheme=scheme,
                notation=concept.get("notation", "None"),
                created_by=creator)

        # Second pass: every concept exists now, so broader links can resolve.
        for concept in concepts_data:
            broader_uri = concept.get("broader")
            if broader_uri is not None:
                SkosConcept.objects.filter(
                    legacy_id=concept.get("legacy_id")
                ).update(
                    broader_concept=SkosConcept.objects.get(legacy_id=broader_uri))

        # The bulk update() above bypasses save(), so the tree must be rebuilt.
        mytree = SkosConcept.objects.rebuild()
        return mytree
       
# separate import of relationships
               

In [195]:
# Build an importer for the Turtle file and display the concept scheme it declares.
myimport = SkosImporter(skosifiedttl)
myimport.parseCs(format="ttl")

{'identifier': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels',
 'label': 'DHA Taxonomy Import Relationships'}

In [196]:
# The importer passes format=None to rdflib unless told otherwise, so Turtle
# input is requested explicitly with format="ttl".
# NOTE(review): the original note here said the default is RDF/XML; confirm
# against the installed rdflib version.
myimport.parseConcepts(format="ttl")

[{'legacy_id': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels#concept36',
  'prefLabel': [{'label': 'NER', 'lang': 'en'}],
  'altLabel': [{'label': 'Named Entity Recognition'}],
  'inScheme': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels',
  'broader': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels#concept33',
  'definition': 'a process of identifying mentions of individuals/entities in a text',
  'notation': 'ner',
  'creator': 'ACDH-OEAW Team'},
 {'legacy_id': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels#concept4',
  'prefLabel': [{'label': 'RDF', 'lang': 'en'}],
  'altLabel': [{'label': 'Resource Description Framework'}],
  'inScheme': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels',
  'broader': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels#concept1',
  'definition': 'a group of W3C specifications designed as a metadata data model to provide interoperability between applications that exchange machine-understandable inf

In [197]:
# Writes to the database: creates the concept scheme and one row per concept
# through the Django ORM. It uses objects.create(), so re-running this cell
# will attempt to insert everything again.
myimport.uploadRdf(format="ttl")