In [1]:
import rdflib
from rdflib import Graph, Literal, Namespace, RDF, URIRef, RDFS, XSD
from rdflib.namespace import DC, RDFS, SKOS
from rdflib.util import guess_format
import pprint
import django
django.setup()
from vocabs.models import *
import re

In [2]:
# Namespace objects for the vocabularies referenced in this notebook.
# NOTE: SKOS, DC and RDFS were already imported from rdflib in the import cell;
# the assignments below rebind those names to these plain Namespace objects.
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
DC = Namespace("http://purl.org/dc/elements/1.1/")
DCT = Namespace("http://purl.org/dc/terms/")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
# OWL and VOCABS are defined here but not referenced by the visible cells.
OWL = Namespace("http://www.w3.org/2002/07/owl#")
VOCABS = Namespace("https://vocabs.acdh.oeaw.ac.at/create-concept-scheme/")

In [3]:
# File names of the vocabulary files to work with.
# Only `skosifiedttl` is passed to the importer in the visible cells.
myfilettl = "test.ttl"
myfilerdf = "archecategory.rdf"
skosifiedttl = "output.ttl"

In [39]:
class SkosImporter(object):
	"""
	Parse a SKOS file and import its concept scheme and concepts into the database.

	The file is read with rdflib. The ``parse_*`` methods return plain
	dictionaries/lists describing what was found; ``upload_data`` turns that
	description into ``SkosConceptScheme`` / ``SkosConcept`` rows.

	:param file: source handed to ``Graph.parse`` (path, URL or file-like object)
	:param file_format: rdflib format name (e.g. ``"ttl"``); ``None`` lets rdflib decide
	:param language: stored on the instance; not read by any method of this class
	"""

	# Language recorded for a preferred label that carries no usable language tag.
	DEFAULT_LANGUAGE = "en"

	# Dictionary key -> predicate attribute name, for the single-valued concept
	# properties copied verbatim from the graph. The namespace is resolved at call
	# time so that the module-level SKOS/DC objects are the ones actually used.
	_CONCEPT_PROPERTIES = (
		("scheme", "SKOS", "inScheme"),
		("notation", "SKOS", "notation"),
		("creator", "DC", "creator"),
		("contributor", "DC", "contributor"),
		("broader_concept", "SKOS", "broader"),
	)

	def __init__(self, file, file_format=None, language=None):
		self.file = file
		self.file_format = file_format
		self.language = language

	def _graph_read(self):
		"""Parse ``self.file`` into a new RDF graph and return it."""
		g = Graph()
		g.bind('skos', SKOS)
		g.bind('dc', DC)
		g.bind('dct', DCT)
		g.bind('rdfs', RDFS)
		g.parse(self.file, format=self.file_format)
		return g

	@classmethod
	def _label_language(cls, literal):
		"""
		Return the language code to store for a label.

		Reads the literal's ``language`` attribute instead of pattern-matching
		its ``repr``, so label text that happens to contain ``lang='..'`` can no
		longer be mistaken for a tag. Only the primary subtag is kept
		(``"de-AT"`` -> ``"de"``); a missing tag, or a primary subtag that is not
		2-3 characters long, falls back to ``DEFAULT_LANGUAGE``.
		"""
		tag = getattr(literal, "language", None)
		if tag:
			primary = str(tag).split("-")[0]
			if 2 <= len(primary) <= 3:
				return primary
		return cls.DEFAULT_LANGUAGE

	@staticmethod
	def _extract_concept_scheme(g):
		"""
		Build the concept scheme dictionary from an already parsed graph.

		:raises ValueError: if the graph contains no ``skos:ConceptScheme``
		"""
		if (None, RDF.type, SKOS.ConceptScheme) not in g:
			raise ValueError("Graph doesn't have a Concept Scheme")
		concept_scheme = {}
		# If several schemes are present, later ones overwrite earlier ones.
		for scheme in g.subjects(RDF.type, SKOS.ConceptScheme):
			concept_scheme["identifier"] = str(scheme)
			# NOTE(review): Graph.preferredLabel may be unavailable in newer
			# rdflib releases - confirm against the installed version.
			for title in g.preferredLabel(scheme):
				concept_scheme["title"] = str(title[1])
		return concept_scheme

	@classmethod
	def _extract_concepts(cls, g):
		"""
		Build the list of concept dictionaries from an already parsed graph.

		Every dictionary has ``legacy_id`` and ``pref_label`` (a list of
		``{"label", "lang"}`` dictionaries); ``scheme``, ``notation``,
		``creator``, ``contributor`` and ``broader_concept`` are present only
		when the graph has a value for them (the last value wins).
		"""
		namespaces = {"SKOS": SKOS, "DC": DC}
		concepts = []
		for subject in g.subjects(RDF.type, SKOS.Concept):
			concept = {"legacy_id": str(subject)}
			concept["pref_label"] = [
				{"label": str(entry[1]), "lang": cls._label_language(entry[1])}
				for entry in g.preferredLabel(subject)
			]
			for key, namespace, term in cls._CONCEPT_PROPERTIES:
				predicate = getattr(namespaces[namespace], term)
				for value in g.objects(subject, predicate):
					concept[key] = str(value)
			concepts.append(concept)
		return concepts

	def parse_concept_scheme(self):
		"""
		Read the file and return ``{"identifier": ..., "title": ...}`` for its
		concept scheme (``title`` is absent when the scheme has no label).

		:raises ValueError: if the graph contains no ``skos:ConceptScheme``
		"""
		return self._extract_concept_scheme(self._graph_read())

	def parse_concepts(self):
		"""
		Read the file and return a list of dictionaries, one per ``skos:Concept``.

		:raises ValueError: if the graph contains no ``skos:Concept``
		"""
		g = self._graph_read()
		if (None, RDF.type, SKOS.Concept) not in g:
			# The original built this exception without raising it, so callers
			# silently received None.
			raise ValueError("Graph doesn't have any concepts")
		return self._extract_concepts(g)

	def parse_all_data(self):
		"""
		Read the file once and return the concept scheme dictionary, with the
		concepts under ``"has_concepts"`` when the graph contains any.

		:raises ValueError: if the graph contains no ``skos:ConceptScheme``
		"""
		g = self._graph_read()
		concept_scheme = self._extract_concept_scheme(g)
		if (None, RDF.type, SKOS.Concept) in g:
			concept_scheme["has_concepts"] = self._extract_concepts(g)
		return concept_scheme

	def upload_data(self, username='kzaytseva'):
		"""
		Create and save the concept scheme and its concepts in the database.

		The file is parsed once and validated before anything is written; all
		writes happen in a single transaction, so a failure part-way through
		does not leave a partially imported scheme behind.

		:param username: username of the ``User`` recorded as ``created_by``
		:returns: the result of ``SkosConcept.objects.rebuild()``
		:raises ValueError: if the graph has no concept scheme or no concepts
		"""
		from django.db import transaction

		data = self.parse_all_data()
		concepts = data.get("has_concepts")
		if concepts is None:
			raise ValueError("Graph doesn't have any concepts")
		scheme_identifier = data.get("identifier")

		with transaction.atomic():
			# Looked up once instead of once per created row.
			user = User.objects.get(username=username)
			# objects.create() already saves; no extra save() call is needed.
			scheme = SkosConceptScheme.objects.create(
				identifier=scheme_identifier,
				title=data.get("title"), created_by=user
				)

			for concept in concepts:
				labels = concept.get("pref_label") or []
				if labels:
					# Only one label fits on the model: the last one wins.
					pref_label = labels[-1].get("label")
					pref_label_lang = labels[-1].get("lang")
				else:
					# Reset per concept so a label from the previous concept is
					# never reused for an unlabelled one.
					pref_label = ""
					pref_label_lang = self.DEFAULT_LANGUAGE

				inscheme = concept.get("scheme")
				if inscheme is None or inscheme == scheme_identifier:
					# Concepts without skos:inScheme are attached to the scheme
					# imported from this file.
					concept_scheme = scheme
				else:
					concept_scheme = SkosConceptScheme.objects.get(identifier=inscheme)

				SkosConcept.objects.create(
					legacy_id=concept.get("legacy_id"),
					scheme=concept_scheme,
					pref_label=pref_label, pref_label_lang=pref_label_lang,
					notation=concept.get("notation", ""),
					creator=concept.get("creator", ""),
					contributor=concept.get("contributor", ""),
					created_by=user
					)

			# Relationships are added in a second pass, once every concept exists.
			for concept in concepts:
				broader = concept.get("broader_concept")
				if broader is not None:
					SkosConcept.objects.filter(
						legacy_id=concept.get("legacy_id")).update(
						broader_concept=SkosConcept.objects.get(legacy_id=broader)
						)

			return SkosConcept.objects.rebuild()

In [40]:
# Build an importer for the Turtle file named in `skosifiedttl` and preview
# the parsed concept scheme with its concepts (nothing is written to the
# database by parse_all_data).
skos_vocab = SkosImporter(file=skosifiedttl, file_format="ttl")
skos_vocab.parse_all_data()

{'identifier': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels',
 'title': 'DHA Taxonomy Import Relationships',
 'has_concepts': [{'legacy_id': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels#concept31',
   'pref_label': [{'label': 'text encoding', 'lang': 'en'}],
   'scheme': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels',
   'notation': 'text-encoding',
   'creator': 'ACDH-OEAW Team'},
  {'legacy_id': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels#concept28',
   'pref_label': [{'label': 'historical text', 'lang': 'en'}],
   'scheme': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels',
   'notation': 'historical-text',
   'creator': 'ACDH-OEAW Team',
   'broader_concept': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels#concept23'},
  {'legacy_id': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-import-rels#concept2',
   'pref_label': [{'label': 'CIDOC CRM', 'lang': 'en'}],
   'scheme': 'https://vocabs.acdh.oeaw.ac.at/dha-taxonomy-imp

In [41]:
# Writes to the database: creates the concept scheme and its concepts.
skos_vocab.upload_data()