In [1]:
from rdflib import Graph
from pydantic import BaseModel, Field
from typing import Optional
from api.src.schemas.authorities.authority import Uri, Variant, AdminMetadata, Element

In [2]:
class Topic(BaseModel):
    """Request payload describing a Topic authority record.

    ``adminMetadata`` and ``elementList`` are the only fields a caller must
    supply. ``type`` and ``isMemberOfMADSCollection`` carry fixed string
    defaults, and every ``Optional`` field defaults to ``None``.

    The ``None`` defaults are spelled out explicitly: pydantic v1 infers them
    for ``Optional[...]`` annotations, but pydantic v2 treats an ``Optional``
    annotation without a default as a *required* field.
    """
    type: str = Field(default='Topic')
    subjectOf: Optional[list[Uri]] = None
    # Required: administrative metadata of the record.
    adminMetadata: AdminMetadata
    # Required: the elements that make up the authorized heading.
    elementList: list[Element]
    # Optional links to related authorities, one list per relation.
    hasBroaderAuthority: Optional[list[Uri]] = None
    hasCloseExternalAuthority: Optional[list[Uri]] = None
    hasExactExternalAuthority: Optional[list[Uri]] = None
    hasNarrowerAuthority: Optional[list[Uri]] = None
    hasNarrowerExternalAuthority: Optional[list[Uri]] = None
    hasVariant: Optional[list[Variant]] = None
    isMemberOfMADSCollection: str = Field(default='http://bibliokeia.com/authorities/Topic/')

In [3]:
# Load the description of LoC subject authority sh2018002121 into a fresh
# rdflib graph. `g` is shared notebook state: later cells query it directly.
# Note that this cell fetches from id.loc.gov every time it is run.
g = Graph()
g.parse('http://id.loc.gov/authorities/subjects/sh2018002121.rdf')
# Disabled: would write the parsed graph to 'exemples/topic.ttl'.
#g.serialize('exemples/topic.ttl')

<Graph identifier=N1f7adbcc546b496e8947d5c479d1742c (<class 'rdflib.graph.Graph'>)>

In [4]:
def getUri(obj, metadado, authority, graph):
    """Attach the labelled resources linked to ``authority`` by one MADS/RDF property.

    Queries ``graph`` for every ``?value`` reachable from ``authority`` through
    ``madsrdf:<metadado>`` that also has a ``madsrdf:authoritativeLabel``.

    Args:
        obj: dict being assembled; it is updated in place.
        metadado: local name of the MADS/RDF property (e.g.
            ``'hasBroaderAuthority'``); also used as the key written to ``obj``.
        authority: URI of the subject authority, interpolated into the query.
        graph: object exposing ``query(sparql)`` whose result has ``bindings``.

    Returns:
        The same ``obj``. When the query matches, ``obj[metadado]`` is a list of
        ``{"value": uri, "label": {"value": text, "lang": language}}`` dicts;
        when nothing matches, ``obj`` is returned untouched.
    """
    query = f"""PREFIX madsrdf: <http://www.loc.gov/mads/rdf/v1#>
            SELECT * WHERE  {{
              <{authority}> madsrdf:{metadado} ?value .
              ?value madsrdf:authoritativeLabel ?label
              }}"""
    rows = graph.query(query).bindings
    if len(rows) == 0:
        # No match: leave the key absent rather than storing an empty list.
        return obj

    def _as_entry(row):
        # n3() renders the term as "<uri>"; the angle brackets are stripped
        # so only the bare URI text is kept.
        target = row.get('value')
        label = row.get('label')
        return {
            "value": target.n3().replace('<', '').replace('>', ''),
            "label": {
                "value": label.value,
                "lang": label.language,
            },
        }

    obj[metadado] = [_as_entry(row) for row in rows]
    return obj

In [22]:
def ParserTopic(graph, authority='http://id.loc.gov/authorities/subjects/sh2018002121'):
  """Build a ``Topic`` from the MADS/RDF description of ``authority`` in ``graph``.

  Three SPARQL queries are run against ``graph``: one for the LCCN identifier,
  one for the heading's element list and one for its variants. The related
  authority links are then added through ``getUri``.

  Args:
    graph: rdflib graph (or compatible object) holding the authority's triples.
      Every query runs against this argument, never against a notebook global.
    authority: URI of the authority to parse. Defaults to the subject heading
      this notebook was written against, so ``ParserTopic(g)`` keeps working.

  Returns:
    A ``Topic`` instance built from the collected dict.

  Raises:
    ValueError: if ``graph`` does not hold exactly one ``identifiers:lccn``
      value for ``authority``.
  """
  prefix = """PREFIX identifiers: <http://id.loc.gov/vocabulary/identifiers/>
  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
  PREFIX madsrdf: <http://www.loc.gov/mads/rdf/v1#>"""

  # adminMetadata: the record is identified by its single LCCN.
  qlccn = f"""{prefix}
  SELECT ?o WHERE {{ 
      <{authority}> identifiers:lccn ?o }}"""

  lccns = [row[0].value for row in graph.query(qlccn)]
  if len(lccns) != 1:
    # Same exception type the original unpacking raised, with a usable message.
    raise ValueError(
        f"expected exactly one identifiers:lccn for <{authority}>, found {len(lccns)}")
  lccn = lccns[0]
  adminMetadata = {
      "assigner": "http://id.loc.gov/vocabulary/organizations/dlc",
      "identifiedBy": [{
          "assigner": "http://id.loc.gov/vocabulary/organizations/dlc",
          "value": lccn
      }]
  }

  # elementList: walk the RDF list (rdf:rest* / rdf:first) of heading elements.
  qElementList = f"""{prefix}
  SELECT ?elementValue ?type WHERE  {{
    <{authority}> madsrdf:elementList ?o .
    ?o rdf:rest* ?node .
    ?node rdf:first ?e .
    ?e madsrdf:elementValue ?elementValue .
    ?e rdf:type ?type
    }}"""

  elementList = [
      {
          # Keep only the local name after '#', e.g. 'TopicElement'.
          "type": row.get('type').split("#")[1],
          "elementValue": {
              "value": row.get('elementValue').value,
              "lang": row.get('elementValue').language
          }
      }
      for row in graph.query(qElementList).bindings
  ]
  obj = {
      "adminMetadata": adminMetadata,
      "elementList": elementList
  }

  # hasVariant: the FILTER drops the generic madsrdf:Variant typing so that only
  # the specific variant type of each variant is reported.
  qVariant = f"""{prefix}
  SELECT ?typeVariant ?typeElement ?elementValue WHERE  {{
	<{authority}> madsrdf:hasVariant ?variant .
  ?variant rdf:type ?typeVariant .
  ?variant madsrdf:elementList ?elementList .
  ?elementList rdf:rest* ?node .
    ?node rdf:first ?e .
    ?e madsrdf:elementValue ?elementValue .
	?e rdf:type ?typeElement .
  FILTER ( ?typeVariant != madsrdf:Variant )
  }}"""
  # NOTE(review): each result row becomes its own single-element variant, so a
  # variant whose elementList holds several elements is emitted as several
  # variants. Confirm whether rows should instead be grouped by ?variant.
  variants = [
      {
          'type': row.get('typeVariant').split("#")[1],
          'elementList': [{
              'type': row.get('typeElement').split("#")[1],
              'elementValue': {
                  'value': row.get('elementValue').value,
                  'lang': row.get('elementValue').language
              }
          }]
      }
      for row in graph.query(qVariant).bindings
  ]
  if variants:
    # Only set the key when there is something to report; an authority without
    # variants previously crashed here on an unbound local.
    obj['hasVariant'] = variants

  # URIs: one getUri lookup per relation to another authority.
  metadados = ['hasBroaderAuthority',
               'hasCloseExternalAuthority',
               'hasExactExternalAuthority',
               'hasNarrowerAuthority',
               'hasNarrowerExternalAuthority']
  for metadado in metadados:
    obj = getUri(obj, metadado, authority, graph)

  return Topic(**obj)

# Parse the graph loaded earlier in the notebook into a Topic model and show
# it as a plain dict.
# NOTE(review): `.dict()` is the pydantic v1 spelling (v2 names it `model_dump()`).
request = ParserTopic(g)
request.dict()

{'type': 'Topic',
 'subjectOf': None,
 'adminMetadata': {'assigner': 'http://id.loc.gov/vocabulary/organizations/dlc',
  'descriptionModifier': 'http://id.loc.gov/vocabulary/organizations/brmninpa',
  'creationDate': datetime.date(2023, 5, 3),
  'descriptionLanguage': 'http://id.loc.gov/vocabulary/languages/por',
  'generationProcess': 'BiblioKeia v.1',
  'generationDate': '2023-05-03T15:52:34',
  'identifiedBy': [{'assigner': 'http://id.loc.gov/vocabulary/organizations/dlc',
    'value': 'sh2018002121'}],
  'status': {'value': 'mstatus:new', 'label': 'novo'}},
 'elementList': [{'type': 'TopicElement',
   'elementValue': {'value': 'Dark Web', 'lang': 'en'}}],
 'hasBroaderAuthority': [{'value': 'http://id.loc.gov/authorities/subjects/sh2008009697',
   'label': {'value': 'Invisible Web', 'lang': 'en'}}],
 'hasCloseExternalAuthority': [{'value': 'http://id.worldcat.org/fast/2003430',
   'label': {'value': 'Dark Web', 'lang': None}}],
 'hasExactExternalAuthority': None,
 'hasNarrowerAuthor

In [7]:
# Scratch cell: the hasVariant query with the authority URI written inline, run
# directly against the notebook-level graph `g`. The FILTER removes rows whose
# variant type is the generic madsrdf:Variant. `response` is reused by the
# cells that follow.
q = """PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
  PREFIX madsrdf: <http://www.loc.gov/mads/rdf/v1#>
SELECT ?typeVariant ?typeElement ?elementValue WHERE  {
	<http://id.loc.gov/authorities/subjects/sh2018002121> madsrdf:hasVariant ?variant .
  ?variant rdf:type ?typeVariant .
  ?variant madsrdf:elementList ?elementList .
  ?elementList rdf:rest* ?node .
    ?node rdf:first ?e .
    ?e madsrdf:elementValue ?elementValue .
	?e rdf:type ?typeElement .
  FILTER ( ?typeVariant != madsrdf:Variant )
  }"""
response = g.query(q)
response

<rdflib.plugins.sparql.processor.SPARQLResult at 0x1de90d39a20>

In [8]:
# Show the raw solution rows of the variant query: one dict per row, keyed by
# the SPARQL variables typeVariant / typeElement / elementValue.
response.bindings

[{rdflib.term.Variable('typeVariant'): rdflib.term.URIRef('http://www.loc.gov/mads/rdf/v1#Topic'), rdflib.term.Variable('elementValue'): rdflib.term.Literal('Dark Internet', lang='en'), rdflib.term.Variable('typeElement'): rdflib.term.URIRef('http://www.loc.gov/mads/rdf/v1#TopicElement')},
 {rdflib.term.Variable('typeVariant'): rdflib.term.URIRef('http://www.loc.gov/mads/rdf/v1#Topic'), rdflib.term.Variable('elementValue'): rdflib.term.Literal('Darknet', lang='en'), rdflib.term.Variable('typeElement'): rdflib.term.URIRef('http://www.loc.gov/mads/rdf/v1#TopicElement')}]

In [18]:
# Scratch cell: turn every row of the variant query into the dict shape that is
# stored under 'hasVariant', and print each one. Relies on `response` from the
# earlier query cell.
for i in response.bindings:
    variant = {
        # Local name after '#', e.g. 'Topic'.
        'type': i.get('typeVariant').split("#")[1],
        'elementList': [{
            'type': i.get('typeElement').split("#")[1],
            'elementValue': {
                'value': i.get('elementValue').value,
                'lang': i.get('elementValue').language
            }
        }]
    }
    print(variant)

{'type': 'Topic', 'elementList': [{'type': 'TopicElement', 'elementValue': {'value': 'Dark Internet', 'lang': 'en'}}]}
{'type': 'Topic', 'elementList': [{'type': 'TopicElement', 'elementValue': {'value': 'Darknet', 'lang': 'en'}}]}
