In [1]:
from rdflib import Graph

In [2]:
# Library of Congress name-authority record used as the sample input.
uri = 'http://id.loc.gov/authorities/names/n2003042876'

# Parse the record's ".rdf" serialisation into a fresh graph; the value
# returned by parse() is what the cell displays.
graph = Graph()
graph.parse(uri + '.rdf')

<Graph identifier=N40d8d73fabd345a78ec1df91dfd5145d (<class 'rdflib.graph.Graph'>)>

In [4]:
from pydantic import BaseModel
from typing import Optional
from api.src.schemas.authorities.authority import Variant, AdminMetadata, Element

class Uri(BaseModel):
    """A labelled reference to an external resource (authority, vocabulary term, ...)."""
    value: str  # URI of the referenced resource
    label: str  # human-readable label for the resource
    # Host the URI belongs to in the sample data (e.g. 'id.loc.gov').
    # NOTE(review): no default is declared, so with pydantic v2 the key must be
    # supplied even though its value may be None — confirm that is intended.
    base: Optional[str]

class Affiliation(BaseModel):
    """An organisation an agent is or was affiliated with.

    Only ``organization`` is mandatory. The start/end values default to None
    because parsed records may carry the organisation alone; without an explicit
    default pydantic v2 treats an ``Optional[...]`` field as required and
    rejects such input.
    """
    organization: Uri
    affiliationStart: Optional[str] = None
    affiliationEnd: Optional[str] = None
    
class Agents(BaseModel):
    """Agent authority record; the dict built by ParserAgents is validated against it."""
    # --- mandatory core -------------------------------------------------
    type: str 
    adminMetadata: AdminMetadata 
    authoritativeLabel: str
    elementList: list[Element]
    # --- optional descriptive data (absent keys default to None) ---------
    fullerName: Optional[Element] = None
    birthDate: Optional[str] = None
    birthPlace: Optional[str] = None
    deathDate: Optional[str] = None
    hasAffiliation: Optional[list[Affiliation]] = None
    occupation: Optional[list[Uri]] = None
    fieldOfActivity: Optional[list[Uri]] = None
    # --- optional links to other resources -------------------------------
    hasCloseExternalAuthority: Optional[list[Uri]] = None
    hasExactExternalAuthority: Optional[list[Uri]] = None
    hasVariant: Optional[list[Variant]] = None
    subjectOf: Optional[list[Uri]] = None
    contributorOf: Optional[list[Uri]] = None
    # Mandatory: URI string of the collection the record belongs to.
    isMemberOfMADSCollection: str

In [10]:
from api.src.function.loc.agents.fieldOfActivity import GetFieldOfActivity
# from api.src.schemas.authorities.agents import Agents
from api.src.function.loc.agents.Occuption import GetOccuption
from api.src.function.loc.agents.Affiliation import GetAffiliation
from api.src.function.loc.agents.BirthPlace import GetBirthPlace
from api.src.function.loc.agents.Date import GetDate
from api.src.function.loc.agents.Variant import GetVariant
from api.src.function.loc.Uri import GetUri
from api.src.function.loc.agents.FullerName import GetFullerName
from api.src.function.loc.agents.ElementList import GetElementList
from api.src.function.loc.getType import GetType

def ParserAgents(graph, uri):
    """Build the agent-authority dict for a Library of Congress name record.

    Args:
        graph: rdflib graph already loaded with the record's RDF.
        uri: authority URI, e.g.
            ``http://id.loc.gov/authorities/names/n2003042876``. Its last path
            segment is used both as the LCCN value and as the RWO token.

    Returns:
        A dict seeded with ``type``, ``adminMetadata``,
        ``isMemberOfMADSCollection`` and ``authoritativeLabel`` and then passed
        through each ``Get*`` helper in turn; every helper receives the dict
        and returns the dict to continue with.

    Raises:
        ValueError: if the graph does not contain exactly one
            ``madsrdf:authoritativeLabel`` for ``uri``.
    """
    # Type
    tipo = GetType(graph, uri)

    # Last path segment of the authority URI (the LCCN); reused for the RWO URI.
    token = uri.split('/')[-1]

    # adminMetadata
    adminMetadata = {
      "assigner": "http://id.loc.gov/vocabulary/organizations/dlc", 
      "identifiedBy": [ {
         "type": "Lccn",
          "assigner": "http://id.loc.gov/vocabulary/organizations/dlc",
          "value": token
      }]}

    obj = {
     "type": tipo,
      "adminMetadata": adminMetadata,
      "isMemberOfMADSCollection": f'http://bibliokeia.com/authorities/{tipo}/'}

    # authoritativeLabel
    qAuthoritativeLabel = f"""PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX madsrdf: <http://www.loc.gov/mads/rdf/v1#>
    SELECT ?authoritativeLabel 
    WHERE  {{
    <{uri}> madsrdf:authoritativeLabel ?authoritativeLabel .
      }}"""
    rows = graph.query(qAuthoritativeLabel).bindings
    # Fail with a readable message instead of a bare unpacking error when the
    # label is missing or duplicated (the exception type is unchanged).
    if len(rows) != 1:
        raise ValueError(
            f'expected exactly one madsrdf:authoritativeLabel for <{uri}>, '
            f'found {len(rows)}')
    obj['authoritativeLabel'] = rows[0].get('authoritativeLabel').toPython()

    # ElementList
    obj = GetElementList(graph, uri, obj)

    # fullerName
    obj = GetFullerName(graph, uri, obj)

    # hasCloseExternalAuthority
    obj = GetUri(uri, graph, "hasCloseExternalAuthority", obj)

    # hasExactExternalAuthority
    obj = GetUri(uri, graph, "hasExactExternalAuthority", obj)

    # Variant
    obj = GetVariant(uri, graph, obj)

    # RWO: the remaining helpers are queried with the RWO URI, not the authority URI.
    rwo = f'http://id.loc.gov/rwo/agents/{token}'
    # birthDate
    obj = GetDate(rwo, 'birthDate', graph, obj)
    # deathDate
    obj = GetDate(rwo, 'deathDate', graph, obj)
    # birthPlace
    obj = GetBirthPlace(rwo, graph, obj)
    # Affiliation
    obj = GetAffiliation(rwo, graph, obj)
    # Occupations
    obj = GetOccuption(rwo, graph, obj)
    # fieldOfActivity
    obj = GetFieldOfActivity(rwo, graph, obj)

    return obj

# Run the parser on the loaded graph and display the first affiliation entry.
obj = ParserAgents(graph, uri)
obj['hasAffiliation'][0]

{'organization': {'value': 'http://id.loc.gov/authorities/names/n50075769',
  'label': 'Drexel University',
  'base': 'id.loc.gov'}}

In [15]:
class Uri(BaseModel):
    """A labelled reference to an external resource (authority, vocabulary term, ...)."""
    value: str  # URI of the referenced resource
    label: str  # human-readable label for the resource
    # Host the URI belongs to in the sample data (e.g. 'id.loc.gov').
    # NOTE(review): no default is declared, so with pydantic v2 the key must be
    # supplied even though its value may be None — confirm that is intended.
    base: Optional[str]

class Affiliation(BaseModel):
    """An organisation an agent is or was affiliated with."""
    organization: Uri  # the only mandatory field
    # Both default to None, so a payload holding just `organization` validates.
    affiliationStart: Optional[str] = None
    affiliationEnd: Optional[str] = None

# Hand-written copies of one parsed affiliation: first the bare organisation
# reference, then the same reference wrapped the way the parser returns it.
obj2 = {
    'value': 'http://id.loc.gov/authorities/names/n50075769',
    'label': 'Drexel University',
    'base': 'id.loc.gov',
}
obj = {'organization': dict(obj2)}

# Validate the bare reference against the Uri model and display the instance.
a = Uri(**obj2)
a

Uri(value='http://id.loc.gov/authorities/names/n50075769', label='Drexel University', base='id.loc.gov')

In [16]:
# Validate the wrapped payload; the start/end values are not in `obj`, so the
# displayed instance shows them as None.
aff = Affiliation(**obj)
aff

Affiliation(organization=Uri(value='http://id.loc.gov/authorities/names/n50075769', label='Drexel University', base='id.loc.gov'), affiliationStart=None, affiliationEnd=None)

In [6]:
from pydantic import BaseModel
from typing import Optional
# from api.src.schemas.authorities.agents import Affiliation, Uri
from api.src.schemas.authorities.authority import Variant, AdminMetadata, Element

class Uri(BaseModel):
    """A labelled reference to an external resource (authority, vocabulary term, ...)."""
    value: str  # URI of the referenced resource
    label: str  # human-readable label for the resource
    # Host the URI belongs to in the sample data (e.g. 'id.loc.gov').
    # NOTE(review): no default is declared, so with pydantic v2 the key must be
    # supplied even though its value may be None — confirm that is intended.
    base: Optional[str]

class Affiliation(BaseModel):
    """An organisation an agent is or was affiliated with.

    Only ``organization`` is mandatory. The start/end values default to None
    because parsed records may carry the organisation alone; without an explicit
    default pydantic v2 treats an ``Optional[...]`` field as required and
    rejects such input.
    """
    organization: Uri
    affiliationStart: Optional[str] = None
    affiliationEnd: Optional[str] = None

class Agents(BaseModel):
    """Agent authority record; the dict built by ParserAgents is validated against it."""
    # --- mandatory core (this version declares no authoritativeLabel field) ---
    type: str 
    adminMetadata: AdminMetadata 
    elementList: list[Element]
    # --- optional descriptive data (absent keys default to None) ---------
    fullerName: Optional[Element] = None
    birthDate: Optional[str] = None
    birthPlace: Optional[str] = None
    deathDate: Optional[str] = None
    hasAffiliation: Optional[list[Affiliation]] = None
    occupation: Optional[list[Uri]] = None
    fieldOfActivity: Optional[list[Uri]] = None
    # --- optional links to other resources -------------------------------
    hasCloseExternalAuthority: Optional[list[Uri]] = None
    hasExactExternalAuthority: Optional[list[Uri]] = None
    hasVariant: Optional[list[Variant]] = None
    subjectOf: Optional[list[Uri]] = None
    contributorOf: Optional[list[Uri]] = None
    # Mandatory: URI string of the collection the record belongs to.
    isMemberOfMADSCollection: str

In [8]:
from api.src.function.loc.getType import GetType
from api.src.function.loc.agents.ElementList import GetElementList
from api.src.function.loc.agents.FullerName import GetFullerName
from api.src.function.loc.Uri import GetUri
from api.src.function.loc.agents.Variant import GetVariant
from api.src.function.loc.agents.Date import GetDate
from api.src.function.loc.agents.Occuption import GetOccuption
from api.src.function.loc.agents.Affiliation import GetAffiliation
from api.src.function.loc.agents.BirthPlace import GetBirthPlace
from api.src.function.loc.agents.fieldOfActivity import GetFieldOfActivity

def ParserAgents(graph, authority):
    """Assemble the agent-authority dict for a Library of Congress name record.

    Args:
        graph: graph holding the record's RDF; handed to every ``Get*`` helper.
        authority: authority URI. Its last path segment is used as the LCCN
            value and to build the ``http://id.loc.gov/rwo/agents/`` URI.

    Returns:
        The dict seeded with ``type``, ``adminMetadata`` and
        ``isMemberOfMADSCollection`` after it has been passed through each
        helper in turn (each helper returns the dict to continue with).
    """
    tipo = GetType(graph, authority)

    lccn = authority.split('/')[-1]
    dlc = "http://id.loc.gov/vocabulary/organizations/dlc"
    obj = {
        "type": tipo,
        "adminMetadata": {
            "assigner": dlc,
            "identifiedBy": [
                {"type": "Lccn", "assigner": dlc, "value": lccn},
            ],
        },
        "isMemberOfMADSCollection": f'http://bibliokeia.com/authorities/{tipo}/',
    }

    # Helpers that look things up under the authority URI itself.
    obj = GetElementList(graph, authority, obj)
    obj = GetFullerName(graph, authority, obj)
    for relation in ("hasCloseExternalAuthority", "hasExactExternalAuthority"):
        obj = GetUri(authority, graph, relation, obj)
    obj = GetVariant(authority, graph, obj)

    # Helpers that look things up under the RWO URI derived from the LCCN.
    rwo = f'http://id.loc.gov/rwo/agents/{lccn}'
    for date_field in ('birthDate', 'deathDate'):
        obj = GetDate(rwo, date_field, graph, obj)
    for rwo_step in (GetBirthPlace, GetAffiliation, GetOccuption, GetFieldOfActivity):
        obj = rwo_step(rwo, graph, obj)

    return obj

In [17]:
# Re-run the parser and display every affiliation it extracted.
# NOTE(review): relies on `graph` and `uri` still holding the values set when
# the record was loaded; other cells rebind `uri`.
obj = ParserAgents(graph, uri)
obj['hasAffiliation']

[{'organization': {'value': 'http://id.loc.gov/authorities/names/n50075769',
   'label': 'Drexel University',
   'base': 'id.loc.gov'}},
 {'organization': {'value': 'http://id.loc.gov/authorities/names/n79061226',
   'label': 'Johns Hopkins University',
   'base': 'id.loc.gov'}},
 {'organization': {'value': 'http://id.loc.gov/authorities/names/nr99039944',
   'label': 'Eastern Connecticut State University',
   'base': 'id.loc.gov'}},
 {'organization': {'value': 'http://id.loc.gov/authorities/names/n79055384',
   'label': 'Princeton University',
   'base': 'id.loc.gov'}},
 {'organization': {'value': 'http://id.loc.gov/authorities/names/n79043367',
   'label': 'Yale University',
   'base': 'id.loc.gov'}}]

In [17]:
# Inspect the first element of the first variant: the saved output shows the
# elementValue dict has a 'value' key only.
obj['hasVariant'][0]['elementList'][0]['elementValue']

{'value': 'Larcher, Walter,'}

In [10]:
# Validate the parsed dict against the Agents model.
# NOTE(review): the saved output is a ValidationError — the variant
# elementValue dicts carry no 'lang' key, which the model reports as required.
response = Agents(**obj)
response

ValidationError: 2 validation errors for Agents
hasVariant.0.elementList.0.elementValue.lang
  Field required [type=missing, input_value={'value': 'Larcher, Walter,'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.0.3/v/missing
hasVariant.0.elementList.1.elementValue.lang
  Field required [type=missing, input_value={'value': '1929-'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.0.3/v/missing

In [18]:
import httpx

# Call the locally running import endpoint; the `uri` query parameter is the
# percent-encoded authority URI http://id.loc.gov/authorities/names/n79125202.
url = 'http://localhost:8000/import/loc/agents?uri=http%3A%2F%2Fid.loc.gov%2Fauthorities%2Fnames%2Fn79125202'

# NOTE: rebinds `response` to the HTTP response object.
response = httpx.get(url)
response

<Response [200 OK]>

In [19]:
# Display the decoded JSON body returned by the import endpoint.
response.json()

{'type': 'PersonalName',
 'adminMetadata': {'assigner': 'http://id.loc.gov/vocabulary/organizations/dlc',
  'descriptionModifier': 'http://id.loc.gov/vocabulary/organizations/brmninpa',
  'creationDate': '2023-07-18',
  'descriptionLanguage': 'http://id.loc.gov/vocabulary/languages/por',
  'generationProcess': 'BiblioKeia v.1',
  'generationDate': '2023-07-18T10:04:06',
  'identifiedBy': [{'type': 'Lccn',
    'assigner': 'http://id.loc.gov/vocabulary/organizations/dlc',
    'value': 'n79125202'}],
  'status': {'value': 'mstatus:new', 'label': 'novo'}},
 'elementList': [{'type': 'FullNameElement',
   'elementValue': {'value': 'Larcher, W.', 'lang': None}},
  {'type': 'FullNameElement',
   'elementValue': {'value': '(Walter),', 'lang': None}},
  {'type': 'DateNameElement',
   'elementValue': {'value': '1929-', 'lang': None}}],
 'fullerName': None,
 'birthDate': None,
 'birthPlace': None,
 'deathDate': None,
 'hasAffiliation': None,
 'occupation': None,
 'fieldOfActivity': None,
 'hasClos

In [20]:
# Validate the endpoint's JSON against the Agents model; `request` is the
# instance later handed to MakeDocAgents.
request = Agents(**response.json())
request

Agents(type='PersonalName', adminMetadata=AdminMetadata(assigner='http://id.loc.gov/vocabulary/organizations/dlc', descriptionModifier='http://id.loc.gov/vocabulary/organizations/brmninpa', creationDate=datetime.date(2023, 7, 18), descriptionLanguage='http://id.loc.gov/vocabulary/languages/por', generationProcess='BiblioKeia v.1', generationDate='2023-07-18T10:04:06', identifiedBy=[IdentifiedBy(type='Lccn', assigner='http://id.loc.gov/vocabulary/organizations/dlc', value='n79125202')], status=Status(value='mstatus:new', label='novo')), elementList=[Element(type='FullNameElement', elementValue=Label(value='Larcher, W.', lang=None)), Element(type='FullNameElement', elementValue=Label(value='(Walter),', lang=None)), Element(type='DateNameElement', elementValue=Label(value='1929-', lang=None))], fullerName=None, birthDate=None, birthPlace=None, deathDate=None, hasAffiliation=None, occupation=None, fieldOfActivity=None, hasCloseExternalAuthority=[Uri(value='http://www.wikidata.org/entity/Q2

In [41]:
import httpx

from api.src.function.authorities.makeLabel import MakeLabel


def GetImagem(uri):
    """Return a Wikimedia Commons image URL for a Wikidata entity, or False.

    Args:
        uri: Wikidata entity URI; its last path segment is used as the entity
            id sent to the ``wbgetentities`` API.

    Returns:
        ``http://commons.wikimedia.org/wiki/Special:FilePath/<file>`` built from
        the first P18 statement of the entity, or ``False`` when the API
        reports an error or no usable P18 value is present in the response.
    """
    entity_id = uri.split('/')[-1]  # avoid shadowing the builtin `id`

    url = 'https://www.wikidata.org/w/api.php'
    params = {
                'action': 'wbgetentities',
                'ids': entity_id,
                'props': 'claims',
                'languages': 'pt',
                'format': 'json'
            }
    payload = httpx.get(url, params=params).json()
    if payload.get('error'):
        return False

    # Walk the response defensively: an entry lacking 'claims', or a P18
    # statement lacking 'datavalue', yields False instead of a KeyError.
    entity = payload.get('entities', {}).get(entity_id) or {}
    statements = (entity.get('claims') or {}).get('P18')
    if not statements:
        return False

    datavalue = statements[0].get('mainsnak', {}).get('datavalue')
    if not datavalue:
        return False

    img = datavalue['value']
    return f'http://commons.wikimedia.org/wiki/Special:FilePath/{img}'
    
def _uri_docs(id, field, uris):
    """Flatten Uri entries into dicts whose id is ``{id}/{field}#{last URI segment}``."""
    return [
        {
            'id': f"{id}/{field}#{entry.value.split('/')[-1]}",
            'uri': entry.value,
            'label': entry.label,
            'base': entry.base,
        }
        for entry in uris
    ]


def MakeDocAgents(request, id):
    """Build the index document for an agent authority.

    Args:
        request: validated ``Agents`` instance.
        id: identifier of the document; also the prefix of every nested
            entry's ``id``.

    Returns:
        dict with ``id``, ``type``, ``creationDate`` (``YYYY-MM-DD``), ``label``
        and ``isMemberOfMADSCollection``, plus — only when the corresponding
        value on ``request`` is truthy — ``fullerName``, ``birthDate``,
        ``birthPlace``, ``deathDate``, ``hasAffiliation``, ``variant``,
        ``imagem``, ``hasCloseExternalAuthority``, ``occupation`` and
        ``fieldOfActivity``.
    """
    doc = {
            'id': id,
            'type': request.type,
            "creationDate": request.adminMetadata.creationDate.strftime('%Y-%m-%d'),
            "label": f'{MakeLabel(request.elementList)}',
            "isMemberOfMADSCollection": request.isMemberOfMADSCollection
        }

    if request.fullerName:
        doc['fullerName'] = request.fullerName.elementValue.value
    if request.birthDate:
        doc['birthDate'] = request.birthDate
    if request.birthPlace:
        doc['birthPlace'] = request.birthPlace
    if request.deathDate:
        doc['deathDate'] = request.deathDate

    # hasAffiliation
    if request.hasAffiliation:
        affiliations = []
        for affiliation in request.hasAffiliation:
            entry = {
                'id': f"{id}/hasAffiliation#{affiliation.organization.value.split('/')[-1]}",
                'organization': affiliation.organization.label,
                # Kept even when None, unlike affiliationEnd below.
                'affiliationStart': affiliation.affiliationStart,
            }
            if affiliation.affiliationEnd:
                entry['affiliationEnd'] = affiliation.affiliationEnd
            affiliations.append(entry)
        doc['hasAffiliation'] = affiliations

    # hasVariant: each variant becomes its element values joined by a space.
    if request.hasVariant:
        doc['variant'] = [
            " ".join(element.elementValue.value for element in variant.elementList)
            for variant in request.hasVariant
        ]

    # hasCloseExternalAuthority
    if request.hasCloseExternalAuthority:
        # Look for an image first so 'imagem' keeps its position ahead of
        # 'hasCloseExternalAuthority' in the document; when several Wikidata
        # links yield an image, the last one wins.
        for authority in request.hasCloseExternalAuthority:
            if authority.base == 'www.wikidata.org':
                imagem = GetImagem(authority.value)
                if imagem:
                    doc['imagem'] = imagem
        doc['hasCloseExternalAuthority'] = _uri_docs(
            id, 'hasCloseExternalAuthority', request.hasCloseExternalAuthority)

    # Occupation
    if request.occupation:
        doc['occupation'] = _uri_docs(id, 'occupation', request.occupation)

    # fieldOfActivity
    if request.fieldOfActivity:
        doc['fieldOfActivity'] = _uri_docs(id, 'fieldOfActivity', request.fieldOfActivity)

    return doc

In [31]:
# Manual replay of the Wikidata request that GetImagem performs, using the
# first close external authority of `request`.
i = request.hasCloseExternalAuthority[0]
# NOTE(review): rebinds the notebook-level `uri` to this external URI.
uri = i.value
# NOTE(review): `id` shadows the builtin and stays in the kernel namespace;
# another cell passes this value to MakeDocAgents as the document id.
id = uri.split('/')[-1]
url = 'https://www.wikidata.org/w/api.php'
params = {
                'action': 'wbgetentities',
                'ids': id,
                'props': 'claims',
                'languages': 'pt',
                'format': 'json'
            }
response = httpx.get(url, params=params)
response

<Response [200 OK]>

In [42]:
# Build the index document for `request`.
# NOTE(review): `id` is whatever the kernel namespace currently holds; in the
# saved output it is the Wikidata id 'Q2545305', not a BiblioKeia id.
doc = MakeDocAgents(request, id)
doc

{'id': 'Q2545305',
 'type': 'PersonalName',
 'creationDate': '2023-07-18',
 'label': 'Larcher, W., (Walter),, 1929-',
 'isMemberOfMADSCollection': 'http://bibliokeia.com/authorities/PersonalName/',
 'variant': ['Larcher, Walter, 1929-'],
 'hasCloseExternalAuthority': [{'id': 'Q2545305/hasCloseExternalAuthority#Q2545305',
   'uri': 'http://www.wikidata.org/entity/Q2545305',
   'label': 'Walter Larcher',
   'base': 'www.wikidata.org'}]}

In [54]:
# Manually re-prefix the nested entry's id with the 'bka-2' document id.
doc['hasCloseExternalAuthority'][0]['id'] = 'bka-2/hasCloseExternalAuthority#Q2545305'

In [55]:
# Display the patched document.
# NOTE(review): the saved output also shows the top-level 'id' as 'bka-2';
# no cell in this notebook sets it — presumably done in a cell since removed.
doc

{'id': 'bka-2',
 'type': 'PersonalName',
 'creationDate': '2023-07-18',
 'label': 'Larcher, W., (Walter),, 1929-',
 'isMemberOfMADSCollection': 'http://bibliokeia.com/authorities/PersonalName/',
 'variant': ['Larcher, Walter, 1929-'],
 'hasCloseExternalAuthority': [{'id': 'bka-2/hasCloseExternalAuthority#Q2545305',
   'uri': 'http://www.wikidata.org/entity/Q2545305',
   'label': 'Walter Larcher',
   'base': 'www.wikidata.org'}]}

In [56]:
from pysolr import Solr
# Client for the local Solr endpoint (10-second timeout).
solr = Solr('http://localhost:8983/solr/authority/', timeout=10)

# Send the document and commit in the same request.
responseSolr = solr.add([doc], commit=True)

In [44]:
# Minimal scratch document holding only an id.
# NOTE(review): the saved output of this cell is a Solr response header, which
# this assignment cannot produce — the cell was presumably edited after it ran.
d = {'id': "Q2545305"}

'{\n  "responseHeader":{\n    "status":0,\n    "QTime":286}}\n'

In [46]:
# Delete-by-query for the nested entry indexed under the Wikidata-derived id,
# committing immediately, then display the raw response.
# NOTE(review): the id contains '/' and '#', which are passed unquoted and
# unescaped in the query string — confirm the query matches the intended document.
r = solr.delete(q="id:Q2545305/hasCloseExternalAuthority#Q2545305", commit=True)
r

'<?xml version="1.0" encoding="UTF-8"?>\n<response>\n\n<lst name="responseHeader">\n  <int name="status">0</int>\n  <int name="QTime">50</int>\n</lst>\n</response>\n'