In [1]:
# --- Notebook configuration ---------------------------------------------------
# URI of the Sinopia instance that is linked to the work resource you want to
# serialize as Alma RDF/XML. Change this to target a different instance.
instance_uri = "https://api.stage.sinopia.io/resource/6dff794b-965b-424f-abcb-cbe16ab9e260"

# Alma settings used when posting the resource: the regional API host and your API key.
# Background: https://github.com/LD4P/ils-middleware/wiki/Alma-APIs-in-Airflow
uri_region = "https://api-na.hosted.exlibrisgroup.com"
alma_api_key = ""  # fill in before running the upload cell

In [2]:
!pip install rdflib
!pip install lxml
!pip install requests



In [3]:
import requests
from rdflib import Graph, Namespace, URIRef
from rdflib.namespace import RDF
from lxml import etree as ET
from name_space.alma_ns import alma_namespaces
from copy import deepcopy


In [4]:
# Start with two empty graphs; the work URI is not known yet and is resolved
# from the instance graph in the following cell.
work_uri = None
instance_graph, work_graph = Graph(), Graph()

# Wrap the configured string as an RDF URI term and load the instance
# description from it. The parse call is left as the last expression so the
# resulting graph is shown as the cell output.
instance_uri = URIRef(instance_uri)
instance_graph.parse(instance_uri)


<Graph identifier=N1a7670c9d7314796b1912e6e08231613 (<class 'rdflib.graph.Graph'>)>

In [5]:
 # Define the bf namespace
bf = Namespace("http://id.loc.gov/ontologies/bibframe/")

# Register the Alma namespace prefixes on the work graph so the serializer
# uses them in the RDF/XML output.
for prefix, url in alma_namespaces:
    work_graph.bind(prefix, url)

# Resolve the work this instance points at through bf:instanceOf.
work_uri = instance_graph.value(
    subject=URIRef(instance_uri), predicate=bf.instanceOf
)
if work_uri is None:
    # Graph.value() returns None when no matching triple exists. Fail here with
    # a clear message instead of letting URIRef(None) raise an unrelated error.
    raise ValueError(
        f"No bf:instanceOf statement found for instance {instance_uri}; "
        "cannot determine which work to serialize."
    )
work_uri = URIRef(work_uri)

# Explicitly state that work_uri is of type bf:Work
work_graph.add((work_uri, RDF.type, bf.Work))

# Load the work description from its URI into the work graph
work_graph.parse(work_uri)

# Link the work back to the instance it was reached from
work_graph.add((work_uri, bf.hasInstance, URIRef(instance_uri)))

# Serialize the work graph as RDF/XML bytes, then round-trip through lxml
# purely to pretty-print the result for inspection.
bfwork_alma_xml = work_graph.serialize(format="pretty-xml", encoding="utf-8")
tree = ET.fromstring(bfwork_alma_xml)
print(ET.tostring(tree, pretty_print=True).decode("utf-8"))

# Save the serialized work graph; the next cell reads it back from this file.
with open("work_graph.xml", "wb") as f:
    f.write(bfwork_alma_xml)
    

<rdf:RDF xmlns:bf="http://id.loc.gov/ontologies/bibframe/" xmlns:sinopia="http://sinopia.io/vocabulary/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:bflc="http://id.loc.gov/ontologies/bflc/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <bf:Illustration rdf:about="http://id.loc.gov/vocabulary/millus/ill">
    <rdfs:label xml:lang="en">Illustrations</rdfs:label>
  </bf:Illustration>
  <bf:Work rdf:about="https://api.stage.sinopia.io/resource/145d9af5-b0e2-498c-ad4a-1fd21478b22d">
    <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Monograph"/>
    <rdfs:label xml:lang="en">Krako&#769;w i jego uniwersytet</rdfs:label>
    <sinopia:hasResourceTemplate>pcc:bf2:Monograph:Work</sinopia:hasResourceTemplate>
    <bf:title>
      <bf:Title rdf:nodeID="f0589ae49792743a38433eb267e6d82a7b1">
        <bf:mainTitle xml:lang="en">Krako&#769;w i jego uniwersytet. English</bf:mainTitle>
      </bf:Title>
    </bf:title>
    <bf:title>
      <bf:VariantTitle rdf:n

In [6]:
from lxml import etree as ET
from copy import deepcopy

# Clark-notation names for the RDF attributes and the bflc element inspected below.
RDF_ABOUT = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about'
RDF_RESOURCE = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'
BFLC_RELATIONSHIP = '{http://id.loc.gov/ontologies/bflc/}Relationship'

# Parse the XML file written by the previous cell
tree = ET.parse('work_graph.xml')
# NOTE(review): this rebinds `work_graph` from the rdflib Graph used in earlier
# cells to an lxml root element. Re-run the graph setup cell before re-running
# the serialization cell.
work_graph = tree.getroot()

# Prefix map used by every XPath / find call in this cell
namespaces = {
    'bf': 'http://id.loc.gov/ontologies/bibframe/',
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
    'bflc': 'http://id.loc.gov/ontologies/bflc/'
}

# Find all bf:Work elements
works = work_graph.xpath('//bf:Work', namespaces=namespaces)
print(f"Found {len(works)} bf:Work elements")

# Works that were cloned under a bf:relatedTo and must be removed afterwards.
# Removal is deferred so the tree is not modified while it is being scanned.
works_to_remove = []

for work in works:
    work_about = work.attrib.get(RDF_ABOUT)
    print(f"Processing bf:Work with rdf:about={work_about}")

    if work_about:
        # Find the bf:relatedTo elements that point at this bf:Work by URI.
        # The URI is passed as an XPath variable rather than interpolated into
        # the expression, so quote characters in it cannot break the query.
        related_to_elements = work_graph.xpath(
            '//bf:relatedTo[@rdf:resource=$about]',
            namespaces=namespaces,
            about=work_about,
        )
        print(f"Found {len(related_to_elements)} bf:relatedTo elements for rdf:about={work_about}")

        cloned = False  # Set once this work has been copied under a bf:relatedTo

        for related_to in related_to_elements:
            # Only bf:relatedTo elements whose parent is a bflc:Relationship are considered
            relationship = related_to.getparent()
            if relationship.tag == BFLC_RELATIONSHIP:
                # ...and only when that bflc:Relationship already holds a nested bf:relatedTo/bf:Work
                if relationship.find('bf:relatedTo/bf:Work', namespaces=namespaces) is not None:
                    print("bf:relatedTo element is within a bflc:Relationship that contains a bf:Work element")

                    # Drop the by-reference link; the work is embedded instead
                    related_to.attrib.pop(RDF_RESOURCE, None)

                    # Embed a copy of the work unless this bf:relatedTo already has one
                    existing_work = related_to.find('bf:Work', namespaces=namespaces)
                    if existing_work is None:
                        cloned_work = deepcopy(work)
                        related_to.append(cloned_work)
                        cloned = True
                        print("Cloned bf:Work element appended under bf:relatedTo")

        # The original is removed only if at least one copy was embedded
        if cloned:
            works_to_remove.append(work)

# Remove the original bf:Work elements that now live under a bf:relatedTo
for work in works_to_remove:
    work.getparent().remove(work)
    print(f"Removed original bf:Work element with rdf:about={work.attrib.get(RDF_ABOUT)}")

# Print the modified XML
modified_xml = ET.tostring(work_graph, pretty_print=True).decode()
print(modified_xml)

# Save the modified XML to a file (the `with` block closes it; no explicit close needed)
with open('bfwork_alma.xml', 'wb') as f:
    f.write(ET.tostring(work_graph, pretty_print=True))

Found 1 bf:Work elements
Processing bf:Work with rdf:about=https://api.stage.sinopia.io/resource/145d9af5-b0e2-498c-ad4a-1fd21478b22d
Found 1 bf:relatedTo elements for rdf:about=https://api.stage.sinopia.io/resource/145d9af5-b0e2-498c-ad4a-1fd21478b22d
<rdf:RDF xmlns:bf="http://id.loc.gov/ontologies/bibframe/" xmlns:sinopia="http://sinopia.io/vocabulary/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:bflc="http://id.loc.gov/ontologies/bflc/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <bf:Illustration rdf:about="http://id.loc.gov/vocabulary/millus/ill">
    <rdfs:label xml:lang="en">Illustrations</rdfs:label>
  </bf:Illustration>
  <bf:Work rdf:about="https://api.stage.sinopia.io/resource/145d9af5-b0e2-498c-ad4a-1fd21478b22d">
    <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Monograph"/>
    <rdfs:label xml:lang="en">Krako&#769;w i jego uniwersytet</rdfs:label>
    <sinopia:hasResourceTemplate>pcc:bf2:Monograph:Work</sinopia:hasResourceTemplat

In [7]:
# Load the post-processed work XML and the Sinopia-to-Alma normalization stylesheet
tree = ET.parse("bfwork_alma.xml")
xslt = ET.parse("xsl/normalize-work-sinopia2alma.xsl")
transform = ET.XSLT(xslt)

# Apply the stylesheet and keep the result as UTF-8 encoded, pretty-printed bytes
alma_xml = ET.tostring(transform(tree), pretty_print=True, encoding="utf-8")

# Persist the payload for the upload cell, then echo it for inspection
with open("alma-work.xml", "wb") as f:
    f.write(alma_xml)
print(alma_xml.decode("utf-8"))

<bib xmlns:bf="http://id.loc.gov/ontologies/bibframe/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <record_format>lcbf_work</record_format>
  <suppress_from_publishing>false</suppress_from_publishing>
  <record>
    <rdf:RDF xmlns:sinopia="http://sinopia.io/vocabulary/" xmlns:bflc="http://id.loc.gov/ontologies/bflc/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
  <bf:Illustration rdf:about="http://id.loc.gov/vocabulary/millus/ill">
    <rdfs:label xml:lang="en">Illustrations</rdfs:label>
  </bf:Illustration>
  <bf:Work rdf:about="https://api.stage.sinopia.io/resource/145d9af5-b0e2-498c-ad4a-1fd21478b22d">
    <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Monograph"/>
    <rdfs:label xml:lang="en">Kraków i jego uniwersytet</rdfs:label>
    <sinopia:hasResourceTemplate>pcc:bf2:Monograph:Work</sinopia:hasResourceTemplate>
    <bf:title>
      <bf:Title rdf:nodeID="f0589ae49792743a38433eb267e6d82a7b1">
        <bf:mainTitle xml:lang="en">Kraków i jeg

In [8]:
# Handle a 400 from Alma: pull out the ID needed to update the existing record
def parse_400(result):
    """Apply xsl/put_mms_id.xsl to an Alma response body and return the output as text.

    Args:
        result: Raw XML response body from Alma, as accepted by ``ET.fromstring``.

    Returns:
        The stylesheet output converted with ``str()``. In the run recorded in
        this notebook that is the MMS ID quoted in Alma's "Matched work records
        exist" error message.
    """
    response_doc = ET.fromstring(result)
    extract_mms_id = ET.XSLT(ET.parse("xsl/put_mms_id.xsl"))
    put_mms_id_str = str(extract_mms_id(response_doc))
    print(f"put_mms_id_str: {put_mms_id_str}")
    return put_mms_id_str

# post the work to Alma
def NewWorktoAlma():
    """POST alma-work.xml to Alma as a new bib; on a 400, update the matched record instead.

    Reads the module-level ``uri_region`` and ``alma_api_key`` settings.

    On status 200 the MMS ID(s) found in the response are printed. On status 400
    the response is passed to ``parse_400`` to obtain the MMS ID of the record to
    update, and the same payload is sent to that record via ``putWorkToAlma``.

    Raises:
        Exception: if Alma answers with any status other than 200 or 400, or if
            a 400 response yields no MMS ID to update.
    """
    # Read the payload up front so the file is closed before any network I/O.
    with open("alma-work.xml", "rb") as f:
        data = f.read()

    alma_uri = (
        uri_region
        + "/almaws/v1/bibs?"
        + "from_nz_mms_id=&from_cz_mms_id=&normalization=&validate=false"
        + "&override_warning=true&check_match=false&import_profile=&apikey="
        + alma_api_key
    )
    # post to alma
    alma_result = requests.post(
        alma_uri,
        headers={
            "Content-Type": "application/xml; charset=utf-8",
            "Accept": "application/xml",
            "x-api-key": alma_api_key,
        },
        data=data,
    )
    print(f"alma result: {alma_result.status_code}\n{alma_result.text}")
    result = alma_result.content
    status = alma_result.status_code
    if status == 200:
        xml_response = ET.fromstring(result)
        mms_id = xml_response.xpath("//mms_id/text()")
        print(f"Created record {mms_id}")
    elif status == 400:
        # run xslt on the result in case the response is 400 and we need to update the record
        put_mms_id_str = parse_400(result)
        if not put_mms_id_str.strip():
            # Without an MMS ID the update URL would address no record at all,
            # so stop here rather than sending a PUT to ".../bibs/?...".
            raise Exception(
                "Alma returned 400 but no MMS ID could be extracted from the "
                f"response, so there is no record to update: {alma_result.text}"
            )
        alma_update_uri = (
            uri_region
            + "/almaws/v1/bibs/"
            + put_mms_id_str
            + "?normalization=&validate=false&override_warning=true"
            + "&override_lock=true&stale_version_check=false&cataloger_level=&check_match=false"
            + "&apikey="
            + alma_api_key
        )
        putWorkToAlma(
            alma_update_uri,
            data,
        )
    else:
        raise Exception(f"Unexpected status code from Alma API: {status}")

# update the instance in Alma
def putWorkToAlma(
    alma_update_uri,
    data,
):
    put_update = requests.put(
        alma_update_uri,
        headers={
            "Content-Type": "application/xml; charset=UTF-8",
            "Accept": "application/xml",
        },
        data=data,
    )
    print(f"put update: {put_update.status_code}\n{put_update.text}")
    put_update_status = put_update.status_code
    result = put_update.content
    xml_response = ET.fromstring(result)
    put_mms_id = xml_response.xpath("//mms_id/text()")
    match put_update_status:
        case 200:
            print(f"Updated record {put_mms_id}")
        case 500:
            raise Exception(f"Internal server error from Alma API: {put_update_status}")
        case _:
            raise Exception(
                f"Unexpected status code from Alma API: {put_update_status}"
            )

# Run the upload: POST alma-work.xml to Alma as a new work; NewWorktoAlma falls
# back to a PUT update of the matched record when Alma responds with 400.
NewWorktoAlma()

alma result: 400
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><web_service_result xmlns="http://com/exlibris/urm/general/xmlbeans"><errorsExist>true</errorsExist><errorList><error><errorCode>60307</errorCode><errorMessage>Matched work records exist in the catalog with ID: [9779452124003681]</errorMessage><trackingId>E02-2810142031-BH48Y-AWAE1877006467</trackingId></error></errorList></web_service_result>
put_mms_id_str: 9779452124003681
put update: 200
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><bib><mms_id>9779452124003681</mms_id><record_format>lc_bf_work</record_format><linked_record_id/><title>Kraków i jego uniwersytet. English</title><author>Dużyk, Józef</author><date_of_publication>4|||</date_of_publication><holdings link="https://api-na.hosted.exlibrisgroup.com/almaws/v1/bibs/9779452124003681/holdings"/><created_by>exl_api</created_by><created_date>2024-08-01Z</created_date><last_modified_by>exl_api</last_modified_by><last_modified_date>2024-10-28Z</last