## Configure Environment Variables

To keep your API key and endpoint out of the notebook itself, we'll store them in a `.env` file. This file should reside in the same directory as your Jupyter Notebook and should not be committed to version control.

**Steps:**

1. **Create a `.env` File:**  
   Create a new file named `.env`.

2. **Add the Following Content:**

   ```env
   ALMA_API_KEY=your_alma_api_key_here
   ALMA_URI_REGION=https://api-na.hosted.exlibrisgroup.com
   ```

In [1]:
# Install necessary Python packages
!pip install requests lxml python-dotenv

You should consider upgrading via the '/Users/jimhahn/.pyenv/versions/3.10.0/bin/python3.10 -m pip install --upgrade pip' command.[0m


In [2]:
import os
from dotenv import load_dotenv

# Read the key/value pairs from the local .env file into the process environment.
load_dotenv()

# Alma API key and regional base URL; either is None when its variable is unset.
ALMA_API_KEY, ALMA_URI_REGION = (
    os.environ.get(name) for name in ('ALMA_API_KEY', 'ALMA_URI_REGION')
)

# Public Marva URL of the XML record that the later cells fetch and post to Alma.
xml_url = 'https://bibframe.org/marva/api-production/ldp/e1730979'

# Fail fast: nothing below can work without both settings.
if not (ALMA_API_KEY and ALMA_URI_REGION):
    raise ValueError("ALMA_API_KEY and ALMA_URI_REGION must be set in the .env file.")

print("✅ Configuration loaded successfully.")
print(f"🔗 XML URL: {xml_url}")

✅ Configuration loaded successfully.
🔗 XML URL: https://bibframe.org/marva/api-production/ldp/e1730979


In [3]:
import logging
import requests
from lxml import etree
from urllib.parse import urlencode, urljoin

# Root logger at INFO with a "LEVEL:message" layout; the logging.debug(...) calls
# in the functions below therefore stay silent unless the level is lowered.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


def build_alma_uri(uri_region, alma_api_key):
    """
    Build the URL, query string included, used to POST a bib record to Alma.

    The absolute path "/almaws/v1/bibs" is resolved against `uri_region` with
    urljoin, and a fixed set of options plus the API key is appended as the
    query string.

    Parameters:
    - uri_region (str): Base URL for the Alma API.
    - alma_api_key (str): Alma API key, sent as the `apikey` query parameter.

    Returns:
    - str: The fully constructed Alma API URI.
    """
    # (name, value) pairs in the order they appear in the query string;
    # urlencode percent-encodes the values.
    query_pairs = [
        ("from_nz_mms_id", ""),
        ("from_cz_mms_id", ""),
        ("normalization", ""),
        ("validate", "false"),
        ("override_warning", "true"),
        ("check_match", "false"),
        ("import_profile", ""),
        ("apikey", alma_api_key),
    ]
    endpoint = urljoin(uri_region, "/almaws/v1/bibs")
    return f"{endpoint}?{urlencode(query_pairs)}"

def parse_400(result, xslt_path="xsl/put_mms_id.xsl"):
    """
    Parses the 400 response from Alma and extracts the MMS ID(s) using XSLT.

    The stylesheet's text output is read one ID per line. Blank lines and the
    literal line "No text found in brackets" are discarded.

    Parameters:
    - result (bytes): The XML response content.
    - xslt_path (str): Path of the stylesheet to apply. Defaults to
      "xsl/put_mms_id.xsl", resolved against the current working directory.

    Returns:
    - list: A list of extracted MMS IDs. Empty when nothing was extracted or
      when the response or the stylesheet could not be processed.
    """
    try:
        xml_response = etree.fromstring(result)
    except etree.XMLSyntaxError as e:
        logging.error(f"❌ Failed to parse 400 response XML: {e}")
        return []

    # Load the stylesheet separately: a missing file (OSError) or an invalid
    # stylesheet would otherwise escape this function or be reported as a
    # problem with the response body.
    try:
        transform = etree.XSLT(etree.parse(xslt_path))
    except (OSError, etree.XMLSyntaxError, etree.XSLTParseError) as e:
        logging.error(f"❌ Failed to load XSLT stylesheet '{xslt_path}': {e}")
        return []

    try:
        transformed = transform(xml_response)
    except etree.XSLTApplyError as e:
        logging.error(f"❌ XSLT transformation error: {e}")
        return []

    mms_ids_str = str(transformed).strip()
    logging.debug(f"Extracted MMS IDs string: '{mms_ids_str}'")

    # One ID per line; strip each line so stray whitespace or '\r' cannot
    # defeat the blank-line and sentinel checks.
    candidates = (line.strip() for line in mms_ids_str.splitlines())
    mms_ids = [
        mms_id for mms_id in candidates
        if mms_id and mms_id != "No text found in brackets"
    ]
    if not mms_ids:
        logging.error("❌ No MMS IDs extracted from 400 response.")
    return mms_ids

def submit_to_alma(xml_content, record_type, timeout=30):
    """
    Posts XML content to Alma and handles responses.

    On success the MMS ID is read from the response body. On a 400 response
    the error body is handed to parse_400, and every MMS ID it yields is
    updated in place through put_to_alma with the same XML.

    Parameters:
    - xml_content (str): The XML content to post.
    - record_type (str): Type of record ('Work' or 'Instance'); used in log
      messages and passed on to put_to_alma.
    - timeout (float): Seconds passed to requests.post as its timeout so an
      unresponsive server cannot block the notebook indefinitely.

    Returns:
    - list: List of MMS IDs if successful, else empty list.
    """
    alma_uri = build_alma_uri(ALMA_URI_REGION, ALMA_API_KEY)
    headers = {
        "Content-Type": "application/xml; charset=utf-8",
        "Accept": "application/xml",
        "x-api-key": ALMA_API_KEY,
    }
    # Log only the part before '?': the query string carries the API key.
    logging.debug(f"📥 Posting XML to Alma URI: {alma_uri.split('?', 1)[0]}")

    try:
        response = requests.post(
            alma_uri,
            headers=headers,
            data=xml_content.encode('utf-8'),
            timeout=timeout,  # a timeout surfaces via the generic handler below
        )
        logging.debug(f"DEBUG: Alma result status code: {response.status_code}")
        logging.debug(f"DEBUG: Alma result text: {response.text}")
        response.raise_for_status()

        # Parse successful response
        tree = etree.fromstring(response.content)

        # Search for 'mms_id' without namespace
        mms_id_element = tree.find('.//mms_id')
        if mms_id_element is not None and mms_id_element.text:
            logging.info(f"✅ Created '{record_type}' record with MMS ID: {mms_id_element.text}")
            return [mms_id_element.text]
        logging.error("❌ MMS ID not found in Alma response.")
        return []
    except requests.exceptions.HTTPError as e:
        # Use the response attached to the exception rather than the local
        # variable, so this handler does not depend on how far the try got.
        error_response = e.response
        if error_response is None or error_response.status_code != 400:
            logging.error(f"❌ Error posting to Alma: {e}")
            return []

        # Build the update URI the same way build_alma_uri does (urljoin +
        # urlencode) so the API key is percent-encoded and a trailing slash
        # on ALMA_URI_REGION cannot produce a malformed path.
        update_query = urlencode({
            "normalization": "",
            "validate": "false",
            "override_warning": "true",
            "override_lock": "false",
            "stale_version_check": "false",
            "cataloger_level": "",
            "check_match": "false",
            "import_profile": "",
            "apikey": ALMA_API_KEY,
        })
        updated_mms_ids = []
        for mms_id in parse_400(error_response.content):
            alma_update_uri = (
                urljoin(ALMA_URI_REGION, f"/almaws/v1/bibs/{mms_id}") + "?" + update_query
            )
            updated_mms_id = put_to_alma(alma_update_uri, xml_content, record_type)
            if updated_mms_id:
                updated_mms_ids.append(updated_mms_id)
        return updated_mms_ids
    except etree.XMLSyntaxError as e:
        logging.error(f"❌ Failed to parse Alma response XML: {e}")
        return []
    except Exception as e:
        logging.error(f"❌ Unexpected error: {e}")
        return []

def put_to_alma(alma_update_uri, xml_content, record_type, timeout=30):
    """
    Updates an existing Alma record with XML content.

    Parameters:
    - alma_update_uri (str): The Alma update URI.
    - xml_content (str): The XML content to update.
    - record_type (str): Type of record ('Work' or 'Instance'); used in log messages.
    - timeout (float): Seconds passed to requests.put as its timeout so an
      unresponsive server cannot block the notebook indefinitely.

    Returns:
    - str or None: The updated MMS ID if successful, else None.
    """
    headers = {
        "Content-Type": "application/xml; charset=UTF-8",
        "Accept": "application/xml",
    }

    try:
        put_update = requests.put(
            alma_update_uri,
            headers=headers,
            data=xml_content.encode('utf-8'),
            timeout=timeout,  # a timeout surfaces via the generic handler below
        )
        put_update.raise_for_status()

        # Parse the PUT response
        tree = etree.fromstring(put_update.content)

        # Prefer an un-namespaced 'mms_id'; otherwise accept one in any namespace.
        mms_id_element = tree.find('.//mms_id')
        if mms_id_element is None:
            # '{*}mms_id' matches the local name in any namespace and visits
            # elements only. A bare tree.iter() also yields comment and
            # processing-instruction nodes, whose .tag is not a string and
            # cannot be passed to etree.QName.
            mms_id_element = next(tree.iter('{*}mms_id'), None)

        if mms_id_element is not None and mms_id_element.text:
            logging.info(f"✅ Updated '{record_type}' record with MMS ID: {mms_id_element.text}")
            return mms_id_element.text
        logging.error("❌ MMS ID not found in PUT response.")
        return None
    except requests.exceptions.HTTPError as e:
        logging.error(f"❌ Error updating Alma record: {e}")
        return None
    except etree.XMLSyntaxError as e:
        logging.error(f"❌ Failed to parse PUT response XML: {e}")
        return None
    except Exception as e:
        logging.error(f"❌ Unexpected error during PUT: {e}")
        return None

In [4]:
import logging
import requests
from lxml import etree
from urllib.parse import urlencode, urljoin
import json
from lxml.builder import ElementMaker

# Logging setup, repeated from the previous cell. basicConfig only takes effect
# while the root logger has no handlers, so after the earlier call this one is
# expected to change nothing.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

# Read the Alma base URL and API key from the process environment.
# `os` is not imported in this cell; it comes from the configuration cell above.
ALMA_URI_REGION = os.environ.get("ALMA_URI_REGION")
ALMA_API_KEY = os.environ.get("ALMA_API_KEY")

# Stop here when either value is missing or empty.
if not (ALMA_URI_REGION and ALMA_API_KEY):
    logging.error("❌ Alma URI region or API key not set in environment variables.")
    raise EnvironmentError("Alma URI region or API key not set.")

def build_alma_uri(uri_region, alma_api_key):
    """
    Constructs the Alma API URI used to POST a bib record.

    The absolute path "/almaws/v1/bibs" is resolved against `uri_region` with
    urljoin, and a fixed set of options plus the API key is appended as the
    query string.

    Parameters:
    - uri_region (str): Base URL for the Alma API.
    - alma_api_key (str): Alma API key, sent as the `apikey` query parameter.

    Returns:
    - str: The fully constructed Alma API URI (including the API key).
    """
    base_path = "/almaws/v1/bibs"
    query_params = {
        "from_nz_mms_id": "",
        "from_cz_mms_id": "",
        "normalization": "",
        "validate": "false",
        "override_warning": "true",
        "check_match": "false",
        "import_profile": "",
        "apikey": alma_api_key
    }
    endpoint = urljoin(uri_region, base_path)
    alma_uri = endpoint + "?" + urlencode(query_params)

    # The returned URI embeds the API key, so log a copy with the key masked
    # to keep the secret out of notebook output and log files.
    redacted_query = urlencode({**query_params, "apikey": "REDACTED"})
    logging.debug(f"🔗 Constructed Alma URI: {endpoint}?{redacted_query}")
    return alma_uri

def parse_400(result, xslt_path="xsl/put_mms_id.xsl"):
    """
    Parses the 400 response from Alma and extracts the MMS ID(s) using XSLT.

    The stylesheet's text output is read one ID per line. Blank lines and the
    literal line "No text found in brackets" are discarded.

    Parameters:
    - result (bytes): The XML response content.
    - xslt_path (str): Path of the stylesheet to apply. Defaults to
      "xsl/put_mms_id.xsl", resolved against the current working directory.

    Returns:
    - list: A list of extracted MMS IDs. Empty when nothing was extracted or
      when the response or the stylesheet could not be processed.
    """
    try:
        xml_response = etree.fromstring(result)
    except etree.XMLSyntaxError as e:
        logging.error(f"❌ Failed to parse 400 response XML: {e}")
        return []

    # Load the stylesheet separately: a missing file (OSError) or an invalid
    # stylesheet would otherwise escape this function or be reported as a
    # problem with the response body.
    try:
        transform = etree.XSLT(etree.parse(xslt_path))
    except (OSError, etree.XMLSyntaxError, etree.XSLTParseError) as e:
        logging.error(f"❌ Failed to load XSLT stylesheet '{xslt_path}': {e}")
        return []

    try:
        transformed = transform(xml_response)
    except etree.XSLTApplyError as e:
        logging.error(f"❌ XSLT transformation error: {e}")
        return []

    mms_ids_str = str(transformed).strip()
    logging.debug(f"Extracted MMS IDs string: '{mms_ids_str}'")

    # One ID per line; strip each line so stray whitespace or '\r' cannot
    # defeat the blank-line and sentinel checks.
    candidates = (line.strip() for line in mms_ids_str.splitlines())
    mms_ids = [
        mms_id for mms_id in candidates
        if mms_id and mms_id != "No text found in brackets"
    ]
    logging.debug(f"Extracted MMS IDs list: {mms_ids}")
    if not mms_ids:
        logging.error("❌ No MMS IDs extracted from 400 response.")
    return mms_ids

def submit_to_alma(xml_content, record_type, timeout=30):
    """
    Posts XML content to Alma and handles responses.

    On success the MMS ID is read from the response body. On a 400 response
    the error body is handed to parse_400, and every MMS ID it yields is
    updated in place through put_to_alma with the same XML.

    Parameters:
    - xml_content (str): The XML content to post.
    - record_type (str): Type of record ('Work' or 'Instance'); used in log
      messages and passed on to put_to_alma.
    - timeout (float): Seconds passed to requests.post as its timeout so an
      unresponsive server cannot block the notebook indefinitely.

    Returns:
    - list: List of MMS IDs if successful, else empty list.
    """
    alma_uri = build_alma_uri(ALMA_URI_REGION, ALMA_API_KEY)
    headers = {
        "Content-Type": "application/xml; charset=utf-8",
        "Accept": "application/xml",
        "x-api-key": ALMA_API_KEY,
    }
    # Log only the part before '?': the query string carries the API key.
    logging.debug(f"📥 Posting XML to Alma URI: {alma_uri.split('?', 1)[0]}")

    try:
        response = requests.post(
            alma_uri,
            headers=headers,
            data=xml_content.encode('utf-8'),
            timeout=timeout,  # a timeout surfaces via the generic handler below
        )
        logging.debug(f"DEBUG: Alma result status code: {response.status_code}")
        logging.debug(f"DEBUG: Alma result text: {response.text}")
        response.raise_for_status()

        # Parse successful response
        tree = etree.fromstring(response.content)

        # Search for 'mms_id' without namespace
        mms_id_element = tree.find('.//mms_id')
        if mms_id_element is not None and mms_id_element.text:
            logging.info(f"✅ Created '{record_type}' record with MMS ID: {mms_id_element.text}")
            return [mms_id_element.text]
        logging.error("❌ MMS ID not found in Alma response.")
        return []
    except requests.exceptions.HTTPError as e:
        logging.debug(f"DEBUG: HTTPError encountered: {e}")
        # Use the response attached to the exception rather than the local
        # variable, so this handler does not depend on how far the try got.
        error_response = e.response
        if error_response is None or error_response.status_code != 400:
            logging.error(f"❌ Error posting to Alma: {e}")
            return []

        logging.debug("🔄 Handling 400 error by parsing MMS ID from error response.")
        # Build the update URI the same way build_alma_uri does (urljoin +
        # urlencode) so the API key is percent-encoded and a trailing slash
        # on ALMA_URI_REGION cannot produce a malformed path.
        update_query = urlencode({
            "normalization": "",
            "validate": "false",
            "override_warning": "true",
            "override_lock": "false",
            "stale_version_check": "false",
            "cataloger_level": "",
            "check_match": "false",
            "import_profile": "",
            "apikey": ALMA_API_KEY,
        })
        updated_mms_ids = []
        for mms_id in parse_400(error_response.content):
            alma_update_uri = (
                urljoin(ALMA_URI_REGION, f"/almaws/v1/bibs/{mms_id}") + "?" + update_query
            )
            updated_mms_id = put_to_alma(alma_update_uri, xml_content, record_type)
            if updated_mms_id:
                updated_mms_ids.append(updated_mms_id)
        return updated_mms_ids
    except etree.XMLSyntaxError as e:
        logging.error(f"❌ Failed to parse Alma response XML: {e}")
        return []
    except Exception as e:
        logging.error(f"❌ Unexpected error: {e}")
        return []

def put_to_alma(alma_update_uri, xml_content, record_type, timeout=30):
    """
    Updates an existing Alma record with XML content.

    Parameters:
    - alma_update_uri (str): The Alma update URI.
    - xml_content (str): The XML content to update.
    - record_type (str): Type of record ('Work' or 'Instance'); used in log messages.
    - timeout (float): Seconds passed to requests.put as its timeout so an
      unresponsive server cannot block the notebook indefinitely.

    Returns:
    - str or None: The updated MMS ID if successful, else None.
    """
    headers = {
        "Content-Type": "application/xml; charset=UTF-8",
        "Accept": "application/xml",
    }
    # Log only the part before '?': the query string may carry the API key.
    logging.debug(f"🔄 Sending PUT request to Alma URI: {alma_update_uri.split('?', 1)[0]}")

    try:
        put_update = requests.put(
            alma_update_uri,
            headers=headers,
            data=xml_content.encode('utf-8'),
            timeout=timeout,  # a timeout surfaces via the generic handler below
        )
        logging.debug(f"DEBUG: PUT update status code: {put_update.status_code}")
        logging.debug(f"DEBUG: PUT update text: {put_update.text}")
        put_update.raise_for_status()

        # Parse the PUT response
        tree = etree.fromstring(put_update.content)

        # Prefer an un-namespaced 'mms_id'; otherwise accept one in any namespace.
        mms_id_element = tree.find('.//mms_id')
        if mms_id_element is None:
            logging.debug("🔍 'mms_id' not found directly. Attempting namespace-agnostic search.")
            # '{*}mms_id' matches the local name in any namespace and visits
            # elements only. A bare tree.iter() also yields comment and
            # processing-instruction nodes, whose .tag is not a string and
            # cannot be passed to etree.QName.
            mms_id_element = next(tree.iter('{*}mms_id'), None)

        if mms_id_element is not None and mms_id_element.text:
            logging.info(f"✅ Updated '{record_type}' record with MMS ID: {mms_id_element.text}")
            return mms_id_element.text
        logging.error("❌ MMS ID not found in PUT response.")
        return None
    except requests.exceptions.HTTPError as e:
        logging.error(f"❌ Error updating Alma record: {e}")
        return None
    except etree.XMLSyntaxError as e:
        logging.error(f"❌ Failed to parse PUT response XML: {e}")
        return None
    except Exception as e:
        logging.error(f"❌ Unexpected error during PUT: {e}")
        return None

def _wrap_in_bib(record_format, rdf_xml, namespaces):
    """Wrap serialized BIBFRAME XML in the <bib>/<record>/<rdf:RDF> envelope sent to Alma."""
    xmlns_attrs = ' '.join(f'xmlns:{prefix}="{uri}"' for prefix, uri in namespaces.items())
    return f"""<bib>
    <record_format>{record_format}</record_format>
    <suppress_from_publishing>false</suppress_from_publishing>
    <record>
        <rdf:RDF {xmlns_attrs}>
            {rdf_xml}
        </rdf:RDF>
    </record>
</bib>"""


def process_xml_and_post(xml_url, timeout=30):
    """
    Fetches XML from the given URL, processes it, posts to Alma, and returns MMS IDs.

    The first bf:Work and first bf:Instance found in the document are each
    rebuilt as a standalone element, wrapped in a <bib> envelope, written to
    'alma-work.xml' / 'alma-instance.xml' in the working directory, and
    submitted with submit_to_alma. When the Work submission yields an MMS ID,
    the Instance's bf:instanceOf is replaced by a reference to the Work's
    rdf:about URI.

    Parameters:
    - xml_url (str): URL of the XML resource.
    - timeout (float): Seconds passed to requests.get as its timeout so an
      unresponsive server cannot block the notebook indefinitely.

    Returns:
    - dict: Contains 'work_mms_id' and 'instance_mms_id'. Each value is the
      list returned by submit_to_alma, or None when that element was absent
      or its processing raised. An empty dict is returned when fetching or
      parsing the source XML fails.
    """
    rdf_ns = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}'
    try:
        # Fetch XML data
        print(f"📥 Fetching XML from URL: {xml_url}")
        response = requests.get(xml_url, timeout=timeout)
        response.raise_for_status()
        xml_data = response.content
        print("✅ Successfully fetched XML data.")

        # Parse XML; keep only the prefixed namespaces in scope at the root
        tree = etree.fromstring(xml_data)
        namespaces = {k: v for k, v in tree.nsmap.items() if k}
        print(f"🧩 Extracted namespaces: {namespaces}")

        # Extract bf:Work and bf:Instance
        bf_work = tree.find('.//bf:Work', namespaces)
        bf_instance = tree.find('.//bf:Instance', namespaces)

        # Get the 'about' attribute from bf:Work and bf:Instance
        work_about_attr = bf_work.get(rdf_ns + 'about') if bf_work is not None else None
        instance_about_attr = bf_instance.get(rdf_ns + 'about') if bf_instance is not None else None
        print(f"🔗 Work URL: {work_about_attr}")
        print(f"🔗 Instance URL: {instance_about_attr}")

        work_mms_id = None
        instance_mms_id = None
        work_uri = None

        E = ElementMaker(namespace="http://id.loc.gov/ontologies/bibframe/", nsmap=namespaces)

        # Process bf:Work
        if bf_work is not None:
            print("🔄 Processing bf:Work element.")
            try:
                new_bf_work = E.Work(*bf_work, **bf_work.attrib)
                # Keep only the first bf:AdminMetadata under the first parent that has one.
                admin_metadata_parent = new_bf_work.find('.//bf:AdminMetadata/..', namespaces)
                if admin_metadata_parent is not None:
                    admin_metadata_elements = admin_metadata_parent.findall('.//bf:AdminMetadata', namespaces)
                    for elem in admin_metadata_elements[1:]:
                        # './/' also matches nested elements; remove each one from
                        # its actual parent, since parent.remove() raises ValueError
                        # for anything that is not a direct child.
                        elem.getparent().remove(elem)
                bf_work_str = etree.tostring(new_bf_work, pretty_print=True, encoding='UTF-8').decode('utf-8')
                bf_work_str = bf_work_str.replace('<?xml version=\'1.0\' encoding=\'UTF-8\'?>', '').strip()
                bf_work_str = _wrap_in_bib("lcbf_work", bf_work_str, namespaces)
                # Explicit UTF-8 so the file does not depend on the platform's default encoding.
                with open('alma-work.xml', 'w', encoding='utf-8') as f:
                    f.write(bf_work_str)
                print("✅ Created 'alma-work.xml'.")

                # Post bf:Work to Alma
                work_mms_id = submit_to_alma(bf_work_str, "Work")
                if work_mms_id:
                    print(f"📄 Work MMS ID: {work_mms_id}")
                    work_uri = work_about_attr
                    print(f"🔗 Work URI: {work_uri}")
                else:
                    print("❌ Failed to obtain Work MMS ID.")
            except Exception as e:
                print(f"❌ Error processing bf:Work: {e}")

        # Process bf:Instance
        if bf_instance is not None:
            print("🔄 Processing bf:Instance element.")
            try:
                new_bf_instance = E.Instance(*bf_instance, **bf_instance.attrib)
                for instance_of in new_bf_instance.findall('.//bf:instanceOf', namespaces):
                    # Same direct-child caveat as above: detach from the real parent.
                    instance_of.getparent().remove(instance_of)
                if work_uri:
                    # Link the Instance to the Work that was just submitted.
                    instance_of = E.instanceOf()
                    instance_of.set(rdf_ns + 'resource', work_uri)
                    new_bf_instance.append(instance_of)
                bf_instance_str = etree.tostring(new_bf_instance, pretty_print=True, encoding='UTF-8').decode('utf-8')
                bf_instance_str = _wrap_in_bib("lcbf_instance", bf_instance_str, namespaces)
                with open('alma-instance.xml', 'w', encoding='utf-8') as f:
                    f.write(bf_instance_str)
                print("✅ Created 'alma-instance.xml'.")

                # Post bf:Instance to Alma
                instance_mms_id = submit_to_alma(bf_instance_str, "Instance")
                if instance_mms_id:
                    print(f"📄 Instance MMS ID: {instance_mms_id}")
                else:
                    print("❌ Failed to obtain Instance MMS ID.")
            except Exception as e:
                print(f"❌ Error processing bf:Instance: {e}")

        # The MMS IDs are returned to the caller; nothing is posted back to Alma here.
        return {
            "work_mms_id": work_mms_id,
            "instance_mms_id": instance_mms_id
        }
    except Exception as e:
        print(f"❌ Unexpected error in process_xml_and_post: {e}")
        return {}

# Run the fetch → transform → submit pipeline for the Marva record configured above.
result = process_xml_and_post(xml_url)

# Show the collected MMS IDs as indented JSON under a heading line.
print("\n📊 Final Result:", json.dumps(result, indent=4), sep="\n")

📥 Fetching XML from URL: https://bibframe.org/marva/api-production/ldp/e1730979
✅ Successfully fetched XML data.
🧩 Extracted namespaces: {'bflc': 'http://id.loc.gov/ontologies/bflc/', 'bf': 'http://id.loc.gov/ontologies/bibframe/', 'bfsimple': 'http://id.loc.gov/ontologies/bfsimple/', 'madsrdf': 'http://www.loc.gov/mads/rdf/v1#', 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#', 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'lclocal': 'http://id.loc.gov/ontologies/lclocal/', 'pmo': 'http://performedmusicontology.org/ontology/', 'datatypes': 'http://id.loc.gov/datatypes/', 'xsd': 'http://www.w3.org/2001/XMLSchema#', 'mstatus': 'https://id.loc.gov/vocabulary/mstatus/', 'mnotetype': 'http://id.loc.gov/vocabulary/mnotetype/', 'dcterms': 'http://purl.org/dc/terms/', 'owl': 'http://www.w3.org/2002/07/owl#', 'void': 'http://rdfs.org/ns/void#', 'lcc': 'http://id.loc.gov/ontologies/lcc#'}
🔗 Work URL: http://id.loc.gov/resources/works/23679002
🔗 Instance URL: http://id.loc.gov/resources/i

INFO:✅ Updated 'Work' record with MMS ID: 9779509524403681


📄 Work MMS ID: ['9779509524403681']
🔗 Work URI: http://id.loc.gov/resources/works/23679002
🔄 Processing bf:Instance element.
✅ Created 'alma-instance.xml'.


INFO:✅ Updated 'Instance' record with MMS ID: 9979509723603681


📄 Instance MMS ID: ['9979509723603681']

📊 Final Result:
{
    "work_mms_id": [
        "9779509524403681"
    ],
    "instance_mms_id": [
        "9979509723603681"
    ]
}
