In [1]:
pip install ET

Collecting ET
  Obtaining dependency information for ET from https://files.pythonhosted.org/packages/7e/52/3350143771b8c99d86a6014202ff7b92b3b78e79c23699ec730f53703bbf/et-0.0.2-py2.py3-none-any.whl.metadata
  Downloading et-0.0.2-py2.py3-none-any.whl.metadata (1.8 kB)
Downloading et-0.0.2-py2.py3-none-any.whl (5.2 kB)
Installing collected packages: ET
Successfully installed ET-0.0.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [3]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET

In [4]:
api_link = "https://www.hzdr.de/publications/OAI-PMH?verb=ListRecords&from=2024-01-01&set=HZDR:SW"

In [5]:
response = requests.get(api_link)

if response.status_code == 200:
    xml_file2024 = response.content  
else:
    raise Exception(f"Failed to fetch data {response.status_code}")

root = ET.fromstring(xml_file2024) 

In [6]:
# Define namespaces 
links = {
    "oai": "http://www.openarchives.org/OAI/2.0/",
    "dc": "http://purl.org/dc/elements/1.1/"
}

In [7]:
# Extract records
records = []
for record in root.findall(".//oai:record", links):
    title = record.find(".//dc:title", links).text if record.find(".//dc:title", links) is not None else None
    creators = [creator.text for creator in record.findall(".//dc:creator", links)]
    identifier = record.find(".//dc:identifier", links).text if record.find(".//dc:identifier", links) is not None else None
    datestamp = record.find(".//oai:datestamp", links).text if record.find(".//oai:datestamp", links) is not None else None

    # Add to records list
    if title and identifier and datestamp:
        records.append({
            "title": title,
            "creators": creators,
            "identifier": identifier,
            "datestamp": datestamp
        })

In [8]:
df = pd.DataFrame(records)

In [9]:
print("DataFrame numbers:")
print(df.columns)

DataFrame numbers:
Index(['title', 'creators', 'identifier', 'datestamp'], dtype='object')


In [10]:
record_count = len(df)
print(f"Number of records found: {record_count}")

Number of records found: 64


In [11]:
df.sort_values(by="datestamp", inplace=True)

In [12]:
# Write to XML
root_elem = ET.Element("records")

for _, row in df.iterrows():
    record_elem = ET.SubElement(root_elem, "record")
    
    title_elem = ET.SubElement(record_elem, "title")
    title_elem.text = row["title"]
    
    creators_elem = ET.SubElement(record_elem, "creators")
    for creator in row["creators"]:
        creator_elem = ET.SubElement(creators_elem, "creator")
        creator_elem.text = creator

    identifier_elem = ET.SubElement(record_elem, "identifier")
    identifier_elem.text = row["identifier"]
    
    datestamp_elem = ET.SubElement(record_elem, "datestamp")
    datestamp_elem.text = row["datestamp"]

In [13]:
# Save file
tree = ET.ElementTree(root_elem)
tree.write("output.xml", encoding="utf-8", xml_declaration=True)
print("XML file created as 'taskoutput.xml'")

XML file created as 'taskoutput.xml'


In [14]:
df

Unnamed: 0,title,creators,identifier,datestamp
0,Ansible Role GitLab,"[Hüser, C., Huste, T., Ziegner, N.]",https://www.hzdr.de/publications/Publ-32130-1,2024-05-21
34,LLAMA: The Low Level Abstraction of Memory Access,"[Matthes, A., Gruber, B. M., Widera, R.]",https://www.hzdr.de/publications/Publ-33710-2,2024-05-21
35,LLAMA: The Low Level Abstraction of Memory Access,"[Matthes, A., Gruber, B. M., Widera, R.]",https://www.hzdr.de/publications/Publ-33710-1,2024-05-21
36,LLAMA: The Low Level Abstraction of Memory Access,"[Matthes, A., Gruber, B. M., Widera, R.]",https://www.hzdr.de/publications/Publ-33710-3,2024-05-21
37,Data publication: Inverse-Dirichlet Weighting ...,"[Maddu, S., Sturm, D., Müller, C. L., Sbalzari...",https://www.hzdr.de/publications/Publ-33337-1,2024-05-21
...,...,...,...,...
18,HIFIS Software website: software.hifis.net,"[Erxleben, F., Hüser, C., Huste, T.]",https://www.hzdr.de/publications/Publ-30140-1,2024-05-21
17,ExploreASL/ExploreASL: First stable release,"[Mutsaerts, H., Petr, J., Stritt, M., Vandemae...",https://www.hzdr.de/publications/Publ-31229-1,2024-05-21
23,Common methods of spectral data analysis for u...,"[Seilmayer, M.]",https://www.hzdr.de/publications/Publ-29280-1,2024-05-21
61,Data publication: Data science education in un...,"[Shah, K., Butler, J., Knaub, A. V., Zenginoğl...",https://www.hzdr.de/publications/Publ-39493-1,2024-08-26
