# VL2.4 - Semi-Strukturierte Daten
## (1) XML
### Datenstruktur generieren und XML-Datei schreiben

In [26]:
import xml.etree.ElementTree as ET

# Surnames that become the text of the individual <name> elements.
name_list = ["Müller", "Meier", "Schulz", "Peters"]

# Build the XML tree, starting with the root element "names".
root = ET.Element("names")

# Append one <name> child per entry; the element text is the name itself.
for name in name_list:
    ET.SubElement(root, "name").text = name

# Wrap the root in an ElementTree and write it to disk as UTF-8,
# including the leading XML declaration.
tree = ET.ElementTree(root)
tree.write("names.xml", xml_declaration=True, encoding='utf-8')

### XML-Datei einlesen und parsen

In [38]:
import xml.etree.ElementTree as ET
# Parse the XML file into an ElementTree and fetch its root element.
tree = ET.parse('country_data.xml')
root = tree.getroot()

# Iterating over an element yields its direct children; for each one,
# print the tag name together with the dictionary of its attributes.
for child in root:
    print(child.tag, child.attrib)

country {'name': 'Liechtenstein'}
country {'name': 'Singapore', 'lat': '1.290270', 'long': '103.851959'}
country {'name': 'Panama'}


In [42]:
# Elements can be indexed like lists: root[0] is the first child of the
# root, and [1] picks that child's second sub-element.
first_child = root[0]
first_child[1].tag

'year'

Zugriff über Methoden (vgl. https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element)

In [49]:
# Recommended access via the Element methods instead of positional indexes:
# look up the <country> children by tag, read the "name" attribute with
# get() and the text of the nested <rank> element with find().
for country in root.iterfind('country'):
    name, rank = country.get('name'), country.find('rank').text
    print(name, rank)

Liechtenstein 1
Singapore 4
Panama 68


In [52]:
import xml.dom.minidom
# Parse the file into a DOM document and print an indented rendering of
# the whole tree.
# NOTE(review): the recorded output contains whitespace-only lines between
# the elements; presumably the whitespace text nodes of the source file are
# kept and toprettyxml() adds its own indentation on top — confirm.
dom = xml.dom.minidom.parse("country_data.xml")
pretty_xml = dom.toprettyxml()
print(pretty_xml)

<?xml version="1.0" ?>
<data>
	
    
	<country name="Liechtenstein">
		
        
		<rank>1</rank>
		
        
		<year>2008</year>
		
        
		<gdppc>141100</gdppc>
		
        
		<neighbor name="Austria" direction="E"/>
		
        
		<neighbor name="Switzerland" direction="W"/>
		
    
	</country>
	
    
	<country name="Singapore" lat="1.290270" long="103.851959">
		
        
		<rank>4</rank>
		
        
		<year>2011</year>
		
        
		<gdppc>59900</gdppc>
		
        
		<neighbor name="Malaysia" direction="N"/>
		
    
	</country>
	
    
	<country name="Panama">
		
        
		<rank>68</rank>
		
        
		<year>2011</year>
		
        
		<gdppc>13600</gdppc>
		
        
		<neighbor name="Costa Rica" direction="W"/>
		
        
		<neighbor name="Colombia" direction="E"/>
		
    
	</country>
	

</data>



## (2) JSON

In [78]:
import json

# Load the JSON document. The encoding is given explicitly because open()
# otherwise falls back to the platform's locale encoding, which would make
# the result depend on the machine the notebook runs on.
with open('country_data.json', encoding='utf-8') as f:
    data = json.load(f)

# Show the Python type that the parsed document has, then its content.
print(type(data))
print(data)

<class 'dict'>
{'data': {'country': [{'name': 'Liechtenstein', 'rank': 1, 'year': 2008, 'gdppc': 141100, 'neighbor': [{'name': 'Austria', 'direction': 'E'}, {'name': 'Switzerland', 'direction': 'W'}]}, {'name': 'Singapore', 'lat': 1.29027, 'long': 103.851959, 'rank': 4, 'year': 2011, 'gdppc': 59900, 'neighbor': {'name': 'Malaysia', 'direction': 'N'}}, {'name': 'Panama', 'rank': 68, 'year': 2011, 'gdppc': 13600, 'neighbor': [{'name': 'Costa Rica', 'direction': 'W'}, {'name': 'Colombia', 'direction': 'E'}]}]}}


In [63]:
import json

# Example structure: a dict holding a list of records with differing keys.
people = {'people' : [{'name':'Scott', 'age':31}, {'name':'Anne', 'age':29, 'married':False}]}

# Write the structure as JSON. The encoding is fixed to UTF-8 so the file
# does not depend on the platform's locale encoding.
with open('people.json', 'w', encoding='utf-8') as f:
    json.dump(people, f, indent=4)  # indent=4 yields a pretty-printed file

## (3) YAML

In [86]:
import yaml

# Load the YAML document with safe_load. The encoding is given explicitly
# because open() otherwise falls back to the platform's locale encoding.
with open("country_data.yaml", encoding='utf-8') as f:
    yaml_data = yaml.safe_load(f)

# Show the Python type of the parsed document; the bare expression on the
# last line makes the notebook display the content itself.
print(type(yaml_data))
yaml_data

<class 'dict'>


{'data': {'country': [{'gdppc': 141100,
    'name': 'Liechtenstein',
    'neighbor': [{'direction': 'E', 'name': 'Austria'},
     {'direction': 'W', 'name': 'Switzerland'}],
    'rank': 1,
    'year': 2008},
   {'gdppc': 59900,
    'lat': 1.29027,
    'long': 103.851959,
    'name': 'Singapore',
    'neighbor': {'direction': 'N', 'name': 'Malaysia'},
    'rank': 4,
    'year': 2011},
   {'gdppc': 13600,
    'name': 'Panama',
    'neighbor': [{'direction': 'W', 'name': 'Costa Rica'},
     {'direction': 'E', 'name': 'Colombia'}],
    'rank': 68,
    'year': 2011}]}}

In [87]:
# Example structure: a dict holding a list of records with differing keys.
people = {'people' : [{'name':'Scott', 'age':31}, {'name':'Anne', 'age':29, 'married':False}]}

# Write the structure as YAML. The encoding is fixed to UTF-8 so the file
# does not depend on the platform's locale encoding.
with open('people.yaml', 'w', encoding='utf-8') as f:
    yaml.dump(people, f)