# Introduction

In [1]:
# XML stands for Extensible Markup Language.
# Similar to HTML in its appearance.
# XML is used for data presentation.
# XML is exclusively designed to send and receive data.

# Parsing 

In [7]:
# Parsing means to read information from a file and split it into pieces by identifying parts of that particular XML file.

In [None]:
# Reference copy of the XML that the later cells read from 'sample.xml'
# (their printed outputs match this content). This cell only holds the text
# as a string literal; it does not create the file.
'''
<?xml version="1.0" encoding="UTF-8"?>
<metadata>
<food>
    <item name="breakfast">Idly</item>
    <price>$2.5</price>
    <description>Two idly's with chutney</description>
    <calories>553</calories>
</food>
<food>
    <item name="breakfast">Paper Dosa</item>
    <price>$2.7</price>
    <description>Plain Paper Dosa with chutney</description>
    <calories>700</calories>
</food>
</metadata>
'''

# Python XML Parsing Modules

In [3]:
# ElementTree (xml.etree.ElementTree)

# Formats XML data in a tree structure, which is the most natural representation of hierarchical data.

In [4]:
# DOM

# Used by people who are proficient with the DOM (Document Object Model).
# DOM applications often start by parsing XML into DOM.
# xml.dom.minidom

# xml.etree.ElementTree Module

In [5]:
# Tag : It is a string representing the type of data being stored.
# Attributes : Consist of a number of attributes stored as dictionaries.
# Text String : A text string having information that needs to be displayed.
# Tail String : Optional text that appears right after the element's closing tag (stored in the element's tail attribute).
# Child Elements : Consist of a number of child elements stored as sequences.

## 1.Parsing

In [6]:
# How to parse XML using the ElementTree Module.

# There are two ways to parse the files using this module.
# 1.Using parse() Function
# 2.Using fromstring() Function

In [10]:
# 1. parse(): reads an XML document from a file (path or file object).

import xml.etree.ElementTree as ET

# parse() yields an ElementTree wrapping the whole document ...
mytree = ET.parse('sample.xml')
# ... and getroot() hands back its top-level element (<metadata> here).
myroot = mytree.getroot()
print(myroot)

<Element 'metadata' at 0x000001F8B2CB35E8>


In [8]:
# 2. fromstring(): parses XML that is already held in a Python string.
#    No file is involved, and the call returns the root element directly
#    (there is no ElementTree wrapper and therefore no getroot() step).

import xml.etree.ElementTree as ET

data='''<?xml version="1.0" encoding="UTF-8"?>
<metadata>
<food>
    <item name="breakfast">Idly</item>
    <price>$2.5</price>
    <description>Two idly's with chutney</description>
    <calories>553</calories>
</food>
</metadata>
'''

myroot = ET.fromstring(data)
print(myroot)

<Element 'metadata' at 0x000001F8B2CB8D68>


## 2.Finding Elements

In [21]:
# Elements and sub-elements are inspected through .tag, .attrib, .text, etc.

import xml.etree.ElementTree as ET
mytree = ET.parse('sample.xml')
myroot = mytree.getroot()
print(myroot.tag)

# Children of the root are reached by position:
# index 0 is the Idly entry, index 1 is the Paper Dosa entry.
for position in (0, 1):
    food = myroot[position]
    print(food.tag)
    print(food.attrib)

metadata
food
{}
food
{}


In [22]:
# Idly: tag and attribute dictionary of every child of the first <food>
for child in myroot[0]:
    print(child.tag, child.attrib)

item {'name': 'breakfast'}
price {}
description {}
calories {}


In [23]:
# Paper Dosa: tag and attribute dictionary of every child of the second <food>
for child in myroot[1]:
    print(child.tag, child.attrib)

item {'name': 'breakfast'}
price {}
description {}
calories {}


In [17]:
# Idly: text content of every child of the first <food>
for child in myroot[0]:
    print(child.text)

Idly
$2.5
Two idly's with chutney
553


In [20]:
# Paper Dosa: text content of every child of the second <food>
for child in myroot[1]:
    print(child.text)

Paper Dosa
$2.7
Plain Paper Dosa with chutney
700


In [26]:
# Name and price of every <food> entry (Idly and Paper Dosa)

for food in myroot.findall('food'):
    # find() returns the first direct child with the given tag
    item_node = food.find('item')
    item = item_node.text
    price_node = food.find('price')
    price = price_node.text
    print(item, price)

Idly $2.5
Paper Dosa $2.7


## 3.Modifying

In [28]:
# XML can be modified using functions such as set(), SubElement(), etc.

In [29]:
# set(): adds (or overwrites) an attribute on an element.
# Here every <description> gets extra text appended and an updated="yes"
# attribute, and the modified tree is saved under a new file name.

import xml.etree.ElementTree as ET
mytree = ET.parse('sample.xml')
myroot = mytree.getroot()
for description in myroot.iter('description'):
    # Element.text is None for an empty element; fall back to '' so the
    # literal string 'None' is never written into the document.
    description.text = (description.text or '') + 'Description has been added'
    description.set('updated', 'yes')
mytree.write('new.xml')
# new.xml is written to the current working directory
# (it will show up on the Jupyter home page).

In [30]:
# SubElement(): creates a new child element under the given parent.

import xml.etree.ElementTree as ET
mytree = ET.parse('sample.xml')
myroot = mytree.getroot()
# SubElement() returns the element it just created, so set its text directly
# instead of searching the tree for every <speciality> afterwards (that search
# would also overwrite any <speciality> elements already present in the file).
speciality = ET.SubElement(myroot[0], 'speciality')
speciality.text = 'South Indian Special'
mytree.write('new2.xml')
# new2.xml is written to the current working directory
# (it will show up on the Jupyter home page).

In [31]:
# pop(): .attrib is a plain dictionary, so an attribute is deleted with dict.pop().

import xml.etree.ElementTree as ET
mytree = ET.parse('sample.xml')
myroot = mytree.getroot()
first_item = myroot[0][0]          # first child of the first <food>, i.e. its <item>
first_item.attrib.pop('name')
mytree.write('new3.xml')
# new3.xml is written to the current working directory
# (it will show up on the Jupyter home page).
# In <item name="breakfast">Idly</item> the name="breakfast" attribute is deleted.

In [32]:
# remove(): deletes one child element from its parent.

import xml.etree.ElementTree as ET
mytree = ET.parse('sample.xml')
myroot = mytree.getroot()
first_food = myroot[0]
first_food.remove(first_food[0])   # drop the first child of the first <food>
mytree.write('new4.xml')
# new4.xml is written to the current working directory
# (it will show up on the Jupyter home page).
# The whole <item name="breakfast">Idly</item> tag is deleted.

In [33]:
# clear(): resets an element by dropping its sub-elements, attributes and text.

import xml.etree.ElementTree as ET
mytree = ET.parse('sample.xml')
myroot = mytree.getroot()
first_food = myroot[0]
first_food.clear()
mytree.write('new5.xml')
# new5.xml is written to the current working directory
# (it will show up on the Jupyter home page).

# Idly's details are deleted, i.e. all of these children of the first <food>:
# <item name="breakfast">Idly</item>
#     <price>$2.5</price>
#     <description>Two idly's with chutney</description>
#     <calories>553</calories>

# xml.dom.minidom Module  (Minimal DOM Implementation)

## 1.Parsing

In [34]:
# parse(): takes XML in file form, either a file name or an open file object.

from xml.dom import minidom
# Alternative: pass the file name and let minidom open the file itself.
# mytree = minidom.parse('sample.xml')

# Use a context manager so the file handle is closed once parsing is done
# (the bare open() call left it open), and read in binary mode so the parser
# picks the encoding from the XML declaration rather than the locale default.
with open('sample.xml', 'rb') as data:
    a = minidom.parse(data)

In [36]:
# parseString(): parses XML passed in as a string and returns a DOM Document.

from xml.dom import minidom

xml_text = '<myxml>Using<empty/> parseString</myxml>'
data = minidom.parseString(xml_text)

## 2.Finding Elements

In [4]:
# Elements and their contents are looked up with getElementsByTagName(), .value, .data, etc.

from xml.dom import minidom
mytree = minidom.parse('sample.xml')
# getElementsByTagName() returns every matching element; keep only the first <item>.
tagname = mytree.getElementsByTagName('item')[0]
print(tagname)
name_attr = tagname.attributes['name']   # attribute node; .value holds its string
print(name_attr.value)
text_node = tagname.firstChild           # text node inside <item>; .data holds its string
print(text_node.data)

<DOM Element: item at 0x14a61bd6ea8>
breakfast
Idly


In [11]:
# Text of the second <item> element (index 1 in the returned node list)
from xml.dom import minidom
mytree = minidom.parse('sample.xml')
tagname = mytree.getElementsByTagName('item')
second_item = tagname[1]
print(second_item.firstChild.data)

Paper Dosa


In [12]:
# Text of every <item> element, in document order
from xml.dom import minidom
mytree = minidom.parse('sample.xml')
tagname = mytree.getElementsByTagName('item')
for item_node in tagname:
    print(item_node.firstChild.data)

Idly
Paper Dosa


## 3.Length of Elements

In [13]:
# You can find the length of the elements using len() function.

from xml.dom import minidom
mytree = minidom.parse('sample.xml')
tagname = mytree.getElementsByTagName('item')
print(len(tagname))

2
