In [1]:
!pip install lxml



In [3]:
from io import StringIO
from lxml import etree

In [2]:
# example
# from lxml import etree

# dtd = etree.DTD(StringIO("""<!ELEMENT foo EMPTY>"""))
# root = etree.XML("<foo/>")
# print(dtd.validate(root))
# # True

# root = etree.XML("<foo>bar</foo>")
# print(dtd.validate(root))
# # False
# print(dtd.error_log.filter_from_errors())

In [166]:
def validateDTD(XMLfile: str, DTDSchema: str) -> bool:
    """Validate an XML file against a DTD file and print the outcome.

    Args:
        XMLfile: Path of the XML document to parse and validate.
        DTDSchema: Path of the DTD file, opened here in binary mode.

    Returns:
        The result of ``DTD.validate``: truthy when the document conforms
        to the DTD, falsy otherwise.

    Exceptions raised while opening or parsing either file are not caught
    here and propagate to the caller.
    """
    # The context manager closes the DTD file handle once the DTD is built.
    with open(DTDSchema, 'rb') as dtd_file:
        dtd_schema = etree.DTD(dtd_file)

    xml_doc = etree.parse(XMLfile)
    result = dtd_schema.validate(xml_doc)

    if result:
        print("File Validated")
    else:
        print("Your XML file does not conform to its DTD Schema")

    # Honour the declared ``-> bool`` contract instead of returning None.
    return result

In [167]:
validateDTD('XML.xml', 'DTD.dtd')

File Validated


In [168]:
from lxml import etree, objectify
# Build the DTD inside a context manager so the file handle is closed afterwards.
with open('DTD.dtd', 'rb') as dtd_file:
    dtd = etree.DTD(dtd_file)

In [169]:
import xml.etree.ElementTree as ET
xml_path = 'XML.xml'
xml_doc = etree.parse(xml_path)

In [170]:
dtd.validate(xml_doc)

True

In [179]:
import lxml
from lxml import etree

dtd_string = """
<!ELEMENT page (title)>
<!ELEMENT title (#PCDATA)>
"""

xml = """
<page>
  <title>Hello</title>
 </page>
 """

dtd = etree.DTD(StringIO(dtd_string))
root = etree.fromstring(xml)
is_valid = dtd.validate(root)

In [182]:
is_valid

True

In [14]:
import lxml
from lxml import etree

dtd_string = """
<!ELEMENT note (to,from,heading,body)>
<!ELEMENT to (#PCDATA)>
<!ELEMENT from (#PCDATA)>
<!ELEMENT heading (#PCDATA)>
<!ELEMENT body (#PCDATA)>
"""

xml = """
<note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend!</body>
</note>
 """

dtd = etree.DTD(StringIO(dtd_string))
root = etree.fromstring(xml)
is_valid = dtd.validate(root)

In [15]:
is_valid

True

In [None]:
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="record" type="recordType"/>
  <xs:complexType name="artistType">
    <xs:sequence>
      <xs:element type="xs:string" name="record"/>
    </xs:sequence>
  </xs:complexType>
  <xs:complexType name="recordType">
    <xs:sequence>
      <xs:element type="artistType" name="artist"/>
      <xs:element type="xs:string" name="year"/>
      <xs:element type="xs:string" name="contributor"/>
    </xs:sequence>
  </xs:complexType>
</xs:schema>

In [149]:
dtd = etree.DTD('DTD_fixed.dtd')

In [160]:
root = etree.XML("""
<record>
<artist>text</artist>
<year>text</year>
<contributor>text</contributor>
</record>
""")

In [159]:
dtd_string = """
        <!ELEMENT record (artist|year|contributor)*>
        <!ELEMENT artist (record)*>
        <!ELEMENT year (#PCDATA)>
        <!ELEMENT contributor (#PCDATA)>
"""

dtd = etree.DTD(StringIO(dtd_string))
print(dtd.validate(root))

False


In [162]:
dtd_string = """
<!ELEMENT record (artist|year|contributor)+>
<!ELEMENT artist (#PCDATA)>
<!ELEMENT year (#PCDATA)>
<!ELEMENT contributor (#PCDATA)>
"""

dtd = etree.DTD(StringIO(dtd_string))
print(dtd.validate(root))

True


In [131]:
# Experiment: build a DTD for <record> documents, then validate two
# unrelated <foo> documents against it.
dtd = etree.DTD(StringIO("""<!DOCTYPE record [
  <!ELEMENT record ((artist |year |contributor)+)>
  <!ELEMENT artist (record)+>
  <!ELEMENT year (#PCDATA)>
  <!ELEMENT contributor (#PCDATA)>
]>

"""))
root = etree.XML("<foo/>")
print(dtd.validate(root))
# False in the recorded run (not True) -- presumably because <foo> is not
# declared in this DTD.

root = etree.XML("<foo>bar</foo>")
print(dtd.validate(root))
# False
# Show the validation errors recorded by the DTD object.
print(dtd.error_log.filter_from_errors())

False
False
<string>:1:0:ERROR:VALID:DTD_UNKNOWN_ELEM: No declaration for element foo


In [51]:
import lxml
from lxml import etree

dtd_string = """
<!ELEMENT page (title)>
<!ELEMENT title (#PCDATA)>
"""

xml = """
<!DOCTYPE record [         
<!ELEMENT record (artist|year|contributor)*>         
<!ELEMENT artist (record)*>        
 <!ELEMENT year (#PCDATA)>         
<!ELEMENT contributor (#PCDATA)>        
 ]> 
<record>   
<artist>
<record>text</record>
</artist>
<year>
text
</year>   
<contributor>
text
</contributor>
</record>

 """

# dtd = etree.DTD(StringIO(dtd_string))
root = etree.fromstring(xml)
# is_valid = dtd.validate(root)

In [52]:
root

<Element record at 0x12371f480>

In [61]:
xsd_path = 'xml_schema.xsd'

In [62]:
xmlschema_doc = etree.parse(xsd_path)

In [63]:
xmlschema = etree.XMLSchema(xmlschema_doc)

In [64]:
xmlschema.validate(root)

True

In [90]:
def validateSchema(XMLfile: str, XMLSchema: str) -> bool:
    """Validate an XML file against an XML Schema (XSD) file and print the outcome.

    Args:
        XMLfile: Path of the XML document to parse and validate.
        XMLSchema: Path of the XSD file; it is parsed as XML and compiled
            with ``etree.XMLSchema``.

    Returns:
        The result of ``XMLSchema.validate``: truthy when the document
        conforms to the schema, falsy otherwise.

    Exceptions raised while parsing either file or compiling the schema are
    not caught here and propagate to the caller.
    """
    xml_schema_doc = etree.parse(XMLSchema)
    xml_schema = etree.XMLSchema(xml_schema_doc)

    xml_doc = etree.parse(XMLfile)
    result = xml_schema.validate(xml_doc)

    if result:
        print("File Validated")
    else:
        print("Your XML file does not conform to its XML Schema")

    # Honour the declared ``-> bool`` contract instead of returning None.
    return result
        
def validateDTD(XMLfile: str, DTDSchema: str) -> bool:
    """Validate an XML file against a DTD file and print the outcome.

    Note: this redefinition replaces the ``validateDTD`` defined earlier in
    the notebook; it hands the DTD path straight to ``etree.DTD`` instead of
    opening the file itself.

    Args:
        XMLfile: Path of the XML document to parse and validate.
        DTDSchema: Path of the DTD file passed to ``etree.DTD``.

    Returns:
        The result of ``DTD.validate``: truthy when the document conforms
        to the DTD, falsy otherwise.

    Exceptions raised while loading the DTD or parsing the XML are not
    caught here and propagate to the caller.
    """
    dtd_schema = etree.DTD(DTDSchema)

    xml_doc = etree.parse(XMLfile)
    result = dtd_schema.validate(xml_doc)

    if result:
        print("File Validated")
    else:
        print("Your XML file does not conform to its DTD Schema")

    # Honour the declared ``-> bool`` contract instead of returning None.
    return result

In [81]:
validateSchema('XML.xml', 'xml_schema.xsd')

File Validated


In [82]:
validateDTD('XML.xml', 'DTD_fixed.dtd')

Your XML file does not conform to its DTD Schema


In [None]:
def task_a(xml_path):
    """Validate ``xml_path`` against the XSD schema and then against the DTD.

    Args:
        xml_path: Path of the XML document to validate. It is now actually
            used; the previous body ignored it and always checked 'XML.xml'.
    """
    validateSchema(xml_path, 'xml_schema.xsd')
    validateDTD(xml_path, 'DTD_fixed.dtd')

In [98]:
def validateXML(XMLfile: str, ValidationFile: str) -> bool:
    """Validate an XML file with the validator matching the schema file's extension.

    Args:
        XMLfile: Path of the XML document to validate.
        ValidationFile: Path of the schema file. A name ending in ``.dtd``
            is handled by ``validateDTD``; one ending in ``.xsd`` is handled
            by ``validateSchema``.

    Returns:
        Whatever the selected validator returns.

    Raises:
        ValueError: If ``ValidationFile`` ends in neither ``.dtd`` nor
            ``.xsd``. ValueError is a subclass of Exception, so callers
            catching Exception still work.
        Exception: With the message "XML file is not correct" when the
            selected validator raises; the underlying error is attached as
            ``__cause__``.
    """
    # Choose the validator outside the try block so an unsupported extension
    # is reported as such instead of being relabelled as a broken XML file.
    if ValidationFile.endswith('.dtd'):
        validator = validateDTD
    elif ValidationFile.endswith('.xsd'):
        validator = validateSchema
    else:
        raise ValueError("Validation File is not correct")

    try:
        return validator(XMLfile, ValidationFile)
    except Exception as exc:
        # Keep the original message but chain the cause for debugging.
        # Unlike a bare ``except:``, KeyboardInterrupt is not intercepted.
        raise Exception("XML file is not correct") from exc

In [99]:
validateXML('XML.xml', 'xml_schema.xsd')

File Validated


In [100]:
validateXML('XML.xml', 'DTD_fixed.dtd')

Your XML file does not conform to its DTD Schema


In [146]:
def task_b(xml_path):
    """Run ``validateXML`` on ``xml_path`` against the XSD schema, then the DTD."""
    for validation_file in ('xml_schema.xsd', 'DTD_fixed.dtd'):
        validateXML(xml_path, validation_file)

In [147]:
task_b('XML.xml')

File Validated
Your XML file does not conform to its DTD Schema


In [135]:
import xmltodict

def task_c(xml_path):
    """Replace the record's contributor with name/gender children and save it.

    Reads ``xml_path`` with xmltodict, swaps the ``contributor`` entry of the
    top-level ``record`` for a mapping holding ``name`` and ``gender``, and
    writes the pretty-printed document to 'result.xml'.
    """
    with open(xml_path) as source_file:
        parsed = xmltodict.parse(source_file.read())

    record = parsed['record']
    # Remove any existing contributor first so the new one is re-inserted
    # at the end of the mapping.
    record.pop('contributor', None)
    record['contributor'] = {'name': 'abhinav', 'gender': 'male'}

    with open('result.xml', 'w') as result_file:
        result_file.write(xmltodict.unparse(parsed, pretty=True))

In [145]:
import xmltodict

def task_d(result_xml_path):
    """Validate the file, reset its contributor to plain text, then validate again.

    The document at ``result_xml_path`` is checked with ``validateXML``
    against the XSD and the DTD, rewritten in place with the record's
    ``contributor`` set to the string 'text', and checked a second time.
    """
    validation_files = ('xml_schema.xsd', 'DTD_fixed.dtd')

    # First pass: validate the file as it currently is.
    for validation_file in validation_files:
        validateXML(result_xml_path, validation_file)

    with open(result_xml_path) as source_file:
        parsed = xmltodict.parse(source_file.read())

    record = parsed['record']
    record.pop('contributor', None)
    record['contributor'] = 'text'

    with open(result_xml_path, 'w') as result_file:
        result_file.write(xmltodict.unparse(parsed, pretty=True))

    # Second pass: validate the rewritten file.
    for validation_file in validation_files:
        validateXML(result_xml_path, validation_file)

In [136]:
validateXML('result.xml', 'xml_schema.xsd')

Your XML file does not conform to its XML Schema


In [137]:
validateXML('result.xml', 'DTD_fixed.dtd')

Your XML file does not conform to its DTD Schema


In [138]:
import xmltodict

result_xml_path = 'result.xml'
with open(result_xml_path) as fd:
    data = xmltodict.parse(fd.read())

In [139]:
data

{'record': {'artist': {'record': 'text'},
  'year': 'text',
  'contributor': {'name': 'abhinav', 'gender': 'male'}}}

In [140]:
data['record'].pop('contributor', None)

{'name': 'abhinav', 'gender': 'male'}

In [141]:
data['record']['contributor'] = 'text'

In [142]:
data

{'record': {'artist': {'record': 'text'},
  'year': 'text',
  'contributor': 'text'}}

In [143]:
with open('result.xml', 'w') as result_file:
    result_file.write(xmltodict.unparse(data, pretty=True))

In [144]:
validateXML('result.xml', 'xml_schema.xsd')

File Validated


In [8]:
def validate(xml_path: str, xsd_path: str) -> bool:
    """Validate an XML file against an XML Schema (XSD) file.

    Args:
        xml_path: Path of the XML document to validate.
        xsd_path: Path of the XSD file. It must itself be well-formed XML,
            because it is parsed with ``etree.parse`` before being compiled.

    Returns:
        The result of ``XMLSchema.validate`` for the parsed document.

    Exceptions raised while parsing either file or compiling the schema are
    not caught here and propagate to the caller.
    """
    # Parse with lxml's default parser. The second positional argument of
    # etree.parse is a parser, so the notebook-global ``dtd`` object must not
    # be passed there.
    xmlschema_doc = etree.parse(xsd_path)
    xmlschema = etree.XMLSchema(xmlschema_doc)

    xml_doc = etree.parse(xml_path)
    return xmlschema.validate(xml_doc)

In [9]:
validate('XML.xml', 'DTD.dtd')

XMLSyntaxError: Start tag expected, '<' not found, line 7, column 1 (DTD.dtd, line 7)

In [4]:
doc

{'record': {'artist': {'record': 'text'},
  'year': 'text',
  'contributor': 'text'}}