In [0]:
%fs head '/FileStore/tables/demo.xml'

In [0]:
# Load the demo XML file with the XML data source; rows are taken from the
# <record> elements (rowTag) and column types are left to schema inference.
df = (
    spark.read
    .format('xml')
    .option('rootTag', 'data')
    .option('rowTag', 'record')
    .load('/FileStore/tables/demo.xml')
)
df.show()

+---+--------+---+
|_id|    name|rid|
+---+--------+---+
|  1|Record 1|  1|
|  2|Record 2|  2|
|  3|Record 3|  3|
+---+--------+---+



In [0]:
# Show the column types Spark inferred for df (no explicit schema was passed on read).
df.printSchema()

root
 |-- _id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- rid: long (nullable = true)



In [0]:
from pyspark.sql.types import *
# Explicit schema for the demo records: every field is declared non-nullable,
# and the numeric columns are narrowed to integers.
schema = StructType(
    [
        StructField('_id', IntegerType(), nullable=False),
        StructField('rid', IntegerType(), nullable=False),
        StructField('name', StringType(), nullable=False),
    ]
)

In [0]:
# Read the same file again, this time applying the hand-written schema
# instead of relying on type inference.
df1 = (
    spark.read
    .format('xml')
    .option('rootTag', 'data')
    .option('rowTag', 'record')
    .schema(schema)
    .load('/FileStore/tables/demo.xml')
)
df1.show()

+---+---+--------+
|_id|rid|    name|
+---+---+--------+
|  1|  1|Record 1|
|  2|  2|Record 2|
|  3|  3|Record 3|
+---+---+--------+



In [0]:
# Confirm that df1 carries the types and nullability of the user-supplied schema.
df1.printSchema()

root
 |-- _id: integer (nullable = false)
 |-- rid: integer (nullable = false)
 |-- name: string (nullable = false)



In [0]:
# Keep only the rid and name columns (the _id column is dropped from the projection).
df2 = df1.select(["rid", "name"])
display(df2)

rid,name
1,Record 1
2,Record 2
3,Record 3


In [0]:
# Write df2 as XML without rootTag/rowTag options, so the writer falls back to
# its default element names; 'overwrite' replaces any previous output.
(
    df2.write
    .mode('overwrite')
    .format('xml')
    .save('/FileStore/tables/demo_xml_output')
)

In [0]:
%fs head '/FileStore/tables/demo_xml_output/part-00000'

In [0]:
# Write df2 as XML again, this time naming the root element <data> and each
# row element <record> explicitly.
(
    df2.write
    .mode('overwrite')
    .format('xml')
    .option('rootTag', 'data')
    .option('rowTag', 'record')
    .save('/FileStore/tables/demo_xml_output1')
)

In [0]:
%fs head '/FileStore/tables/demo_xml_output1/part-00000'

In [0]:
%fs head '/FileStore/tables/Demo_2-1.xml'

In [0]:
# Load the book catalog; rows are taken from the <book> elements (rowTag).
dff = (
    spark.read
    .format('xml')
    .option('rootTag', 'catalog')
    .option('rowTag', 'book')
    .load('/FileStore/tables/Demo_2-1.xml')
)
dff.show()

+-----+--------------------+--------------------+--------+-----+-------------+--------------------+
|  _id|              author|         description|   genre|price|publish_dates|               title|
+-----+--------------------+--------------------+--------+-----+-------------+--------------------+
|bk101|Gambardella, Matthew|An in-depth look ...|Computer|44.95| {2000-10-01}|XML Developer's G...|
|bk102|          Ralls, Kim|A former architec...| Fantasy| 5.95| {2000-12-16}|       Midnight Rain|
|bk103|         Corets, Eva|After the collaps...| Fantasy| 5.95| {2000-11-17}|     Maeve Ascendant|
|bk104|       Knorr, Stefan|An anthology of h...|   Hrror| 4.95| {2000-12-06}|     Creepy Crawlies|
+-----+--------------------+--------------------+--------+-----+-------------+--------------------+



In [0]:
from pyspark.sql.functions import split

# Authors are stored as "Last, First". The split pattern is a regular
# expression, so `\s*,\s*` consumes the whitespace around the comma; splitting
# on a bare ',' would leave first_name with a leading space (" Matthew") that
# the right-aligned show() output hides but the XML written later would keep.
author_parts = split(dff.author, r'\s*,\s*')

# Project the book columns (without _id and author) and append the two name
# parts; the resulting column order is unchanged.
dff1 = dff.select(
    "description",
    "genre",
    "price",
    "publish_dates",
    "title",
    author_parts[1].alias('first_name'),
    author_parts[0].alias('last_name'),
)
dff1.show()

+--------------------+--------+-----+-------------+--------------------+----------+-----------+
|         description|   genre|price|publish_dates|               title|first_name|  last_name|
+--------------------+--------+-----+-------------+--------------------+----------+-----------+
|An in-depth look ...|Computer|44.95| {2000-10-01}|XML Developer's G...|   Matthew|Gambardella|
|A former architec...| Fantasy| 5.95| {2000-12-16}|       Midnight Rain|       Kim|      Ralls|
|After the collaps...| Fantasy| 5.95| {2000-11-17}|     Maeve Ascendant|       Eva|     Corets|
|An anthology of h...|   Hrror| 4.95| {2000-12-06}|     Creepy Crawlies|    Stefan|      Knorr|
+--------------------+--------+-----+-------------+--------------------+----------+-----------+



In [0]:
# Write the reshaped catalog as XML without rootTag/rowTag options, so the
# writer's default element names are used; 'overwrite' replaces prior output.
(
    dff1.write
    .mode('overwrite')
    .format('xml')
    .save('/FileStore/tables/demo2_xml_output')
)

In [0]:
%python
# List the files the XML writer produced in the output directory.
dbutils.fs.ls('/FileStore/tables/demo2_xml_output')

Out[24]: [FileInfo(path='dbfs:/FileStore/tables/demo2_xml_output/_SUCCESS', name='_SUCCESS', size=0, modificationTime=1664259743000),
 FileInfo(path='dbfs:/FileStore/tables/demo2_xml_output/part-00000', name='part-00000', size=1736, modificationTime=1664259743000)]

In [0]:
%fs head 'dbfs:/FileStore/tables/demo2_xml_output/part-00000'

In [0]:
# Write the reshaped catalog as XML with <catalog> as the root element and
# <book> as the per-row element, matching the tags used when reading.
(
    dff1.write
    .mode('overwrite')
    .format('xml')
    .option('rootTag', 'catalog')
    .option('rowTag', 'book')
    .save('/FileStore/tables/demo2_xml_output1')
)

In [0]:
%fs head 'dbfs:/FileStore/tables/demo2_xml_output1/part-00000'