#### XML literals

In [None]:
import scala.xml._

import scala.xml._


In [None]:
// An XML literal is an ordinary Scala expression; the recorded result type is scala.xml.Elem.
val doc = <html><head><title>Fred's Memoirs</title></head><body>test xml </body></html>

doc: scala.xml.Elem = <html><head><title>Fred's Memoirs</title></head><body>test xml </body></html>


In [None]:
// An element with mixed content: two text runs around a nested <em> element.
val elem = <a href="http://scala-lang.org">The <em>Scala</em> language</a>
println(elem.label) // the tag name; prints "a"
println(elem.child) // the child node sequence: text, <em> element, text

// Visit each child node in document order.
for (n <- elem.child) println(n)

a
ArrayBuffer(The , <em>Scala</em>,  language)
The 
<em>Scala</em>
 language
elem: scala.xml.Elem = <a href="http://scala-lang.org">The <em>Scala</em> language</a>


#### Attributes

In [None]:
// `attributes` returns the element's attribute list as scala.xml.MetaData.
val elem = <a href="http://scala-lang.org">The Scala language</a>
// Only read afterwards (see `meta.asAttrMap`), so bind it immutably.
val meta = elem.attributes

elem: scala.xml.Elem = <a href="http://scala-lang.org">The Scala language</a>
meta: scala.xml.MetaData =  href="http://scala-lang.org"


`attributes` yields an object of type `MetaData`, which behaves like a map from attribute keys to values.

Use the `()` operator to look up a value by key; it yields a node sequence.

In [None]:
// Applying the attribute list to a key returns the value as a node sequence (Seq[Node]).
val url = elem.attributes("href")

// An entity reference inside an attribute value is kept as its own node, so `alt`
// is recorded as three nodes: text, the &eacute; entity reference, text.
val image = <img alt="San Jos&eacute; State University Logo" src="http://www.sjsu.edu/publicaffairs/pics/sjsu_logo_color_web.jpg"/>
val alt = image.attributes("alt") 

url: Seq[scala.xml.Node] = http://scala-lang.org
image: scala.xml.Elem = <img alt="San Jos&eacute; State University Logo" src="http://www.sjsu.edu/publicaffairs/pics/sjsu_logo_color_web.jpg"/>
alt: Seq[scala.xml.Node] = ArrayBuffer(San Jos, &eacute;,  State University Logo)


turn node sequence into a string by `.text`

In [None]:
// Joins the nodes of the sequence into one String; the recorded result still shows &eacute; unresolved.
alt.text

res261: String = San Jos&eacute; State University Logo


Turn the attribute metadata into a map with `asAttrMap`.

In [None]:
// Converts the attribute list into a Map[String,String] of attribute key -> value.
meta.asAttrMap

res263: Map[String,String] = Map(href -> http://scala-lang.org)


#### Embedded Expressions

In [None]:
// Braces embed Scala code inside XML; the yielded <li> literal embeds {i} in turn.
<ul>{for (i <- 1 to 5) yield <li>option {i}</li>}</ul> // Scala inside XML inside Scala inside XML

res259: scala.xml.Elem = <ul><li>option 1</li><li>option 2</li><li>option 3</li><li>option 4</li><li>option 5</li></ul>


In [None]:
// Producing literal braces: doubled {{ and }} come out as single { and } in the result.
<h3>The Natural Numbers {{1, 2, 3, ...}}</h3>

res265: scala.xml.Elem = <h3>The Natural Numbers {1, 2, 3, ...}</h3>


compute attribute with expression

In [None]:
// An attribute value may be a Scala expression in braces; here it evaluates to "desc1".
<a att1={if (true) "desc1" else "desc"} href="#">empty link</a>

res267: scala.xml.Elem = <a att1="desc1" href="#">empty link</a>


#### CDATA

In [None]:
// A CDATA section in a literal; the recorded result shows its content as ordinary escaped text.
val js = <script><![CDATA[ alert("Cold!")]]></script>

// Two ways to build a Group: the <xml:group> literal and the Group constructor.
val g1 = <xml:group><li>Item 1</li><li>Item 2</li></xml:group>
val g2 = Group(Seq(<li>Item 1</li>, <li>Item 2</li>))

// iterate: adjacent sibling literals form a NodeBuffer (see the recorded type of `items`)
val items = <li>Item 1</li><li>Item 2</li>
for (n <- <xml:group>{items}</xml:group>) yield n
// Yields two li elements
for (n <- <ol>{items}</ol>) yield n
// Yields one ol element

js: scala.xml.Elem = <script> alert(&quot;Cold!&quot;)</script>
g1: scala.xml.Group = <li>Item 1</li><li>Item 2</li>
g2: scala.xml.Group = <li>Item 1</li><li>Item 2</li>
items: scala.xml.NodeBuffer = ArrayBuffer(<li>Item 1</li>, <li>Item 2</li>)
res269: scala.xml.NodeSeq = NodeSeq(<ol><li>Item 1</li><li>Item 2</li></ol>)


#### XPath-like

> Use `\` and `\\` in place of XPath's `/` and `//` (in Scala, `//` starts a comment).

In [None]:
// `\` selects the direct children that carry the given label.
val list = <dl><dt>Java</dt><dd>Gosling</dd><dt>Scala</dt><dd>Odersky</dd></dl>
val languages = list \ "dt" // the two <dt> children

list: scala.xml.Elem = <dl><dt>Java</dt><dd>Gosling</dd><dt>Scala</dt><dd>Odersky</dd></dl>
languages: scala.xml.NodeSeq = NodeSeq(<dt>Java</dt>, <dt>Scala</dt>)


`_` wildcard, matches any element 

In [None]:
// Sample document: <html> root, one <body>, holding a <ul> and an <ol> with two <li> each.
val long = <html><body><ul data="ul">{for (i <- 1 to 2) yield <li>a{i}</li>}</ul><ol data="ol">{for (i <- 1 to 2) yield <li>b{i}</li>}</ol></body></html>
long \ "html" \ "_" \ "li"  // matches nothing: `\` looks at children, and the root has no <html> child
long \ "body" \ "_" \ "li" // matches all li: body -> any child element -> li

long: scala.xml.Elem = <html><body><ul data="ul"><li>a1</li><li>a2</li></ul><ol data="ol"><li>b1</li><li>b2</li></ol></body></html>
res272: scala.xml.NodeSeq = NodeSeq(<li>a1</li>, <li>a2</li>, <li>b1</li>, <li>b2</li>)


`\\` locates descendants at any depth

In [None]:
// Finds every <li> regardless of how deeply it is nested.
long \\ "li"

res274: scala.xml.NodeSeq = NodeSeq(<li>a1</li>, <li>a2</li>, <li>b1</li>, <li>b2</li>)


`@` locates attributes   (no wildcard for this)

In [None]:
// Collects the values of every `data` attribute in the document (recorded: ul, ol).
long \\ "@data"

res276: scala.xml.NodeSeq = NodeSeq(ul, ol)


#### Pattern Matching

In [None]:
// Pattern matching on an element: the braces in the pattern match its child nodes.
val m  = <li><img alt="image"/><a href="#">test</a></li>

m match {
//   case <li>{_}</li> => println(1)  // match one element
//   case <li>{_*}</li> => println(2) // match multi element
  case <li>{children @ _*}</li> => { // bind a variable
    println(children)
    for(c <- children) println(c)
  }
  // Fallback returns None while the branch above returns Unit, so the whole
  // match is typed Any (see the recorded result).
  case _ => None
}

ArrayBuffer(<img alt="image"/>, <a href="#">test</a>)
<img alt="image"/>
<a href="#">test</a>
m: scala.xml.Elem = <li><img alt="image"/><a href="#">test</a></li>
res278: Any = ()


**Note:** an XML pattern in a `case` clause must consist of a single element, and that element cannot have attributes.

#### Modifying

before edit, have to create a copy

`copy(label, attributes, child, prefix, scope)`: the parameters are _named parameters_, so specify only the parameters you want to change.

In [None]:
// copy with one named argument: only the label changes; the children are carried over.
val list = <ul><li>Fred</li><li>Wilma</li></ul>
val list2 = list.copy(label = "ol")

list: scala.xml.Elem = <ul><li>Fred</li><li>Wilma</li></ul>
list2: scala.xml.Elem = <ol><li>Fred</li><li>Wilma</li></ol>


In [None]:
// Add a child: pass the existing children plus the new <li> as the `child` argument.
list.copy(child = list.child ++ <li> add a child</li>)

In [None]:
// `%` returns a new element with the given attribute applied; `image` itself is unchanged.
val image = <img src="hamster.jpg"/>
val image2 = image % Attribute(null, "alt", "add an attribute", Null)
// Attributes chain through the last argument; the existing `src` is replaced (see recorded result).
val image3 = image % Attribute(null, "alt", "add more then one attribute",Attribute(null, "src", "updateAttribute.jpg", Null))

image: scala.xml.Elem = <img src="hamster.jpg"/>
image2: scala.xml.Elem = <img alt="add an attribute" src="hamster.jpg"/>
image3: scala.xml.Elem = <img src="updateAttribute.jpg" alt="add more then one attribute"/>


#### Transforming

In [None]:
import scala.xml.transform._
// Rewrite rule that renames every <ul> element to <ol>; `copy` keeps its other fields.
val rule1 = new RewriteRule {
  override def transform(n: Node): Seq[Node] = n match {
    // A typed pattern gives `e` the static type Elem, so `copy` needs no unchecked cast.
    case e: Elem if e.label == "ul" => e.copy(label = "ol")
    case _ => n
  }
}
// val transformed = new RuleTransformer(rule1).transform(root)
// val transformer = new RuleTransformer(rule1, rule2, rule3); // multi rules

import scala.xml.transform._
rule1: scala.xml.transform.RewriteRule = <function1>


#### Loading and Saving

load

In [None]:
import scala.xml._
// Load directly from a file path.
val file = "/root/git-notebook/scala/myfile.xml"
val root = XML.loadFile(file)

// using java api
import java.io.{FileInputStream, InputStreamReader}
import java.net.URL
// From an InputStream.
val root2 = XML.load(new FileInputStream(file))
// From a Reader, with the character encoding given explicitly.
val root3 = XML.load(new InputStreamReader(new FileInputStream(file), "UTF-8"))
// From a URL.
val root4 = XML.load(new URL("https://raw.githubusercontent.com/afewnotes/git-notebook/master/scala/myfile.xml"))

import scala.xml._
file: String = /root/git-notebook/scala/myfile.xml
root: scala.xml.Elem =
<root>
</root>
import java.io.{FileInputStream, InputStreamReader}
import java.net.URL
root2: scala.xml.Elem =
<root>
</root>
root3: scala.xml.Elem =
<root>
</root>
root4: scala.xml.Elem =
<root>
</root>


save 

In [None]:
// Writes `root` to the path held in `file`.
XML.save(file,root)

When saving HTML, specify a doctype.

In [None]:
import scala.xml.dtd._

// Save to a sibling ".html" file with an explicit encoding, an XML declaration
// and the XHTML 1.0 Strict doctype.
val appendName = file + ".html"
XML.save(appendName, root,
enc = "UTF-8",
xmlDecl = true,
doctype = DocType("html",
PublicID("-//W3C//DTD XHTML 1.0 Strict//EN",
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"),
Nil))

// Read the saved file back to check it.
XML.loadFile(appendName)

import scala.xml.dtd._
appendName: String = /root/git-notebook/scala/myfile.xml.html
res287: scala.xml.Elem =
<root>
</root>


self-closing

In [None]:
// With minimizeTags = true the empty <img></img> pairs are written self-closed (see recorded `str`).
val node = <li><img src="hamster.jpg"></img><img src="hamster.jpg"></img></li>
// NOTE(review): the recorded output echoes a compiler diagnostic pointing at `toXML` —
// presumably a deprecation warning; confirm and consider `Utility.serialize` instead.
val str = xml.Utility.toXML(node, minimizeTags = true)

val str = xml.Utility.toXML(node, minimizeTags = true)
                      ^
node: scala.xml.Elem = <li><img src="hamster.jpg"></img><img src="hamster.jpg"></img></li>
str: StringBuilder = <li><img src="hamster.jpg"/><img src="hamster.jpg"/></li>


prettily

In [None]:
// Pretty-print with a width of 100 and an indentation step of 4; the recorded
// result puts each child element on its own indented line.
val printer = new PrettyPrinter(width = 100, step = 4)
val str = printer.formatNodes(node)

printer: scala.xml.PrettyPrinter = scala.xml.PrettyPrinter@3ac92714
str: String =
<li>
    <img src="hamster.jpg"></img>
    <img src="hamster.jpg"></img>
</li>


#### Exercises

In [None]:
// Exercise: indexing an element with (0) gives back the element itself, however
// often it is repeated (compare the recorded values of `a` and `b`).
val n = <fred/>
val a = <fred/>(0) // ?
val b = <fred/>(0)(0) // ?

n: scala.xml.Elem = <fred/>
a: scala.xml.Node = <fred/>
b: scala.xml.Node = <fred/>


In [None]:
// Exercise: brackets need no escaping, braces must be doubled. Note that the
// "// double braces" text below sits inside the literal, so it is element text
// (it shows up in the recorded result), not a Scala comment.
<ul>
<li>Opening bracket: [</li>
<li>Closing bracket: ]</li>
<li>Opening brace: {{</li> // double braces
<li>Closing brace: }}</li>
</ul>

res294: scala.xml.Elem =
<ul>
<li>Opening bracket: [</li>
<li>Closing bracket: ]</li>
<li>Opening brace: {</li> // double braces
<li>Closing brace: }</li>
</ul>


In [None]:
import scala.xml._
// Exercise: literal text versus an embedded string expression.
// The `case _ => None` fallbacks widen the inferred result types (see recorded t1/t2).
val t1 = <li>Fred</li> match { 
  case <li>{Text(t)}</li> => t
  case _ => None
}
// NOTE(review): presumably the embedded {"Fred"} is not a Text node, so the
// second case is the one that matches here — confirm.
val t2 = <li>{"Fred"}</li> match { 
  case <li>{Text(t)}</li> => t 
  case <li>{s}</li> => s  // different
  case _ => None
}

import scala.xml._
t1: java.io.Serializable = Fred
t2: Object = Fred


In [None]:
import scala.xml._
// Load the sample page; every query below runs against this document.
val html = XML.loadFile("/root/git-notebook/scala/test.html")
val test = html \\ "img"
// Images that have no `alt` attribute.
val nonalt = test.filter(_.attribute("alt").isEmpty)
// The `src` value of every image.
val srcs = html \\ "img" \\ "@src"
// Table of link text -> href. An <a> without an href is skipped rather than
// failing on Option.get, and the value is read with `.text` (whole value, unescaped).
val links = for {
  a <- html \\ "a"
  href <- a.attribute("href").toList
} yield a.text -> href.text
links.toMap

import scala.xml._
html: scala.xml.Elem =
<html>
    <head>
        <meta charset="utf-8"/>
        <meta content="width=device-width" name="viewport"/>
        <title/>
    </head>
    <body>
        <img src="1.gif"/>
        <img src="2.gif"/>
        <img alt="3" src="3.gif"/>
        <img alt="4" src="4.gif"/>
        <a href="www.baidu.com">baidu</a>
        <a href="www.google.com">google</a>
        <a href="www.bing.com">bing</a>
    </body>
</html>
test: scala.xml.NodeSeq = NodeSeq(<img src="1.gif"/>, <img src="2.gif"/>, <img alt="3" src="3.gif"/>, <img alt="4" src="4.gif"/>)
nonalt: scala.xml.NodeSeq = NodeSeq(<img src="1.gif"/>, <img src="2.gif"/>)
srcs: scala.xml.NodeSeq = NodeSeq(1.gif, 2.gif, 3.gif, 4.gif)
links: scala.collection.immutable.Seq[(String, String)] = List((ba...

In [None]:
// map -> table
/** Renders every map entry as a `<dt>key</dt><dd>value</dd>` pair inside a single `<dl>`. */
def convert(src: Map[String,String]):Elem = {
  val entries = src.map { case (key, value) => <dt>{key}</dt><dd>{value}</dd> }
  <dl>{entries}</dl>
}
val dl = convert(Map("a"->"1","b"->"2"))

// table -> map
// refer
// https://github.com/viktor-podzigun/scala-impatient/blob/9569bce74837102f7a98bfea828d4f6af6788f3a/src/main/scala/Chapter16.scala#L182
/**
 * Rebuilds a key -> value map from a `<dl>` such as the one `convert` produces.
 *
 * Walks the children of `src` in order: each `<dt>` sets the current key and
 * each `<dd>` stores its text under that key. Children of any other shape are
 * ignored instead of raising a MatchError.
 *
 * @param src the `<dl>` element to read
 * @return the recovered entries; a `<dd>` seen before any `<dt>` is stored under ""
 */
def reconvert(src: Elem): Map[String,String] = {
  // Local mutable state only; the caller gets an immutable snapshot.
  val result = new collection.mutable.HashMap[String,String]
  var ck = ""
  // Iterate the argument itself, not the notebook-level `dl` value.
  for (d <- src.child) d match {
    case <dt>{k}</dt> => ck = k.text
    case <dd>{v}</dd> => result(ck) = v.text
    case _ => () // not a single-child <dt>/<dd>: skip it
  }
  result.toMap
}
reconvert(dl)

convert: (src: Map[String,String])scala.xml.Elem
dl: scala.xml.Elem = <dl><dt>a</dt><dd>1</dd><dt>b</dt><dd>2</dd></dl>
reconvert: (src: scala.xml.Elem)Map[String,String]
res247: Map[String,String] = Map(b -> 2, a -> 1)


In [None]:

import scala.xml.transform._
// Rewrite rule that adds alt="TODO" to every <img> element lacking an alt attribute.
val rule1 = new RewriteRule {
  override def transform(n: Node): Seq[Node] = n match {
    // A typed pattern gives `e` the static type Elem, so `%` needs no unchecked cast.
    case e: Elem if e.label == "img" && e.attribute("alt").isEmpty =>
      e % Attribute(null, "alt", "TODO", Null)
    case _ => n
  }
}
new RuleTransformer(rule1).transform(html)

import scala.xml.transform._
rule1: scala.xml.transform.RewriteRule = <function1>
res245: Seq[scala.xml.Node] =
<html>
    <head>
        <meta charset="utf-8"/>
        <meta content="width=device-width" name="viewport"/>
        <title/>
    </head>
    <body>
        <img alt="TODO" src="1.gif"/>
        <img alt="TODO" src="2.gif"/>
        <img alt="3" src="3.gif"/>
        <img alt="4" src="4.gif"/>
        <a href="www.baidu.com">baidu</a>
        <a href="www.google.com">google</a>
        <a href="www.bing.com">bing</a>
    </body>
</html>
