### Parsing XML - Go Lang Practical Programming Tutorial p.11
https://www.youtube.com/watch?v=-PATP8IZq5A

In [1]:
// package main

import ("fmt"
        "net/http"
        "io/ioutil"
        "net/url"
        "encoding/xml"
      )

/*
var washPostXML = []byte(`
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
   <sitemap>
      <loc>http://www.washingtonpost.com/news-politics-sitemap.xml</loc>
   </sitemap>
   <sitemap>
      <loc>http://www.washingtonpost.com/news-blogs-politics-sitemap.xml</loc>
   </sitemap>
   <sitemap>
      <loc>http://www.washingtonpost.com/news-opinions-sitemap.xml</loc>
   </sitemap>
</sitemapindex>`)
*/

// SitemapIndex is the decoded form of a sitemap index document: every
// <sitemap> child element becomes one entry of Locations.
//
// NOTE: the field name Locations must start with an uppercase letter;
// otherwise the field is unexported and encoding/xml's Unmarshal cannot
// populate it.
type SitemapIndex struct {
    Locations []Location `xml:"sitemap"`
}

// Location is a single entry of a sitemap index; Loc receives the text of the
// entry's <loc> child element.
type Location struct {
    Loc string `xml:"loc"`
}

// main downloads the Washington Post sitemap index through a fixed HTTP
// proxy, decodes the XML into a SitemapIndex and prints the decoded
// locations.
//
// Every step that can fail is checked; on failure a short message is printed
// and main returns without printing any locations.
func main() {
    // Parse the proxy address once instead of on every request.
    proxyURL, err := url.Parse("http://proxy:8080")
    if err != nil {
        fmt.Println("parsing proxy URL:", err)
        return
    }
    client := &http.Client{
        Transport: &http.Transport{Proxy: http.ProxyURL(proxyURL)},
    }

    resp, err := client.Get("https://www.washingtonpost.com/news-sitemap-index.xml")
    if err != nil {
        // resp is nil when err is non-nil, so resp.Body must not be touched.
        fmt.Println("fetching sitemap index:", err)
        return
    }
    // Deferred so the body is closed on every return path below.
    defer resp.Body.Close()

    // A non-200 response carries an error page, not a sitemap index.
    if resp.StatusCode != http.StatusOK {
        fmt.Println("fetching sitemap index: unexpected status", resp.Status)
        return
    }

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        fmt.Println("reading sitemap index:", err)
        return
    }

    var s SitemapIndex
    if err := xml.Unmarshal(body, &s); err != nil {
        fmt.Println("decoding sitemap index:", err)
        return
    }

    fmt.Println(s.Locations)
}

// Call main explicitly. NOTE(review): presumably required because this is a
// notebook cell, where main is not invoked automatically — confirm.
main()

[{http://www.washingtonpost.com/news-politics-sitemap.xml} {http://www.washingtonpost.com/news-blogs-politics-sitemap.xml} {http://www.washingtonpost.com/news-opinions-sitemap.xml} {http://www.washingtonpost.com/news-blogs-opinions-sitemap.xml} {http://www.washingtonpost.com/news-local-sitemap.xml} {http://www.washingtonpost.com/news-blogs-local-sitemap.xml} {http://www.washingtonpost.com/news-sports-sitemap.xml} {http://www.washingtonpost.com/news-blogs-sports-sitemap.xml} {http://www.washingtonpost.com/news-national-sitemap.xml} {http://www.washingtonpost.com/news-blogs-national-sitemap.xml} {http://www.washingtonpost.com/news-world-sitemap.xml} {http://www.washingtonpost.com/news-blogs-world-sitemap.xml} {http://www.washingtonpost.com/news-business-sitemap.xml} {http://www.washingtonpost.com/news-blogs-business-sitemap.xml} {http://www.washingtonpost.com/news-technology-sitemap.xml} {http://www.washingtonpost.com/news-blogs-technology-sitemap.xml} {http://www.washingtonpost.com/news

为了让输出结果更易读，给 Location 添加 String() 方法（fmt.Println 打印时会自动调用该方法，因此输出中不再带有结构体的花括号）：

In [2]:
import ("fmt"
        "net/http"
        "io/ioutil"
        "net/url"
        "encoding/xml"
      )

// SitemapIndex is the decoded form of a sitemap index document: every
// <sitemap> child element becomes one entry of Locations.
//
// NOTE: the field name Locations must start with an uppercase letter;
// otherwise the field is unexported and encoding/xml's Unmarshal cannot
// populate it.
type SitemapIndex struct {
    Locations []Location `xml:"sitemap"`
}

// Location is a single entry of a sitemap index; Loc receives the text of the
// entry's <loc> child element.
type Location struct {
    Loc string `xml:"loc"`
}

// String returns the location URL verbatim, so that the fmt package prints a
// Location as the bare URL rather than in struct form ({url}).
//
// The URL is returned directly instead of being passed to fmt.Sprintf as the
// format string: URLs routinely contain percent-escapes such as %20, which
// Sprintf would interpret as formatting verbs and corrupt.
func (l Location) String() string {
    return l.Loc
}

// main downloads the Washington Post sitemap index through a fixed HTTP
// proxy, decodes the XML into a SitemapIndex and prints the decoded
// locations.
//
// Every step that can fail is checked; on failure a short message is printed
// and main returns without printing any locations.
func main() {
    // Parse the proxy address once instead of on every request.
    proxyURL, err := url.Parse("http://proxy:8080")
    if err != nil {
        fmt.Println("parsing proxy URL:", err)
        return
    }
    client := &http.Client{
        Transport: &http.Transport{Proxy: http.ProxyURL(proxyURL)},
    }

    resp, err := client.Get("https://www.washingtonpost.com/news-sitemap-index.xml")
    if err != nil {
        // resp is nil when err is non-nil, so resp.Body must not be touched.
        fmt.Println("fetching sitemap index:", err)
        return
    }
    // Deferred so the body is closed on every return path below.
    defer resp.Body.Close()

    // A non-200 response carries an error page, not a sitemap index.
    if resp.StatusCode != http.StatusOK {
        fmt.Println("fetching sitemap index: unexpected status", resp.Status)
        return
    }

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        fmt.Println("reading sitemap index:", err)
        return
    }

    var s SitemapIndex
    if err := xml.Unmarshal(body, &s); err != nil {
        fmt.Println("decoding sitemap index:", err)
        return
    }

    fmt.Println(s.Locations)
}

// Call main explicitly. NOTE(review): presumably required because this is a
// notebook cell, where main is not invoked automatically — confirm.
main()

[http://www.washingtonpost.com/news-politics-sitemap.xml http://www.washingtonpost.com/news-blogs-politics-sitemap.xml http://www.washingtonpost.com/news-opinions-sitemap.xml http://www.washingtonpost.com/news-blogs-opinions-sitemap.xml http://www.washingtonpost.com/news-local-sitemap.xml http://www.washingtonpost.com/news-blogs-local-sitemap.xml http://www.washingtonpost.com/news-sports-sitemap.xml http://www.washingtonpost.com/news-blogs-sports-sitemap.xml http://www.washingtonpost.com/news-national-sitemap.xml http://www.washingtonpost.com/news-blogs-national-sitemap.xml http://www.washingtonpost.com/news-world-sitemap.xml http://www.washingtonpost.com/news-blogs-world-sitemap.xml http://www.washingtonpost.com/news-business-sitemap.xml http://www.washingtonpost.com/news-blogs-business-sitemap.xml http://www.washingtonpost.com/news-technology-sitemap.xml http://www.washingtonpost.com/news-blogs-technology-sitemap.xml http://www.washingtonpost.com/news-lifestyle-sitemap.xml http://www