/
html.go
155 lines (134 loc) · 4.38 KB
/
html.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
package main
import (
	"bytes"
	"errors"
	"html"
	"html/template"
	"log"
	"net/http"
	"path/filepath"
	"regexp"
)
// templateNameRE matches the only content template names renderPage accepts:
// one or more ASCII letters or underscores. It is compiled once at package
// scope rather than on every call.
var templateNameRE = regexp.MustCompile("^[_a-zA-Z]+$")

// renderPage builds a full page and writes it to rw.
//
// The page is the header template (_header.html), followed by the content
// template (contentTemplate + ".html"), followed by the footer template
// (_footer.html). All three are loaded from settings.TemplateDir and executed
// with data.
//
// contentTemplate must consist solely of ASCII letters and underscores. Any
// other name is rejected with an error before any file is read.
//
// The page is rendered into a buffer and only sent to rw once every template
// has executed successfully, so a failure part way through does not leave a
// partial page in the response. Failures are logged and returned.
func renderPage(settings *Config, rw http.ResponseWriter,
	contentTemplate string, data interface{}) error {
	// Ensure the specified content template is valid.
	if !templateNameRE.MatchString(contentTemplate) {
		return errors.New("invalid template name")
	}

	// Header.
	header, err := template.ParseFiles(
		filepath.Join(settings.TemplateDir, "_header.html"))
	if err != nil {
		log.Printf("Failed to load header: %s", err)
		return err
	}

	// Content.
	funcMap := template.FuncMap{
		"getRowCSSClass": getRowCSSClass,
	}

	// We need the base path as that is the name that gets assigned to the
	// template internally due to how we create the template. That is, through
	// New(), then ParseFiles() - ParseFiles() sets the name of the template
	// using the basename of the file.
	contentTemplateBasePath := contentTemplate + ".html"
	contentTemplatePath := filepath.Join(settings.TemplateDir,
		contentTemplateBasePath)

	content, err := template.New("content").Funcs(funcMap).ParseFiles(
		contentTemplatePath)
	if err != nil {
		log.Printf("Failed to load content template [%s]: %s", contentTemplate, err)
		return err
	}

	// Footer.
	footer, err := template.ParseFiles(
		filepath.Join(settings.TemplateDir, "_footer.html"))
	if err != nil {
		log.Printf("Failed to load footer: %s", err)
		return err
	}

	// Execute the templates into a buffer so that nothing reaches the client
	// unless the whole page rendered.
	var page bytes.Buffer

	if err := header.Execute(&page, data); err != nil {
		log.Printf("Failed to execute header: %s", err)
		return err
	}

	if err := content.ExecuteTemplate(&page, contentTemplateBasePath, data); err != nil {
		log.Printf("Failed to execute content: %s", err)
		return err
	}

	if err := footer.Execute(&page, data); err != nil {
		log.Printf("Failed to execute footer: %s", err)
		return err
	}

	// Write the completed page out.
	if _, err := page.WriteTo(rw); err != nil {
		log.Printf("Failed to write page: %s", err)
		return err
	}

	return nil
}
// getRowCSSClass maps a row index to the CSS class used for that row.
//
// Indices divisible by two get "row1"; every other index gets "row2".
func getRowCSSClass(index int) string {
	switch index % 2 {
	case 0:
		return "row1"
	default:
		return "row2"
	}
}
// descriptionURLRE matches an inline http:// or https:// URL in plain
// (unencoded) text.
//
// A looser pattern, \b(https?://\S+), was used previously but matched
// characters that are not part of the URL.
var descriptionURLRE = regexp.MustCompile(`\b(https?://[A-Za-z0-9\-\._~:/\?#\[\]@!\$&'\(\)\*\+,;=]+)`)

// getHTMLDescription builds the HTML encoded description.
//
// We call this while generating HTML.
//
// text is the unencoded string. The result is that string HTML encoded, with
// each inline URL wrapped in an <a> element linking to it.
func getHTMLDescription(text string) template.HTML {
	// URLs are located in the raw text and each piece is encoded separately.
	// Encoding the whole string first and matching afterwards is wrong: the
	// entities produced for characters next to a URL (such as &#34; for a
	// closing quote or &gt; for a closing bracket) consist of characters the
	// URL pattern accepts, so they would be pulled into the link.
	out := make([]byte, 0, len(text))
	last := 0

	for _, loc := range descriptionURLRE.FindAllStringIndex(text, -1) {
		start, end := loc[0], loc[1]

		// Text between the previous URL (or the start) and this URL.
		out = append(out, template.HTMLEscapeString(text[last:start])...)

		// The URL itself, encoded for use as both attribute value and
		// link text.
		link := template.HTMLEscapeString(text[start:end])
		out = append(out, `<a href="`+link+`">`+link+`</a>`...)

		last = end
	}

	// Whatever follows the final URL (or the whole string if there were none).
	out = append(out, template.HTMLEscapeString(text[last:])...)

	return template.HTML(out)
}
var (
	// htmlRE matches anything from a '<' up to the nearest following '>',
	// newlines included.
	htmlRE = regexp.MustCompile(`(?s)<.*?>`)

	// multiSpaceRE matches a run of one or more whitespace characters.
	multiSpaceRE = regexp.MustCompile(`\s+`)
)

// sanitiseItemText takes text (e.g., a title or description) and turns it
// into plain, non-HTML text.
//
// Some feeds (e.g., Slashdot) include a lot of markup that should not be
// shown, and the markup is applied inconsistently: a title may contain a
// closing tag such as </em> with no matching opening tag. For that reason the
// markup is stripped here rather than being placed into the page raw.
//
// HTML entities are decoded as well, since encoded entities can reach this
// point. For example a title may arrive as "AT&amp;T Gets Patent ..." and
// should read "AT&T Gets Patent ...". The text is encoded again as necessary
// when the page is rendered.
//
// The steps, in order:
//
//  1. Remove everything that looks like an HTML tag.
//  2. Decode HTML entities.
//  3. Replace each run of whitespace with a single space.
func sanitiseItemText(text string) string {
	withoutTags := htmlRE.ReplaceAllString(text, "")
	decoded := html.UnescapeString(withoutTags)
	return multiSpaceRE.ReplaceAllString(decoded, " ")
}