-
Notifications
You must be signed in to change notification settings - Fork 0
/
title.go
40 lines (33 loc) · 864 Bytes
/
title.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
package detector
import (
"net/http"
"regexp"
"strings"
"golang.org/x/net/html"
)
// extractTitlePattern matches a <title ...>...</title> element,
// case-insensitively. `.` does not match newlines in this pattern, so a title
// whose text spans several lines is not matched. It is compiled once at
// package initialisation instead of on every ExtractTitle call.
var extractTitlePattern = regexp.MustCompile(`(?im)<\s*title.*>(.*?)<\s*/\s*title>`)

// ExtractTitle returns the text of the first <title> element found in
// bodyBytes, with HTML entities unescaped. It returns an empty string when no
// title element is found.
//
// header is the response header set; only its Content-Type value is consulted.
// When that value declares the GB2312 charset (compared case-insensitively,
// since HTTP charset names are case-insensitive), the title is passed through
// Decodegbk before being returned. If Decodegbk reports an error, the
// undecoded title is returned as a best effort.
func ExtractTitle(bodyBytes []byte, header http.Header) (title string) {
	// Only the first match is ever used, so stop at it rather than collecting
	// every match in the body. Matching on the byte slice also avoids copying
	// the whole body into a string.
	if match := extractTitlePattern.Find(bodyBytes); match != nil {
		title = html.UnescapeString(trimTitleTags(string(match)))
	}

	// Non UTF-8 special case: GB2312-declared responses.
	contentType := header.Get("Content-Type")
	if !strings.Contains(strings.ToLower(contentType), "charset=gb2312") {
		return title
	}

	titleUtf8, err := Decodegbk([]byte(title))
	if err != nil {
		// Deliberate best effort: fall back to the undecoded title.
		return title
	}
	return string(titleUtf8)
}
// trimTitleTags strips the surrounding tags from a "<title ...>text</title>"
// fragment and returns the text in between.
//
// The text starts right after the first '>' (or at the beginning of the input
// when there is no '>') and ends at the first "</" that follows it. When no
// "</" follows the opening tag - for example a closing tag written as
// "< /title>" - the text ends at the last '<' instead, and when there is no
// '<' at all the remainder of the input is returned. The function never
// panics, whatever the input.
func trimTitleTags(title string) string {
	// strings.Index returns -1 when '>' is absent, which makes start 0.
	start := strings.Index(title, ">") + 1
	inner := title[start:]

	// Search only after the opening tag, so that a "</" appearing inside the
	// opening tag cannot produce an end offset smaller than the start offset.
	if end := strings.Index(inner, "</"); end >= 0 {
		return inner[:end]
	}

	// No literal "</": the closing tag may contain whitespace between '<' and
	// '/'. A closing tag holds no other '<', so the last '<' is where it starts.
	if end := strings.LastIndex(inner, "<"); end >= 0 {
		return inner[:end]
	}

	return inner
}