/
http_utils.go
86 lines (75 loc) · 1.9 KB
/
http_utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
package translator
import (
	"bytes"
	"io"
	"io/ioutil"
	"log"
	"mime"
	"net/http"
	"strings"
	"time"

	"github.com/imankulov/linguee-api/cache"
	"golang.org/x/net/html/charset"
)
// downloadURL returns the content of url as a reader over UTF-8 text.
//
// When cache is non-nil it is consulted first; a successful lookup is returned
// as-is and no request is made. Otherwise the page is fetched with a GET
// request whose User-Agent header is userAgent, converted to UTF-8 from the
// charset reported by detectCharset, and the converted bytes are stored in the
// cache together with the response status code. A cache write failure is
// logged and does not fail the download.
//
// The response status code is not checked, so error pages are returned (and
// cached) like any other body.
// NOTE(review): confirm callers rely on that before tightening it.
//
// Every failure is logged and the underlying error is returned unchanged.
func downloadURL(cache cache.Cache, userAgent string, url string) (io.Reader, error) {
	// Bound the whole exchange (connect, headers and body). The zero-value
	// http.Client never times out, so an unresponsive server would otherwise
	// block the caller forever.
	const requestTimeout = 30 * time.Second

	if cache != nil {
		_, cached, err := cache.Get(url)
		if err == nil {
			log.Printf("Cache hit for %s", url)
			return bytes.NewReader(cached), nil
		}
		// Any lookup error is treated as a miss and falls through to the network.
		log.Printf("Cache miss for %s", url)
	}

	// Build the request object and send it to the server.
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		log.Printf("HTTP error: %v", err)
		return nil, err
	}
	req.Header.Set("User-Agent", userAgent)

	client := http.Client{Timeout: requestTimeout}
	resp, err := client.Do(req)
	if err != nil {
		log.Printf("HTTP error: %v", err)
		return nil, err
	}
	// Schedule the close as soon as the response is known to be valid, so that
	// no later return path can skip it.
	defer resp.Body.Close()

	// Read the raw response content into memory.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Printf("HTTP error: %v", err)
		return nil, err
	}

	// Detect the charset and wrap the raw bytes in a reader that yields UTF-8.
	unicodeReader, err := charset.NewReaderLabel(detectCharset(resp), bytes.NewReader(body))
	if err != nil {
		log.Printf("HTTP error: %v", err)
		return nil, err
	}
	unicodeData, err := ioutil.ReadAll(unicodeReader)
	if err != nil {
		log.Printf("HTTP error: %v", err)
		return nil, err
	}

	if cache != nil {
		// Caching is best effort: the caller still gets the freshly downloaded data.
		if err := cache.Set(url, resp.StatusCode, unicodeData); err != nil {
			log.Printf("Warning: unable to populate the cache: %s", err)
		}
	}
	return bytes.NewReader(unicodeData), nil
}
// detectCharset returns the canonical name of the character set announced by
// the "charset" parameter of resp's Content-Type header.
//
// The canonical name of UTF-8 is returned when the header is absent, cannot be
// parsed, carries no charset parameter, or names a charset that
// charset.Lookup does not recognise. Quoted and unquoted parameter values are
// both accepted.
func detectCharset(resp *http.Response) string {
	_, fallback := charset.Lookup("utf-8")

	// Responses assembled by hand may carry stray whitespace, and an absent
	// header is not worth a parse warning.
	contentType := strings.TrimSpace(resp.Header.Get("Content-Type"))
	if contentType == "" {
		return fallback
	}

	_, params, err := mime.ParseMediaType(contentType)
	if err != nil {
		log.Printf("Warning. Unable to parse Content-Type %q: %s. Fall back to %s",
			contentType, err, fallback)
		return fallback
	}

	label, ok := params["charset"]
	if !ok {
		return fallback
	}
	// Lookup reports an empty name for labels it does not know.
	if _, name := charset.Lookup(label); name != "" {
		return name
	}
	// Log the label that failed to resolve, not the (empty) lookup result.
	log.Printf("Warning. Unable to define charset in %s. Fall back to %s",
		label, fallback)
	return fallback
}