/
convert.go
120 lines (103 loc) · 2.64 KB
/
convert.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package googledrive2hugo
import (
"bytes"
"fmt"
"strings"
"github.com/client9/ilog"
"github.com/spf13/cast"
"golang.org/x/net/html"
)
// removeNbsp returns src with every non-breaking space (U+00A0) replaced by
// a plain ASCII space. Google's export contains these characters; it is
// unclear why or how they get added.
func removeNbsp(src string) string {
	const nbsp = "\u00a0"
	return strings.ReplaceAll(src, nbsp, " ")
}
// Converter holds the configuration used by ToHTML and FromNode to turn a
// Google Docs HTML tree into content for Hugo.
type Converter struct {
// Logger is the base logger. FromNode calls Logger.With("fn", <filter type
// name>) to derive the logger handed to each filter, so it must be set
// whenever Filters is non-empty.
Logger ilog.Logger
// Filters are extra transforms that FromNode runs, in slice order, after
// the built-in Gdoc* transforms. A filter returning an error aborts the
// conversion.
Filters []Runner
}
// ToHTML parses src as an HTML document, converts the node returned by
// getBody with FromNode, and returns the result of HugoContentWrite for the
// converted content and the merged metadata.
//
// The metadata returned by FromNode is combined with fileMeta through
// MetaMerge (precedence is whatever MetaMerge implements; it is not visible
// here). The merged metadata must contain a "date" entry that cast.ToTimeE
// can convert to a time.Time. From that date three derived entries are added
// for rollups and archives:
//
//	"year"  -> e.g. "2018"
//	"month" -> e.g. "2018/03"
//	"day"   -> e.g. "2018/03/07"
//
// An error is returned when parsing or conversion fails, when "date" is
// missing, or when it cannot be converted to a time. In the last case the
// underlying conversion error is wrapped so callers can inspect the cause.
func (c *Converter) ToHTML(src []byte, fileMeta map[string]interface{}) ([]byte, error) {
	root, err := html.Parse(bytes.NewReader(src))
	if err != nil {
		return nil, err
	}

	content, textMeta, err := c.FromNode(getBody(root))
	if err != nil {
		return nil, err
	}

	meta := MetaMerge(textMeta, fileMeta)

	// generate some extra tags for rollup or archives
	value, ok := meta["date"]
	if !ok {
		return nil, fmt.Errorf("unable to get document date in %s", string(src))
	}
	date, err := cast.ToTimeE(value)
	if err != nil {
		// Keep the cause: without it the caller cannot tell why the value
		// was rejected.
		return nil, fmt.Errorf("unable to convert date '%T' %v to time.Time: %w", value, value, err)
	}

	year, month, day := date.Year(), date.Month(), date.Day()
	meta["year"] = fmt.Sprintf("%d", year)
	meta["month"] = fmt.Sprintf("%d/%02d", year, month)
	meta["day"] = fmt.Sprintf("%d/%02d/%02d", year, month, day)

	return HugoContentWrite(content, meta)
}
// parseFragment converts an HTML fragment rather than a whole document.
//
// src is parsed with html.ParseFragment using a node from
// newElementNode("body") as the parsing context; the resulting nodes are
// appended to that same node, which is then converted with FromNode. The
// converted content is returned as a string and the metadata produced by
// FromNode is discarded.
func (c *Converter) parseFragment(src string) (string, error) {
	container := newElementNode("body")

	parsed, err := html.ParseFragment(strings.NewReader(src), container)
	if err != nil {
		return "", err
	}

	// Parsed nodes come back detached; hang them under the container so
	// FromNode sees them as its children.
	for i := range parsed {
		container.AppendChild(parsed[i])
	}

	converted, _, convErr := c.FromNode(container)
	if convErr != nil {
		return "", convErr
	}
	return string(converted), nil
}
// FromNode converts an already-parsed Google Docs node tree. Use it when the
// document is available as an *html.Node rather than raw bytes.
//
// Steps, in order:
//  1. HugoFrontMatter(root) produces the metadata map.
//  2. The built-in Gdoc* transforms are applied to root.
//  3. Every entry of c.Filters is run against root with a logger tagged
//     ("fn") with the filter's type name.
//  4. The children of root are rendered, then shortcodes and entities are
//     unescaped and surrounding whitespace is trimmed.
//
// It returns the rendered bytes and the metadata. The first failing step
// aborts the conversion and its error is returned with nil for the other two
// results.
func (c *Converter) FromNode(root *html.Node) ([]byte, map[string]interface{}, error) {
	// hugo specific
	frontMatter, err := HugoFrontMatter(root)
	if err != nil {
		return nil, nil, err
	}

	// gdoc specific transforms, applied in this fixed order
	transforms := []func(*html.Node) error{
		GdocImg,
		GdocSpan,
		GdocBlockquotePre,
		GdocBlockquote,
		GdocCodeBlock,
		GdocTable,
		GdocAttr,
	}
	for _, transform := range transforms {
		if txErr := transform(root); txErr != nil {
			return nil, nil, txErr
		}
	}

	// caller-supplied filters
	for _, filter := range c.Filters {
		// "%T" yields a qualified type name such as "*pkg.Name"; keep only
		// what follows the last dot as the logger label.
		label := fmt.Sprintf("%T", filter)
		if dot := strings.LastIndexByte(label, '.'); dot >= 0 {
			label = label[dot+1:]
		}
		scoped := c.Logger.With("fn", label)
		if runErr := filter.Run(root, scoped); runErr != nil {
			return nil, nil, runErr
		}
	}

	// Render into buffer
	var rendered bytes.Buffer
	if renderErr := renderChildren(&rendered, root); renderErr != nil {
		return nil, nil, renderErr
	}

	// final hugo fixups.. needed to be done outside of tree:
	// shortcodes first, then entities, then trim.
	out := bytes.TrimSpace(unescapeEntities(unescapeShortcodes(rendered.Bytes())))
	return out, frontMatter, nil
}