forked from jackyzha0/hugo-obsidian
/
walk.go
145 lines (118 loc) · 3.6 KB
/
walk.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
package main
import (
"fmt"
"io/fs"
"io/ioutil"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/gernest/front"
)
// findUniqueShortnames resolves a shortname collision between two pages by
// prefixing the shortname with parent directory names, innermost first, until
// the two results differ.
//
// Example:
//
//	/animals/cats/paws
//	/animals/dogs/paws
//	shortname collision: paws
//	updated shortnames: cats/paws, dogs/paws
//
// shortname is the colliding name, source is the path of the page being added
// and existingPath is the path already registered under shortname.
//
// When one path runs out of parent directories before the other, its root
// element ("/" or ".") is used as the distinguishing component, so "/paws" and
// "/animals/paws" become "/paws" and "animals/paws".
//
// Both results are empty when no distinguishing directory exists, which is the
// case when source and existingPath are identical.
func findUniqueShortnames(shortname, source, existingPath string) (newSourceShortname, newExistingShortname string) {
	if source == existingPath {
		fmt.Printf("source (%s) and existing path (%s) are the same, not able to create unique shortnames", source, existingPath)
		return "", ""
	}

	restOfSource := filepath.Dir(source)
	restOfExisting := filepath.Dir(existingPath)
	commonPath := shortname
	for {
		sourceBase := filepath.Base(restOfSource)
		existingBase := filepath.Base(restOfExisting)
		if sourceBase != existingBase {
			// First differing directory: each page is prefixed with its OWN
			// directory name, placed in front of the shared suffix.
			return path.Join(sourceBase, commonPath), path.Join(existingBase, commonPath)
		}

		parentOfSource := filepath.Dir(restOfSource)
		parentOfExisting := filepath.Dir(restOfExisting)
		if parentOfSource == restOfSource && parentOfExisting == restOfExisting {
			// Both paths are exhausted ("/" or ".") without ever differing;
			// stop instead of looping forever.
			return "", ""
		}

		commonPath = path.Join(sourceBase, commonPath)
		restOfSource, restOfExisting = parentOfSource, parentOfExisting
	}
}
// walk recursively walks root and parses every file whose extension equals ext.
//
// Parameters:
//   - root: directory to scan.
//   - ext: extension to match, compared against filepath.Ext of the entry name
//     (so it includes the leading dot).
//   - index: when true, the content index and shortname lookup are populated in
//     addition to collecting links.
//   - ignorePaths: slash-normalized paths to skip. A matching entry is counted
//     as private; a matching directory is not pruned, the walk still descends
//     into it.
//
// Returns the links produced by parse for every matching file (links of draft
// pages included), the content index keyed by processed source path, and a map
// from page shortname to source path. Pages whose frontmatter marks them as a
// draft are left out of the index and counted as private.
//
// walk panics when the directory walk fails, which includes a file that cannot
// be stat'ed; getText panics when a file cannot be read.
func walk(root, ext string, index bool, ignorePaths map[string]struct{}) (res []Link, i ContentIndex, shortnameToPathLookup map[string]string) {
	fmt.Printf("Scraping %s\n", root)
	i = make(ContentIndex)
	shortnameToPathLookup = map[string]string{}

	m := front.NewMatter()
	m.Handle("---", front.YAMLHandler)

	nPrivate := 0
	start := time.Now()

	err := filepath.WalkDir(root, func(fp string, d fs.DirEntry, e error) error {
		if e != nil {
			return e
		}

		// path normalize fp
		s := filepath.ToSlash(fp)
		if _, ignored := ignorePaths[s]; ignored {
			fmt.Printf("[Ignored] %s\n", d.Name())
			nPrivate++
			return nil
		}
		if filepath.Ext(d.Name()) != ext {
			return nil
		}

		res = append(res, parse(s, root)...)
		if !index {
			return nil
		}

		text := getText(s)
		frontmatter, body, err := m.Parse(strings.NewReader(text))
		if err != nil {
			// No usable frontmatter: index the whole file as the body.
			frontmatter = map[string]interface{}{}
			body = text
		}

		// check if page is private
		if frontmatterIsDraft(frontmatter) {
			fmt.Printf("[Ignored] %s\n", d.Name())
			nPrivate++
			return nil
		}

		info, err := os.Stat(s)
		if err != nil {
			// Surface the failure instead of dereferencing a nil FileInfo.
			return fmt.Errorf("stat %s: %w", s, err)
		}

		source := processSource(trim(s, root, ".md"))
		i[source] = Content{
			LastModified: info.ModTime(),
			Title:        frontmatterTitle(frontmatter),
			Content:      body,
		}
		registerShortname(shortnameToPathLookup, source)
		return nil
	})
	if err != nil {
		panic(err)
	}

	fmt.Printf("[DONE] in %s\n", time.Since(start).Round(time.Millisecond))
	fmt.Printf("Ignored %d private files \n", nPrivate)
	fmt.Printf("Parsed %d total links \n", len(res))
	return res, i, shortnameToPathLookup
}

// frontmatterTitle returns the "title" frontmatter value. A missing or null
// title yields "Untitled Page"; a non-string value (e.g. a YAML number) is
// formatted with fmt.Sprint instead of panicking on a failed type assertion.
func frontmatterTitle(frontmatter map[string]interface{}) string {
	switch title := frontmatter["title"].(type) {
	case string:
		return title
	case nil:
		return "Untitled Page"
	default:
		return fmt.Sprint(title)
	}
}

// frontmatterIsDraft reports whether the "draft" frontmatter value marks the
// page as private. A boolean is used as-is, the string "true" (any case) also
// counts, and every other value, including a missing key, means not a draft.
func frontmatterIsDraft(frontmatter map[string]interface{}) bool {
	switch draft := frontmatter["draft"].(type) {
	case bool:
		return draft
	case string:
		return strings.EqualFold(strings.TrimSpace(draft), "true")
	default:
		return false
	}
}

// registerShortname records source under its base name in lookup. If another
// path already owns that shortname, both entries are re-registered under the
// disambiguated names from findUniqueShortnames. When no disambiguation is
// available (an empty name comes back) the existing entry is left untouched
// rather than storing a page under the empty key.
//
// NOTE(review): once a collided shortname has been removed, a later page with
// the same base name is registered under the bare shortname again.
func registerShortname(lookup map[string]string, source string) {
	shortname := filepath.Base(source)
	existingPath, collides := lookup[shortname]
	if !collides {
		lookup[shortname] = source
		return
	}

	// we have a collision with the shortname, lets find unique names for the two
	newSourceShortname, newExistingShortname := findUniqueShortnames(shortname, source, existingPath)
	if newSourceShortname == "" || newExistingShortname == "" {
		return
	}
	delete(lookup, shortname)
	lookup[newSourceShortname] = source
	lookup[newExistingShortname] = existingPath
}
// getText returns the full contents of the file at dir as a string.
// Despite its name, dir is the path of the file to read.
// It panics if the file cannot be read.
func getText(dir string) string {
	contents, readErr := ioutil.ReadFile(dir)
	if readErr == nil {
		return string(contents)
	}
	panic(readErr)
}