-
Notifications
You must be signed in to change notification settings - Fork 37
/
fileutil.go
81 lines (68 loc) · 2.03 KB
/
fileutil.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package scraper
import (
"bytes"
"net/url"
"os"
"path/filepath"
"go.uber.org/zap"
)
var (
	// PageExtension is the file extension given to every downloaded page.
	PageExtension = ".html"
	// PageDirIndex is the name of the index file stored for a directory.
	PageDirIndex = "index" + PageExtension
)

// GetPageFilePath maps the path of a page URL to the file name the page is
// stored under.
//
// An empty path or "/" yields the bare index file name. A path ending in "/"
// gets the index file name appended. Every other path ends in PageExtension:
// the extension is appended when the path has none and substituted when the
// path carries a different one.
func GetPageFilePath(url *url.URL) string {
	pagePath := url.Path

	switch {
	case pagePath == "" || pagePath == "/":
		// Root of the domain.
		return PageDirIndex
	case pagePath[len(pagePath)-1] == '/':
		// Directory URL: the page is the index file inside that directory.
		return pagePath + PageDirIndex
	}

	ext := filepath.Ext(pagePath)
	if ext == PageExtension {
		return pagePath
	}
	// Cutting off ext and appending PageExtension covers both remaining
	// cases: with no extension (ext == "") nothing is cut off.
	return pagePath[:len(pagePath)-len(ext)] + PageExtension
}
// GetFilePath returns the local file path under which the content of the
// given URL is stored.
//
// For pages (isAPage is true) the file name is taken from GetPageFilePath,
// otherwise the URL path is used as is. The result is rooted at
// <OutputDirectory>/<scraper host>; content from any other host is placed in
// a "_<host>" sub directory below that.
//
// The URL path is untrusted input. It is resolved as a rooted path before it
// is joined, so ".." elements (for example from percent-encoded "%2e%2e"
// segments in a link) cannot move the result out of the host directory.
func (s *Scraper) GetFilePath(url *url.URL, isAPage bool) string {
	fileName := url.Path
	if isAPage {
		fileName = GetPageFilePath(url)
	}

	// Joining onto the root cleans the name as a rooted path, which drops any
	// leading ".." element. Names without such elements are not altered in a
	// way that changes the final join below.
	fileName = filepath.Join(string(filepath.Separator), fileName)

	var externalHost string
	if url.Host != s.URL.Host {
		externalHost = "_" + url.Host // _ is a prefix for external domains on the filesystem
	}

	return filepath.Join(s.OutputDirectory, s.URL.Host, externalHost, fileName)
}
// writeFile stores the content of buf in the file at filePath, creating the
// directory for it beforehand.
//
// It returns the first error from creating the directory, creating the file
// or writing to it. After a failed write the file is closed and removed on a
// best-effort basis. On success the result of closing the file is returned.
func (s *Scraper) writeFile(filePath string, buf *bytes.Buffer) error {
	targetDir := filepath.Dir(filePath)
	// A directory part shorter than the host name is re-rooted under
	// ./<host>; the original comment calls this the root dir case.
	// NOTE(review): the file itself is still created at the unmodified
	// filePath below - confirm that this is intended.
	if len(targetDir) < len(s.URL.Host) {
		targetDir = filepath.Join(".", s.URL.Host, targetDir)
	}

	s.log.Debug("Creating dir", zap.String("Path", targetDir))
	if err := os.MkdirAll(targetDir, os.ModePerm); err != nil {
		return err
	}

	s.log.Debug("Creating file", zap.String("Path", filePath))
	out, err := os.Create(filePath)
	if err != nil {
		return err
	}

	if _, writeErr := out.Write(buf.Bytes()); writeErr != nil {
		// Cleanup is best effort; the write error is the one reported.
		_ = out.Close()
		_ = os.Remove(filePath)
		return writeErr
	}

	return out.Close()
}