/
fetcher.go
112 lines (100 loc) · 2.09 KB
/
fetcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
package fetcher
import (
"context"
"errors"
"log"
"net/url"
"os"
"sort"
"time"
"github.com/hi20160616/ms-ucpnz/configs"
)
// Fetch runs one full collection pass: it fetches the articles, merges them
// with the locally stored ones, filters the combined list, sorts it in reverse
// ByUpdateTime order and hands the result to storage (per the original note,
// `db/articles.json`).
//
// The first error from any step is returned unchanged. The "Fetch Done." line
// is logged on every exit path, including failures.
func Fetch() error {
	title := configs.Data.MS["ucpnz"].Title
	defer log.Printf("[%s] Fetch Done.", title)
	log.Printf("[%s] Fetching ...", title)

	articles, err := fetch(context.Background())
	if err != nil {
		return err
	}

	// Post-processing steps, applied in order to the freshly fetched list.
	steps := []func([]*Article) ([]*Article, error){merge, filter}
	for _, step := range steps {
		if articles, err = step(articles); err != nil {
			return err
		}
	}

	sort.Sort(sort.Reverse(ByUpdateTime(articles)))
	return storage(articles)
}
// fetch downloads the article behind every link returned by fetchLinks
// (per the original note, the URLs set in config.json).
//
// A link whose article cannot be fetched is skipped instead of aborting the
// run. The failure is logged unless it is ErrTimeOverDays, which is treated as
// an expected skip. An article whose Title equals that of an article already
// collected is dropped.
//
// Cancellation is checked between links only: once ctx is done, fetch returns
// (nil, ctx.Err()). An error from fetchLinks is returned unchanged.
func fetch(ctx context.Context) (as []*Article, err error) {
	links, err := fetchLinks()
	if err != nil {
		return nil, err
	}

	for _, link := range links {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, ctxErr
		}

		a := NewArticle()
		a, err = a.fetchArticle(link)
		if err != nil {
			if !errors.Is(err, ErrTimeOverDays) {
				// Unescape only to make the log readable. QueryUnescape
				// returns "" on failure, so fall back to the raw link rather
				// than logging an empty one.
				shown, unescapeErr := url.QueryUnescape(link)
				if unescapeErr != nil {
					shown = link
				}
				log.Printf("[%s] fetch error: %v, link: %s",
					configs.Data.MS["ucpnz"].Title, err, shown)
			}
			continue
		}

		// Ignore redundant articles: the same title was already collected.
		dup := false
		for _, prev := range as {
			if prev.Title == a.Title {
				dup = true
				break
			}
		}
		if !dup {
			as = append(as, a)
		}
	}
	// Per-link failures were handled above and must not leak to the caller.
	return as, nil
}
// merge appends the articles returned by load (the locally stored data) after
// the freshly fetched ones in as.
//
// When load fails with an error matching os.ErrNotExist there is nothing to
// merge and as is returned as is. Any other load error is returned with a nil
// slice.
func merge(as []*Article) ([]*Article, error) {
	stored, err := load()
	switch {
	case err == nil:
		return append(as, stored...), nil
	case errors.Is(err, os.ErrNotExist):
		return as, nil
	default:
		return nil, err
	}
}
// ErrTimeOverDays reports that an article's update time is out of the accepted
// range. fetch treats it as an expected skip and does not log it.
var ErrTimeOverDays = errors.New("article update time out of range")
// filter keeps the articles whose UpdateTime is no older than three days and
// removes duplicates, comparing by Id and keeping the first occurrence.
//
// The returned slice is always non-nil (possibly empty), and the error is
// currently always nil.
func filter(as []*Article) ([]*Article, error) {
	const maxAgeDays = 3
	// Compute the cutoff once so every article is judged against the same
	// instant instead of a clock that advances during the loop.
	cutoff := time.Now().AddDate(0, 0, -maxAgeDays)

	// Deliberately an empty, non-nil slice, as in the original.
	rt := []*Article{}
	for _, a := range as {
		if a.UpdateTime.AsTime().Before(cutoff) {
			// Older than the cutoff, so ignore.
			continue
		}
		dup := false
		for _, kept := range rt {
			if kept.Id == a.Id {
				dup = true
				break
			}
		}
		if !dup {
			rt = append(rt, a)
		}
	}
	return rt, nil
}