Skip to content

Commit

Permalink
log error url, not panic
Browse files Browse the repository at this point in the history
  • Loading branch information
bold881 committed Dec 21, 2017
1 parent 18cf014 commit 0c9196f
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 4 deletions.
5 changes: 4 additions & 1 deletion crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,10 @@ func CrawlGoQuery(szurl string, chPI chan PageItem, ch2Crawl chan string) {

doc, err := goquery.NewDocument(szurl)
if err != nil {
log.Fatal(err)
//log.Fatal(err)
log.Println(err)
crwedUrls.Del(szurl)
return
}

// news article
Expand Down
2 changes: 0 additions & 2 deletions news.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,10 @@ func main() {
if len(ch2Crawl) > 0 {
url := <-ch2Crawl
go CrawlGoQuery(url, chPageItem, ch2Crawl)
fmt.Println("sleep 10 milliseconds...")
time.Sleep(10 * time.Millisecond)
continue
} else if counter < 100 {
counter++
fmt.Println("sleep 10 seconds...")
time.Sleep(10 * time.Second)
continue
}
Expand Down
9 changes: 9 additions & 0 deletions pageitem.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,12 @@ func (cURLs *CrawledURLs) Add(url string) {
cURLs.crawled[url] = true
cURLs.Unlock()
}

// Del removes url from the set of crawled URLs while holding the receiver's
// lock. Removing a URL that is not in the set is a no-op.
func (cURLs *CrawledURLs) Del(url string) {
	cURLs.Lock()
	defer cURLs.Unlock()

	// The built-in delete is a no-op on a nil map, so there is no need to
	// allocate the map first just to remove a key from it.
	delete(cURLs.crawled, url)
}
2 changes: 1 addition & 1 deletion saver.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func string2Time(rawstr string) time.Time {
} else if strings.Contains(rawstr, "來源") {
strarr = strings.Split(rawstr, "來源")
} else {
return
return time.Now()
}
rawstr = strarr[0]
rawstr = strings.TrimSpace(rawstr)
Expand Down

0 comments on commit 0c9196f

Please sign in to comment.