Skip to content

Commit

Permalink
Ignore base64-encoded images when downloading
Browse files — browse the repository at this point in the history
  • Loading branch information
mesaglio committed Nov 11, 2021
1 parent 91b6a81 commit 7cb4ed5
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions pkg/crawler/collector.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"net/http"
"net/http/cookiejar"
"strings"

"github.com/gocolly/colly"
)
Expand Down Expand Up @@ -40,6 +41,9 @@ func Collector(ctx context.Context, url string, projectPath string, collyOpts ..
c.OnHTML("img[src]", func(e *colly.HTMLElement) {
// src attribute
link := e.Attr("src")
if strings.Contains(link, "data:image") {
return
}
// Print link
fmt.Println("Img found", "-->", link)
// extraction
Expand Down

0 comments on commit 7cb4ed5

Please sign in to comment.