add ability to filter logs by status
atomicptr committed Apr 2, 2020
1 parent 17f5e60 commit d605a74
Showing 11 changed files with 385 additions and 22 deletions.
17 changes: 17 additions & 0 deletions README.md
@@ -65,6 +65,23 @@ Add some cookies/headers:
$ crab crawl:sitemap https://domain.com/sitemap.xml --cookie auth_token=12345 --header X-Bypass-Cache=1
```

### Filter by Status Code

You can filter the output by its status code:

```bash
# This will only return responses with a 200 OK
$ crab crawl:sitemap https://domain.com/sitemap.xml --filter-status=200
# This will only return responses that are not 200 OK
$ crab crawl:sitemap https://domain.com/sitemap.xml --filter-status=!200
# This will only return responses between 500-599 (range)
$ crab crawl:sitemap https://domain.com/sitemap.xml --filter-status=500-599
# This will only return responses with 200 or 404 (multiple conditions are OR-ed: if any one matches, the whole filter matches)
$ crab crawl:sitemap https://domain.com/sitemap.xml --filter-status=200,404
# This will only return responses with a status code greater than 500
# (quote the value so your shell does not treat > as a redirect)
$ crab crawl:sitemap https://domain.com/sitemap.xml --filter-status='>500'
```

## License

MIT
8 changes: 8 additions & 0 deletions pkg/cli/crawl/commons.go
@@ -46,6 +46,13 @@ func registerStandardCrawlCommandFlags(cmd *cobra.Command, flagOptions *crawlerF
nil,
"add headers (as key=value pairs) to each request",
)
cmd.PersistentFlags().StringVarP(
&flagOptions.FilterStatusQuery,
"filter-status",
"",
"",
"filter logs by status",
)
}

func registerStandardCrawlCommandFlagModifiers(modifier *crawler.RequestModifier, flagOptions crawlerFlagOptions) {
@@ -82,6 +89,7 @@ func crawlUrls(urls []string, modifier crawler.RequestModifier, flagOptions craw
HttpClient: http.Client{
Timeout: flagOptions.HttpTimeout,
},
FilterStatusQuery: flagOptions.FilterStatusQuery,
}
crawl.Crawl(requests)

15 changes: 8 additions & 7 deletions pkg/cli/crawl/flags.go
@@ -8,13 +8,14 @@ import (
)

type crawlerFlagOptions struct {
NumberOfWorkers int
PrefixUrl string
HttpTimeout time.Duration
CookieStrings []string
HeaderStrings []string
cookieMap map[string]string
headerMap map[string]string
NumberOfWorkers int
PrefixUrl string
HttpTimeout time.Duration
CookieStrings []string
HeaderStrings []string
FilterStatusQuery string
cookieMap map[string]string
headerMap map[string]string
}

const (
12 changes: 8 additions & 4 deletions pkg/crawler/crawler.go
@@ -5,13 +5,17 @@ import (
"sync"
"time"

"github.com/atomicptr/crab/pkg/filter"
"github.com/pkg/errors"
)

//Crawler crawls urls in parallel
type Crawler struct {
HttpClient http.Client
NumberOfWorkers int
HttpClient http.Client
NumberOfWorkers int
FilterStatusQuery string
statusFilter *filter.Filter
printMutex sync.Mutex
}

//Crawl crawls a list of HTTP requests with a set number of workers
@@ -50,9 +54,9 @@ func (c *Crawler) crawlRequest(req *http.Request) {
duration := time.Since(requestStartTime)

if err != nil {
logError(errors.Wrapf(err, "error with url %s", req.URL), req.URL.String(), duration)
c.logError(errors.Wrapf(err, "error with url %s", req.URL), req.URL.String(), duration)
return
}

log(res.StatusCode, req.URL.String(), duration)
c.log(res.StatusCode, req.URL.String(), duration)
}
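
With the new `FilterStatusQuery` field the filter is configured as a plain string on the `Crawler` itself. A minimal sketch of how calling code could wire this up (not part of the commit; the URL, timeout and worker count are placeholders, and it assumes `Crawl` accepts a slice of `*http.Request`, mirroring how `crawlUrls` builds its requests):

```go
package main

import (
	"net/http"
	"time"

	"github.com/atomicptr/crab/pkg/crawler"
)

func main() {
	c := crawler.Crawler{
		HttpClient:        http.Client{Timeout: 10 * time.Second},
		NumberOfWorkers:   4,
		FilterStatusQuery: "!200", // only log responses that are not 200 OK
	}

	// Assumption: Crawl takes the prepared requests as a slice; error handling omitted.
	req, _ := http.NewRequest(http.MethodGet, "https://domain.com/", nil)
	c.Crawl([]*http.Request{req})
}
```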
27 changes: 16 additions & 11 deletions pkg/crawler/log.go
@@ -4,41 +4,46 @@ import (
"encoding/base64"
"encoding/json"
"fmt"
"sync"
"time"
)

var printMutex sync.Mutex
"github.com/atomicptr/crab/pkg/filter"
)

// safePrintln logs a message protected by a mutex
func safePrintln(message string) {
printMutex.Lock()
fmt.Println(message)
printMutex.Unlock()
func (c *Crawler) safePrintln(statusCode int, message string) {
if c.statusFilter == nil {
c.statusFilter = filter.NewFilter()
}

if c.statusFilter.IsValid(c.FilterStatusQuery, int64(statusCode)) {
c.printMutex.Lock()
fmt.Println(message)
c.printMutex.Unlock()
}
}

// log logs a json log with the status code, url, timestamp and duration of the request
func log(statusCode int, url string, duration time.Duration) {
func (c *Crawler) log(statusCode int, url string, duration time.Duration) {
message := fmt.Sprintf(
`{"status": %d, "url": "%s", "time": %d, "duration": %d}`,
statusCode,
url,
time.Now().Unix(),
duration.Milliseconds(),
)
safePrintln(message)
c.safePrintln(statusCode, message)
}

// logError logs a json log with an error, url, timestamp and duration of the request
func logError(err error, url string, duration time.Duration) {
func (c *Crawler) logError(err error, url string, duration time.Duration) {
message := fmt.Sprintf(
`{"err": %s, "url": "%s", "time": %d, "duration": %d}`,
escapeString(err.Error()),
url,
time.Now().Unix(),
duration.Milliseconds(),
)
safePrintln(message)
c.safePrintln(218, message)
}

// escapeString escapes a string to be used as a json value
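
The body of `escapeString` is collapsed in this view. Since `logError` interpolates its result into the JSON message without surrounding quotes, the helper has to produce a complete JSON string literal; a minimal sketch of such a helper (an assumption based on the already-imported `encoding/json`, not necessarily the committed implementation):

```go
// escapeString returns s as a quoted, escaped JSON string literal.
// Sketch only; the actual implementation in the repository may differ.
func escapeString(s string) string {
	b, err := json.Marshal(s) // json.Marshal quotes and escapes the string
	if err != nil {
		return `""`
	}
	return string(b)
}
```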
45 changes: 45 additions & 0 deletions pkg/filter/filter.go
@@ -0,0 +1,45 @@
package filter

import (
"strings"
)

// Filter is a collection of rules
type Filter struct {
Rules []Rule
}

// NewFilter returns a new filter with the default set of rules
func NewFilter() *Filter {
filter := Filter{
Rules: []Rule{
RuleIsValue,
RuleIsNotValue,
RuleIsInRange,
RuleIsNotInRange,
RuleIsGreaterThan,
RuleIsSmallerThan,
},
}
return &filter
}

// IsValid checks if the query is valid for the given value
func (f *Filter) IsValid(query string, value int64) bool {
queries := strings.Split(query, ",")

result := false

for _, q := range queries {
q = strings.TrimSpace(q)

for _, rule := range f.Rules {
res := rule(q, value)
if res.DoesApply() {
result = result || res.AsBool()
}
}
}

return result
}
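
A short usage sketch of the new package; the results mirror the tests in `filter_test.go` below. Note the OR semantics: a comma-separated query matches as soon as any single condition matches.

```go
package main

import (
	"fmt"

	"github.com/atomicptr/crab/pkg/filter"
)

func main() {
	f := filter.NewFilter()

	fmt.Println(f.IsValid("200", 200))     // true: exact match
	fmt.Println(f.IsValid("!200", 200))    // false: negated exact match
	fmt.Println(f.IsValid("500-599", 503)) // true: range match
	fmt.Println(f.IsValid("200,404", 404)) // true: conditions are OR-ed together
	fmt.Println(f.IsValid(">500", 404))    // false: 404 is not greater than 500
}
```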
60 changes: 60 additions & 0 deletions pkg/filter/filter_test.go
@@ -0,0 +1,60 @@
package filter

import (
"github.com/stretchr/testify/assert"
"testing"
)

func TestFilterIsValidSimple(t *testing.T) {
filter := NewFilter()
assert.True(t, filter.IsValid("200", 200))
assert.False(t, filter.IsValid("200", 404))
assert.False(t, filter.IsValid("!200", 200))
assert.True(t, filter.IsValid("!200", 404))
}

func TestFilterIsValidRange(t *testing.T) {
filter := NewFilter()
assert.True(t, filter.IsValid("200-299", 200))
assert.True(t, filter.IsValid("200-299", 218))
assert.True(t, filter.IsValid("200-299", 299))
assert.False(t, filter.IsValid("200-299", 404))
}

func TestFilterIsValidMultipleRanges(t *testing.T) {
filter := NewFilter()
assert.True(t, filter.IsValid("200-299,400-499", 218))
assert.True(t, filter.IsValid("200-299,400-499", 404))
assert.False(t, filter.IsValid("200-299,400-499", 500))
}

func TestFilterIsValidNotInRange(t *testing.T) {
filter := NewFilter()
assert.True(t, filter.IsValid("!200-299", 404))
assert.True(t, filter.IsValid("!200-299,400-499", 404))
}

func TestFilterIsValidRangeWithExtras(t *testing.T) {
filter := NewFilter()
assert.True(t, filter.IsValid("200-299,404", 200))
assert.True(t, filter.IsValid("200-299,404", 218))
assert.True(t, filter.IsValid("200-299,404", 299))
assert.True(t, filter.IsValid("200-299,404", 404))
assert.False(t, filter.IsValid("200-299,404", 500))
assert.True(t, filter.IsValid("200-299,404,500", 500))
}

func TestFilterIsValidGreaterSmallerThan(t *testing.T) {
filter := NewFilter()
assert.True(t, filter.IsValid(">400", 500))
assert.True(t, filter.IsValid("<400,404", 404))
assert.True(t, filter.IsValid(">200,<299", 218))
}

func TestFilterInvalidCases(t *testing.T) {
filter := NewFilter()
// should be true because one of the conditions (200-299) matches
assert.True(t, filter.IsValid("200-299,!218", 218))
// should be true because 404 satisfies !200-299 (it is outside 200-299)
assert.True(t, filter.IsValid("!200-299,!400-499", 404))
}
43 changes: 43 additions & 0 deletions pkg/filter/rules.go
@@ -0,0 +1,43 @@
package filter

// RuleResult is the result of a rule execution
type RuleResult uint8

const (
// ResultFalse means the Rule did fail
ResultFalse RuleResult = iota
// ResultTrue means the Rule succeeded
ResultTrue
// ResultDoesNotApply means the Rule was not applicable for the given query
ResultDoesNotApply
)

// Rule is a function that returns a result depending on the given query and value
type Rule func(query string, value int64) RuleResult

// AsBool converts the result into a boolean; keep in mind that a rule that did not apply
// will always return true
func (res RuleResult) AsBool() bool {
if !res.DoesApply() {
return true
}
return res == ResultTrue
}

// DoesApply checks if the rule applied to the given query
func (res RuleResult) DoesApply() bool {
return res != ResultDoesNotApply
}

// Invert inverts the boolean result; ResultDoesNotApply stays the same though
func (res RuleResult) Invert() RuleResult {
if res == ResultDoesNotApply {
return ResultDoesNotApply
}

if res == ResultFalse {
return ResultTrue
}

return ResultFalse
}
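
The concrete rules that `NewFilter` registers (`RuleIsValue`, `RuleIsInRange`, and so on) live in one of the other changed files and are not shown in this excerpt. A minimal sketch of what two of them could look like, assuming every rule returns `ResultDoesNotApply` when the query does not match its syntax (an illustration, not the committed implementation):

```go
package filter

import (
	"strconv"
	"strings"
)

// RuleIsValue matches plain numeric queries such as "200".
func RuleIsValue(query string, value int64) RuleResult {
	n, err := strconv.ParseInt(query, 10, 64)
	if err != nil {
		return ResultDoesNotApply
	}
	if n == value {
		return ResultTrue
	}
	return ResultFalse
}

// RuleIsGreaterThan matches queries such as ">500".
func RuleIsGreaterThan(query string, value int64) RuleResult {
	if !strings.HasPrefix(query, ">") {
		return ResultDoesNotApply
	}
	n, err := strconv.ParseInt(strings.TrimPrefix(query, ">"), 10, 64)
	if err != nil {
		return ResultDoesNotApply
	}
	if value > n {
		return ResultTrue
	}
	return ResultFalse
}
```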
24 changes: 24 additions & 0 deletions pkg/filter/rules_test.go
@@ -0,0 +1,24 @@
package filter

import (
"github.com/stretchr/testify/assert"
"testing"
)

func TestAsBool(t *testing.T) {
assert.True(t, ResultTrue.AsBool())
assert.False(t, ResultFalse.AsBool())
assert.True(t, ResultDoesNotApply.AsBool())
}

func TestDoesApply(t *testing.T) {
assert.False(t, ResultDoesNotApply.DoesApply())
assert.True(t, ResultTrue.DoesApply())
assert.True(t, ResultFalse.DoesApply())
}

func TestInvert(t *testing.T) {
assert.Equal(t, ResultTrue, ResultFalse.Invert())
assert.Equal(t, ResultFalse, ResultTrue.Invert())
assert.Equal(t, ResultDoesNotApply, ResultDoesNotApply.Invert())
}