Skip to content

Commit

Permalink
Email, ip, ipv4, ipv6, md5, sha1, sha256 helpers (#36)
Browse files Browse the repository at this point in the history
* Add hostname helper

* email helper

* Examples

* ipv4

* IP

* Check hashes

* Example_ipv4Match
  • Loading branch information
hedhyw committed Jun 19, 2022
1 parent 6aed30b commit 0d76bc6
Show file tree
Hide file tree
Showing 12 changed files with 897 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ bin

# Test binary, built with `go test -c`
*.test
testdata

# Output of the go coverage tool, specifically when used with LiteIDE
*.out
Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ test:
go tool cover -func=coverage.out
.PHONY: test

test.fuzz:
# make test.fuzz NAME=FuzzIPv4
go test -fuzz $(NAME) "github.com/hedhyw/rex/pkg/dialect/base"
.PHONY: test.fuzz

tidy:
go mod tidy
.PHONY: vendor
Expand Down
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Rex [work in progress]
# Rex

![Version](https://img.shields.io/github/v/tag/hedhyw/rex)
[![Go Report Card](https://goreportcard.com/badge/github.com/hedhyw/rex)](https://goreportcard.com/report/github.com/hedhyw/rex)
Expand Down Expand Up @@ -147,13 +147,23 @@ rex.Group.Define(rex.Chars.Single('a')).Repeat().OneOrMore() // (a)+
## Helper

Common regular expression patterns that are ready to use.
> ⚠️ These patterns are likely to be changed in new versions.
```golang
rex.Helper.Phone() // Combines PhoneE164 and PhoneE123.
rex.Helper.PhoneE164() // +155555555
rex.Helper.PhoneE123() // Combines PhoneNationalE123 and PhoneInternationalE123.
rex.Helper.PhoneNationalE123() // (607) 123 4567
rex.Helper.PhoneInternationalE123() // +22 607 123 4567
rex.Helper.HostnameRFC952() // Hostname by RFC-952 (stricter).
rex.Helper.HostnameRFC1123() // Hostname by RFC-1123.
rex.Helper.Email() // Unquoted email pattern. It does not fully validate RFC 5322 because of the high complexity.
rex.Helper.IP() // IPv4 or IPv6.
rex.Helper.IPv4() // 127.0.0.1
rex.Helper.IPv6() // 2001:0db8:85a3:0000:0000:8a2e:0370:7334
rex.Helper.MD5Hex() // d41d8cd98f00b204e9800998ecf8427e
rex.Helper.SHA1Hex() // da39a3ee5e6b4b0d3255bfef95601890afd80709
rex.Helper.SHA256Hex() // e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
```

## Examples
Expand Down Expand Up @@ -230,7 +240,7 @@ re.MatchString("123") // true
re.MatchString("hello.123") // false
```

## Example groups usage.
## Example groups usage

```golang
re := rex.New(
Expand All @@ -254,4 +264,4 @@ submatches := re.FindAllStringSubmatch(text, -1)

#### More examples

More examples can be found here: [pkg/rex/examples_test.go](pkg/rex/examples_test.go).
More examples can be found here: [examples_test.go](examples_test.go).
61 changes: 61 additions & 0 deletions pkg/rex/examples_test.go → examples_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,64 @@ func Example_compositeInReadme() {
// 123: true
// hello.123: false
}

// Example_emailMatch anchors the Email helper between the begin and end
// tokens and reports whether each sample string matches the compiled
// expression as a whole.
func Example_emailMatch() {
	pattern := rex.New(
		rex.Chars.Begin(),
		rex.Helper.Email(),
		rex.Chars.End(),
	)
	re := pattern.MustCompile()

	fmt.Println("regular expression:", re.String())

	// Each candidate is printed as "<candidate>: <matched>".
	for _, candidate := range []string{"example@example.com", "@example.com"} {
		fmt.Println(candidate+":", re.MatchString(candidate))
	}

	// Output:
	// regular expression: ^(?:(?:[[:alnum:]!#\$\x25&'\*\+\x2D/=\?\^_`\{\|\}~](?:[[:alnum:]!#\$\x25&'\*\+\x2D/=\?\^_`\{\|\}~\.]?[[:alnum:]!#\$\x25&'\*\+\x2D/=\?\^_`\{\|\}~]){0,31})@(?:[[:alnum:]][[:alnum:]\x2D]{0,62}(?:\.*[[:alnum:]][[:alnum:]\x2D]{0,62})*[[:alnum:]]))$
	// example@example.com: true
	// @example.com: false
}

// Example_emailFind wraps the Email helper in a group named "email" and
// prints the full match of every occurrence found in a multi-line text.
func Example_emailFind() {
	const text = `
Duyen: duyen@example.com
Rex: rex@rex.example.com
`

	emailGroup := rex.Group.Define(
		rex.Helper.Email(),
	).WithName("email")
	re := rex.New(emailGroup).MustCompile()

	fmt.Println("regular expression:", re.String())

	found := re.FindAllStringSubmatch(text, -1)
	for idx := range found {
		// Element 0 of each submatch is the whole matched text.
		fmt.Printf("submatches[%d]: %s\n", idx, found[idx][0])
	}

	// Output:
	// regular expression: (?P<email>(?:(?:[[:alnum:]!#\$\x25&'\*\+\x2D/=\?\^_`\{\|\}~](?:[[:alnum:]!#\$\x25&'\*\+\x2D/=\?\^_`\{\|\}~\.]?[[:alnum:]!#\$\x25&'\*\+\x2D/=\?\^_`\{\|\}~]){0,31})@(?:[[:alnum:]][[:alnum:]\x2D]{0,62}(?:\.*[[:alnum:]][[:alnum:]\x2D]{0,62})*[[:alnum:]])))
	// submatches[0]: duyen@example.com
	// submatches[1]: rex@rex.example.com
}

// Example_ipv4Match anchors the IPv4 helper between the begin and end
// tokens and reports whether each sample string matches as a whole.
func Example_ipv4Match() {
	pattern := rex.New(
		rex.Chars.Begin(),
		rex.Helper.IPv4(),
		rex.Chars.End(),
	)
	re := pattern.MustCompile()

	fmt.Println("regular expression:", re.String())

	// Each candidate is printed as "<candidate>: <matched>".
	for _, candidate := range []string{"127.0.0.1", "172.217.16.14", "github.com"} {
		fmt.Println(candidate+":", re.MatchString(candidate))
	}

	// Output:
	// regular expression: ^(?:(?:(?:(?:25[0-5])|(?:2[0-4]\d)|(?:[01]?\d\d?))\.){3}(?:(?:25[0-5])|(?:2[0-4]\d)|(?:[01]?\d\d?)))$
	// 127.0.0.1: true
	// 172.217.16.14: true
	// github.com: false
}
6 changes: 2 additions & 4 deletions internal/test/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,11 @@ type MatchTestCaseGroupSlice [][]MatchTestCase
func (tcs MatchTestCaseGroupSlice) Run(t *testing.T, tokens ...dialect.Token) {
t.Parallel()

tok := base.Group.Define(
re := rex.New(base.Group.Define(
base.Chars.Begin(),
base.Group.Define(tokens...).NonCaptured(),
base.Chars.End(),
).NonCaptured()

re := rex.New(tok).MustCompile()
).NonCaptured()).MustCompile()

for _, g := range tcs {
for _, tc := range g {
Expand Down
2 changes: 1 addition & 1 deletion pkg/dialect/base/chars.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func (CharsBaseDialect) Range(from rune, to rune) ClassToken {
// Regex: `r`, `\\xHEX_CODE`, or `\\x{HEX_CODE}`.
func (CharsBaseDialect) Single(r rune) ClassToken {
// Minus can be a special case in classes.
if r < unicode.MaxASCII && r != '-' {
if r < unicode.MaxASCII && unicode.IsPrint(r) && r != '-' && r != '%' {
return newClassToken(
helper.StringToken(regexp.QuoteMeta(string(r))),
).withoutBrackets()
Expand Down
6 changes: 6 additions & 0 deletions pkg/dialect/base/chars_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,5 +188,11 @@ func TestRexChars_runes(t *testing.T) {
base.Chars.Runes("abc").Repeat().OneOrMore(),
},
Expected: `[abc]+`,
}, {
Name: "Punctuation",
Chain: []dialect.Token{
base.Chars.Runes("!#$%&'*+-/=?^_`{|}~"),
},
Expected: "[!#\\$\\x25&'\\*\\+\\x2D/=\\?\\^_`\\{\\|\\}~]",
}}.Run(t)
}
3 changes: 2 additions & 1 deletion pkg/dialect/base/group.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ func (gt GroupToken) WithName(name string) GroupToken {
}

// NonCaptured marks group as non-captured. It means that this group
// will not be included in group submatch results.
// will not be included in group submatch results. Use this for optimizing
// regular expressions if you don't need captured result.
//
// It overrides name if set.
func (gt GroupToken) NonCaptured() GroupToken {
Expand Down
30 changes: 30 additions & 0 deletions pkg/dialect/base/helper_hash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package base

import "github.com/hedhyw/rex/pkg/dialect"

// MD5Hex returns a token matching an MD5 digest written as exactly
// 32 hexadecimal digits.
//
// Example: d41d8cd98f00b204e9800998ecf8427e.
func (h HelperDialect) MD5Hex() dialect.Token {
	const md5HexLen = 32

	return h.hex(md5HexLen)
}

// SHA1Hex returns a token matching a SHA1 digest written as exactly
// 40 hexadecimal digits.
//
// Example: da39a3ee5e6b4b0d3255bfef95601890afd80709.
func (h HelperDialect) SHA1Hex() dialect.Token {
	const sha1HexLen = 40

	return h.hex(sha1HexLen)
}

// SHA256Hex is a pattern for a cryptographic hash function SHA256 in hex
// representation. It matches exactly 64 hexadecimal digits.
//
// Example: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855.
func (h HelperDialect) SHA256Hex() dialect.Token {
	return h.hex(64)
}

// hex builds a non-captured group that matches exactly length
// hexadecimal digits.
func (HelperDialect) hex(length int) dialect.Token {
	digits := Chars.HexDigits().Repeat().Exactly(length)

	return Group.Define(digits).NonCaptured()
}
154 changes: 154 additions & 0 deletions pkg/dialect/base/helper_hash_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// nolint: gosec // It is a test.
package base_test

import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"fmt"
"testing"

"github.com/hedhyw/rex/internal/test"
"github.com/hedhyw/rex/pkg/dialect/base"
)

// getMD5ValidTestCases returns values that are well-formed MD5 hex
// strings: the lower- and upper-case digest of empty input plus digests
// computed for a few short inputs.
func getMD5ValidTestCases() test.MatchTestCaseSlice {
	// digest renders the MD5 sum of s as lower-case hex.
	digest := func(s string) string {
		return fmt.Sprintf("%x", md5.Sum([]byte(s)))
	}

	return test.MatchTestCaseSlice{
		{Name: "md5_ok_example", Value: "d41d8cd98f00b204e9800998ecf8427e"},
		{Name: "md5_ok_upper", Value: "D41D8CD98F00B204E9800998ECF8427E"},
		{Name: "md5_ok_3", Value: digest("3")},
		{Name: "md5_ok_4", Value: digest("4")},
		{Name: "md5_ok_5", Value: digest("5")},
		{Name: "md5_ok_6", Value: digest("6")},
	}
}

// getMD5InvalidTestCases returns values that must not be accepted as an
// MD5 hex string: a non-hex character, one digit too few, one digit too
// many, and the empty string.
func getMD5InvalidTestCases() test.MatchTestCaseSlice {
	shortDigest := fmt.Sprintf("%x", md5.Sum([]byte("short")))
	longDigest := fmt.Sprintf("%x", md5.Sum([]byte("long")))

	return test.MatchTestCaseSlice{
		{Name: "md5_non_hex_p", Value: "p41d8cd98f00b204e9800998ecf8427e"},
		// Drop the last digit: 31 characters instead of 32.
		{Name: "md5_short", Value: shortDigest[:31]},
		// Append one digit: 33 characters instead of 32.
		{Name: "md5_long", Value: longDigest + "0"},
		{Name: "md5_empty", Value: ""},
	}
}

// TestMD5Hex checks that the MD5Hex helper matches valid MD5 values and
// rejects malformed ones as well as valid SHA1 and SHA256 values.
func TestMD5Hex(t *testing.T) {
	groups := test.MatchTestCaseGroupSlice{
		getMD5ValidTestCases().WithMatched(true),
		getMD5InvalidTestCases().WithMatched(false),
		getSHA1ValidTestCases().WithMatched(false),
		getSHA256ValidTestCases().WithMatched(false),
	}

	groups.Run(t, base.Helper.MD5Hex())
}

// getSHA1ValidTestCases returns values that are well-formed SHA1 hex
// strings: the lower- and upper-case digest of empty input plus digests
// computed for a few short inputs.
func getSHA1ValidTestCases() test.MatchTestCaseSlice {
	// digest renders the SHA1 sum of s as lower-case hex.
	digest := func(s string) string {
		return fmt.Sprintf("%x", sha1.Sum([]byte(s)))
	}

	return test.MatchTestCaseSlice{
		{Name: "sha1_ok_example", Value: "da39a3ee5e6b4b0d3255bfef95601890afd80709"},
		{Name: "sha1_ok_upper", Value: "DA39A3EE5E6B4B0D3255BFEF95601890AFD80709"},
		{Name: "sha1_ok_3", Value: digest("3")},
		{Name: "sha1_ok_4", Value: digest("4")},
		{Name: "sha1_ok_5", Value: digest("5")},
		{Name: "sha1_ok_6", Value: digest("6")},
	}
}

// getSHA1InvalidTestCases returns values that must not be accepted as a
// SHA1 hex string: a non-hex character, one digit too few, one digit too
// many, and the empty string.
func getSHA1InvalidTestCases() test.MatchTestCaseSlice {
	shortDigest := fmt.Sprintf("%x", sha1.Sum([]byte("short")))
	longDigest := fmt.Sprintf("%x", sha1.Sum([]byte("long")))

	return test.MatchTestCaseSlice{
		{Name: "sha1_non_hex_p", Value: "pa39a3ee5e6b4b0d3255bfef95601890afd80709"},
		// Drop the last digit: 39 characters instead of 40.
		{Name: "sha1_short", Value: shortDigest[:39]},
		// Append one digit: 41 characters instead of 40.
		{Name: "sha1_long", Value: longDigest + "0"},
		{Name: "sha1_empty", Value: ""},
	}
}

// TestSHA1Hex checks that the SHA1Hex helper matches valid SHA1 values and
// rejects malformed ones as well as valid MD5 and SHA256 values.
func TestSHA1Hex(t *testing.T) {
	groups := test.MatchTestCaseGroupSlice{
		getSHA1ValidTestCases().WithMatched(true),
		getSHA1InvalidTestCases().WithMatched(false),
		getMD5ValidTestCases().WithMatched(false),
		getSHA256ValidTestCases().WithMatched(false),
	}

	groups.Run(t, base.Helper.SHA1Hex())
}

// getSHA256ValidTestCases returns values that are well-formed SHA256 hex
// strings: the lower- and upper-case digest of empty input plus digests
// computed for a few short inputs.
func getSHA256ValidTestCases() test.MatchTestCaseSlice {
	// digest renders the SHA256 sum of s as lower-case hex.
	digest := func(s string) string {
		return fmt.Sprintf("%x", sha256.Sum256([]byte(s)))
	}

	return test.MatchTestCaseSlice{
		{Name: "sha256_ok_example", Value: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"},
		{Name: "sha256_ok_upper", Value: "E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855"},
		{Name: "sha256_ok_3", Value: digest("3")},
		{Name: "sha256_ok_4", Value: digest("4")},
		{Name: "sha256_ok_5", Value: digest("5")},
		{Name: "sha256_ok_6", Value: digest("6")},
	}
}

// getSHA256InvalidTestCases returns values that must not be accepted as a
// SHA256 hex string: a non-hex character, one digit too few, one digit
// too many, and the empty string.
func getSHA256InvalidTestCases() test.MatchTestCaseSlice {
	shortDigest := fmt.Sprintf("%x", sha256.Sum256([]byte("short")))
	longDigest := fmt.Sprintf("%x", sha256.Sum256([]byte("long")))

	return test.MatchTestCaseSlice{
		{Name: "sha256_non_hex_p", Value: "p3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"},
		// Drop the last digit: 63 characters instead of 64.
		{Name: "sha256_short", Value: shortDigest[:63]},
		// Append one digit: 65 characters instead of 64.
		{Name: "sha256_long", Value: longDigest + "0"},
		{Name: "sha256_empty", Value: ""},
	}
}

// TestSHA256Hex checks that the SHA256Hex helper matches valid SHA256
// values and rejects malformed ones as well as valid SHA1 and MD5 values.
func TestSHA256Hex(t *testing.T) {
	groups := test.MatchTestCaseGroupSlice{
		getSHA256ValidTestCases().WithMatched(true),
		getSHA256InvalidTestCases().WithMatched(false),
		getSHA1ValidTestCases().WithMatched(false),
		getMD5ValidTestCases().WithMatched(false),
	}

	groups.Run(t, base.Helper.SHA256Hex())
}
Loading

0 comments on commit 0d76bc6

Please sign in to comment.