Skip to content

Commit db8c4f0

Browse files
marko-bekhta authored and gsmet committed
HV-1185 Add more tests for URL validation and improve the existing validators
1 parent 5b9e30e commit db8c4f0

File tree

4 files changed

+198
-49
lines changed

4 files changed

+198
-49
lines changed

engine/src/main/java/org/hibernate/validator/constraintvalidators/RegexpURLValidator.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import javax.validation.ConstraintValidatorContext;
1313

1414
import org.hibernate.validator.constraints.URL;
15+
import org.hibernate.validator.internal.util.DomainNameUtil;
1516

1617
/**
1718
* Validate that the character sequence (e.g. string) is a valid URL using a regular expression.
@@ -54,6 +55,10 @@ public boolean isValid(CharSequence value, ConstraintValidatorContext constraint
5455
return false;
5556
}
5657

58+
if ( !DomainNameUtil.isValidDomainAddress( values.getHost() ) ) {
59+
return false;
60+
}
61+
5762
if ( protocol != null && protocol.length() > 0 && !protocol.equals( values.getProtocol() ) ) {
5863
return false;
5964
}

engine/src/main/java/org/hibernate/validator/internal/constraintvalidators/hv/EmailValidator.java

Lines changed: 8 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@
88

99
import static java.util.regex.Pattern.CASE_INSENSITIVE;
1010

11-
import java.net.IDN;
1211
import java.util.regex.Matcher;
1312
import java.util.regex.Pattern;
14-
1513
import javax.validation.ConstraintValidator;
1614
import javax.validation.ConstraintValidatorContext;
1715

1816
import org.hibernate.validator.constraints.Email;
17+
import org.hibernate.validator.internal.util.DomainNameUtil;
1918

2019
/**
2120
* Checks that a given character sequence (e.g. string) is a well-formed email address.
@@ -33,20 +32,11 @@
3332
* @author Guillaume Smet
3433
*/
3534
public class EmailValidator implements ConstraintValidator<Email, CharSequence> {
36-
private static final String LOCAL_PART_ATOM = "[a-z0-9!#$%&'*+/=?^_`{|}~\u0080-\uFFFF-]";
37-
private static final String LOCAL_PART_INSIDE_QUOTES_ATOM = "([a-z0-9!#$%&'*.(),<>\\[\\]:; @+/=?^_`{|}~\u0080-\uFFFF-]|\\\\\\\\|\\\\\\\")";
38-
private static final String DOMAIN_LABEL = "[a-z0-9!#$%&'*+/=?^_`{|}~-]";
39-
private static final String DOMAIN = DOMAIN_LABEL + "+(\\." + DOMAIN_LABEL + "+)*";
40-
private static final String IP_DOMAIN = "\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\]";
41-
//IP v6 regex taken from http://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses
42-
private static final String IP_V6_DOMAIN = "(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))";
35+
4336
private static final int MAX_LOCAL_PART_LENGTH = 64;
44-
/**
45-
* This is the maximum length of a domain name. But be aware that each label (parts separated by a dot) of the
46-
* domain name must be at most 63 characters long. This is verified by {@link IDN#toASCII(String)}.
47-
*/
48-
private static final int MAX_DOMAIN_PART_LENGTH = 255;
4937

38+
private static final String LOCAL_PART_ATOM = "[a-z0-9!#$%&'*+/=?^_`{|}~\u0080-\uFFFF-]";
39+
private static final String LOCAL_PART_INSIDE_QUOTES_ATOM = "([a-z0-9!#$%&'*.(),<>\\[\\]:; @+/=?^_`{|}~\u0080-\uFFFF-]|\\\\\\\\|\\\\\\\")";
5040
/**
5141
* Regular expression for the local part of an email address (everything before '@')
5242
*/
@@ -55,13 +45,6 @@ public class EmailValidator implements ConstraintValidator<Email, CharSequence>
5545
"(\\." + "(" + LOCAL_PART_ATOM + "+|\"" + LOCAL_PART_INSIDE_QUOTES_ATOM + "+\")" + ")*", CASE_INSENSITIVE
5646
);
5747

58-
/**
59-
* Regular expression for the domain part of an email address (everything after '@')
60-
*/
61-
private static final Pattern DOMAIN_PATTERN = Pattern.compile(
62-
DOMAIN + "|" + IP_DOMAIN + "|" + "\\[IPv6:" + IP_V6_DOMAIN + "\\]", CASE_INSENSITIVE
63-
);
64-
6548
@Override
6649
public boolean isValid(CharSequence value, ConstraintValidatorContext context) {
6750
if ( value == null || value.length() == 0 ) {
@@ -71,7 +54,7 @@ public boolean isValid(CharSequence value, ConstraintValidatorContext context) {
7154
// cannot split email string at @ as it can be a part of quoted local part of email.
7255
// so we need to split at a position of last @ present in the string:
7356
String stringValue = value.toString();
74-
int splitPosition = stringValue.lastIndexOf( "@" );
57+
int splitPosition = stringValue.lastIndexOf( '@' );
7558

7659
// need to check if
7760
if ( splitPosition < 0 ) {
@@ -81,42 +64,19 @@ public boolean isValid(CharSequence value, ConstraintValidatorContext context) {
8164
String localPart = stringValue.substring( 0, splitPosition );
8265
String domainPart = stringValue.substring( splitPosition + 1 );
8366

84-
if ( !matchLocalPart( localPart ) ) {
67+
if ( !isValidEmailLocalPart( localPart ) ) {
8568
return false;
8669
}
8770

88-
return matchDomain( domainPart );
71+
return DomainNameUtil.isValidEmailDomainAddress( domainPart );
8972
}
9073

91-
private boolean matchLocalPart(String localPart) {
74+
private boolean isValidEmailLocalPart(String localPart) {
9275
if ( localPart.length() > MAX_LOCAL_PART_LENGTH ) {
9376
return false;
9477
}
9578
Matcher matcher = LOCAL_PART_PATTERN.matcher( localPart );
9679
return matcher.matches();
9780
}
9881

99-
private boolean matchDomain(String domain) {
100-
// if we have a trailing dot the domain part we have an invalid email address.
101-
// the regular expression match would take care of this, but IDN.toASCII drops the trailing '.'
102-
if ( domain.endsWith( "." ) ) {
103-
return false;
104-
}
105-
106-
String asciiString;
107-
try {
108-
asciiString = IDN.toASCII( domain );
109-
}
110-
catch (IllegalArgumentException e) {
111-
return false;
112-
}
113-
114-
if ( asciiString.length() > MAX_DOMAIN_PART_LENGTH ) {
115-
return false;
116-
}
117-
118-
Matcher matcher = DOMAIN_PATTERN.matcher( asciiString );
119-
return matcher.matches();
120-
}
121-
12282
}
engine/src/main/java/org/hibernate/validator/internal/util/DomainNameUtil.java

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Hibernate Validator, declare and validate application constraints
3+
*
4+
* License: Apache License, Version 2.0
5+
* See the license.txt file in the root directory or <http://www.apache.org/licenses/LICENSE-2.0>.
6+
*/
7+
package org.hibernate.validator.internal.util;
8+
9+
import static java.util.regex.Pattern.CASE_INSENSITIVE;
10+
11+
import java.net.IDN;
12+
import java.util.regex.Matcher;
13+
import java.util.regex.Pattern;
14+
15+
/**
 * Helper for checking that a string is a syntactically valid domain (host) part, shared by the
 * URL and the email validators.
 * <p>
 * Two flavours are offered: {@link #isValidDomainAddress(String)} for the host of a URL and
 * {@link #isValidEmailDomainAddress(String)} for the part of an email address after the last '@'.
 * Both first match the raw input against a regular expression and then run it through
 * {@link IDN#toASCII(String)} to check label and overall length.
 *
 * @author Marko Bekhta
 */
public final class DomainNameUtil {

	/**
	 * This is the maximum length of a domain name. But be aware that each label (parts separated by a dot) of the
	 * domain name must be at most 63 characters long. This is verified by {@link IDN#toASCII(String)}.
	 */
	private static final int MAX_DOMAIN_PART_LENGTH = 255;

	/**
	 * A single character allowed inside a domain label, the dash excluded: ASCII letters and digits, a set of
	 * ASCII punctuation characters and every character in the range U+0080..U+FFFF.
	 */
	private static final String DOMAIN_CHARS_WITHOUT_DASH = "[a-z\u0080-\uFFFF0-9!#$%&'*+/=?^_`{|}~]";

	/**
	 * One domain label: it must neither start nor end with a dash and must not contain two consecutive dashes.
	 * NOTE(review): the ban on consecutive dashes also rejects ACE/punycode labels typed literally
	 * (e.g. "xn--..."); confirm that this is intended.
	 */
	private static final String DOMAIN_LABEL = "(" + DOMAIN_CHARS_WITHOUT_DASH + "-?)*" + DOMAIN_CHARS_WITHOUT_DASH + "+";

	/**
	 * One or more labels separated by dots. The "+" appended after each DOMAIN_LABEL joins the label's own
	 * trailing "+" and therefore forms a possessive quantifier ("++") on the last character class.
	 */
	private static final String DOMAIN = DOMAIN_LABEL + "+(\\." + DOMAIN_LABEL + "+)*";

	/**
	 * Dotted-quad IPv4 literal (four groups of one to three digits; the numeric range is not checked).
	 */
	private static final String IP_DOMAIN = "[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}";

	//IP v6 regex taken from http://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses
	private static final String IP_V6_DOMAIN = "(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))";

	/**
	 * Regular expression for the domain part of an URL
	 * <p>
	 * A host string must be a domain string, an IPv4 address string, or "[", followed by an IPv6 address string,
	 * followed by "]". A bare IPv4 address needs no alternative of its own: digits and dots are already
	 * accepted by the domain alternative.
	 */
	private static final Pattern DOMAIN_PATTERN = Pattern.compile(
			DOMAIN + "|\\[" + IP_V6_DOMAIN + "\\]", CASE_INSENSITIVE
	);

	/**
	 * Regular expression for the domain part of an email address (everything after '@'): a domain string,
	 * "[" + IPv4 address + "]", or "[IPv6:" + IPv6 address + "]".
	 */
	private static final Pattern EMAIL_DOMAIN_PATTERN = Pattern.compile(
			DOMAIN + "|\\[" + IP_DOMAIN + "\\]|" + "\\[IPv6:" + IP_V6_DOMAIN + "\\]", CASE_INSENSITIVE
	);

	private DomainNameUtil() {
		// static utility class, not meant to be instantiated
	}

	/**
	 * Checks the validity of the domain name used in an email. To be valid it should be either a valid host name, or an
	 * IP address wrapped in [].
	 *
	 * @param domain domain to check for validity; {@code null} is treated as invalid
	 * @return {@code true} if the provided string is a valid domain, {@code false} otherwise
	 */
	public static boolean isValidEmailDomainAddress(String domain) {
		return isValidDomainAddress( domain, EMAIL_DOMAIN_PATTERN );
	}

	/**
	 * Checks validity of a domain name as used for the host of a URL.
	 *
	 * @param domain the domain to check for validity; {@code null} is treated as invalid
	 * @return {@code true} if the provided string is a valid domain, {@code false} otherwise
	 */
	public static boolean isValidDomainAddress(String domain) {
		return isValidDomainAddress( domain, DOMAIN_PATTERN );
	}

	/**
	 * Shared implementation: the raw input has to match {@code pattern} and its ASCII (IDN) form has to
	 * respect the label and overall length limits.
	 *
	 * @param domain the domain to check, may be {@code null}
	 * @param pattern the pattern the raw domain has to match completely
	 * @return {@code true} if all checks pass, {@code false} otherwise
	 */
	private static boolean isValidDomainAddress(String domain, Pattern pattern) {
		// a missing domain is never valid; answer 'false' rather than failing with a NullPointerException
		if ( domain == null ) {
			return false;
		}

		// a domain with a trailing dot is considered invalid.
		// the regular expression match would take care of this, but IDN.toASCII drops the trailing '.'
		if ( domain.endsWith( "." ) ) {
			return false;
		}

		Matcher matcher = pattern.matcher( domain );
		if ( !matcher.matches() ) {
			return false;
		}

		String asciiString;
		try {
			asciiString = IDN.toASCII( domain );
		}
		catch (IllegalArgumentException ignored) {
			// the input cannot be converted to its ASCII form (e.g. a label is too long), hence it is invalid
			return false;
		}

		return asciiString.length() <= MAX_DOMAIN_PART_LENGTH;
	}

}

engine/src/test/java/org/hibernate/validator/test/internal/constraintvalidators/hv/URLValidatorTest.java

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,10 +290,91 @@ private void runUrlContainerValidation(Validator validator, URLContainer contain
290290
}
291291

292292
private void assertValidUrls(ConstraintValidator<URL, CharSequence> validator) {
293+
//valid urls
293294
assertTrue( validator.isValid( null, null ) );
295+
assertTrue( validator.isValid( "ftp://abc.de", null ) );
296+
assertTrue( validator.isValid( "http://foo.com/blah_blah", null ) );
297+
assertTrue( validator.isValid( "http://foo.com/blah_blah/", null ) );
298+
assertTrue( validator.isValid( "http://foo.com/blah_blah_(wikipedia)", null ) );
299+
assertTrue( validator.isValid( "http://foo.com/blah_blah_(wikipedia)_(again)", null ) );
300+
assertTrue( validator.isValid( "http://www.example.com/wpstyle/?p=364", null ) );
301+
assertTrue( validator.isValid( "https://www.example.com/foo/?bar=baz&inga=42&quux", null ) );
302+
assertTrue( validator.isValid( "http://✪df.ws/123", null ) );
303+
assertTrue( validator.isValid( "http://userid:password@example.com:8080", null ) );
304+
assertTrue( validator.isValid( "http://userid:password@example.com:8080/", null ) );
305+
assertTrue( validator.isValid( "http://userid@example.com", null ) );
306+
assertTrue( validator.isValid( "http://userid@example.com/", null ) );
307+
assertTrue( validator.isValid( "http://userid@example.com:8080", null ) );
308+
assertTrue( validator.isValid( "http://userid@example.com:8080/", null ) );
309+
assertTrue( validator.isValid( "http://userid:password@example.com", null ) );
310+
assertTrue( validator.isValid( "http://userid:password@example.com/", null ) );
311+
assertTrue( validator.isValid( "http://142.42.1.1/", null ) );
312+
assertTrue( validator.isValid( "http://142.42.1.1:8080/", null ) );
313+
assertTrue( validator.isValid( "http://➡.ws/䨹", null ) );
314+
assertTrue( validator.isValid( "http://⌘.ws", null ) );
315+
assertTrue( validator.isValid( "http://⌘.ws/", null ) );
316+
assertTrue( validator.isValid( "http://foo.com/blah_(wikipedia)#cite-1", null ) );
317+
assertTrue( validator.isValid( "http://foo.com/blah_(wikipedia)_blah#cite-1", null ) );
318+
assertTrue( validator.isValid( "http://foo.com/unicode_(✪)_in_parens", null ) );
319+
assertTrue( validator.isValid( "http://foo.com/(something)?after=parens", null ) );
320+
assertTrue( validator.isValid( "http://☺.damowmow.com/", null ) );
321+
assertTrue( validator.isValid( "http://code.google.com/events/#&product=browser", null ) );
322+
assertTrue( validator.isValid( "http://j.mp", null ) );
323+
assertTrue( validator.isValid( "ftp://foo.bar/baz", null ) );
324+
assertTrue( validator.isValid( "http://foo.bar/?q=Test%20URL-encoded%20stuff", null ) );
325+
assertTrue( validator.isValid( "http://مثال.إختبار", null ) );
326+
assertTrue( validator.isValid( "http://例子.测试", null ) );
327+
assertTrue( validator.isValid( "http://उदाहरण.परीक्षा", null ) );
328+
assertTrue( validator.isValid( "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com", null ) );
329+
assertTrue( validator.isValid( "http://1337.net", null ) );
330+
assertTrue( validator.isValid( "http://a.b-c.de", null ) );
331+
assertTrue( validator.isValid( "http://223.255.255.254", null ) );
332+
assertTrue( validator.isValid( "http://[2001:0db8:0a0b:12f0:0000:0000:0000:0001]", null ) );
333+
334+
// invalid urls:
294335
assertFalse( validator.isValid( "http", null ) );
295336
assertFalse( validator.isValid( "ftp//abc.de", null ) );
296-
assertTrue( validator.isValid( "ftp://abc.de", null ) );
337+
assertFalse( validator.isValid( "//", null ) );
338+
assertFalse( validator.isValid( "//a", null ) );
339+
assertFalse( validator.isValid( "///", null ) );
340+
assertFalse( validator.isValid( "///a", null ) );
341+
assertFalse( validator.isValid( "foo.com", null ) );
342+
assertFalse( validator.isValid( ":// should fail", null ) );
343+
344+
if ( validator instanceof URLValidator ) {
345+
// 'exotic' protocols are considered valid using RegexpURLValidator but not URLValidator
346+
// as the last one doesn't allow unknown protocols
347+
assertFalse( validator.isValid( "rdar://1234", null ) );
348+
assertFalse( validator.isValid( "ftps://foo.bar/", null ) );
349+
assertFalse( validator.isValid( "h://test", null ) );
350+
}
351+
352+
if ( validator instanceof RegexpURLValidator ) {
353+
assertFalse( validator.isValid( "http://", null ) );
354+
assertFalse( validator.isValid( "http://.", null ) );
355+
assertFalse( validator.isValid( "http://..", null ) );
356+
assertFalse( validator.isValid( "http://../", null ) );
357+
assertFalse( validator.isValid( "http://?", null ) );
358+
assertFalse( validator.isValid( "http://??", null ) );
359+
assertFalse( validator.isValid( "http://??/", null ) );
360+
assertFalse( validator.isValid( "http://#", null ) );
361+
assertFalse( validator.isValid( "http://##", null ) );
362+
assertFalse( validator.isValid( "http://##/", null ) );
363+
assertFalse( validator.isValid( "http://foo.bar?q=Spaces should be encoded", null ) );
364+
assertFalse( validator.isValid( "http:///a", null ) );
365+
assertFalse( validator.isValid( "http:// shouldfail.com", null ) );
366+
assertFalse( validator.isValid( "http://foo.bar/foo(bar)baz quux", null ) );
367+
assertFalse( validator.isValid( "http://-error-.invalid/", null ) );
368+
assertFalse( validator.isValid( "http://a.b--c.de/", null ) );
369+
assertFalse( validator.isValid( "http://-a.b.co", null ) );
370+
assertFalse( validator.isValid( "http://a.b-.co", null ) );
371+
// assertFalse( validator.isValid( "http://123.123.123", null ) );
372+
// assertFalse( validator.isValid( "http://3628126748", null ) );
373+
assertFalse( validator.isValid( "http://.www.foo.bar/", null ) );
374+
assertFalse( validator.isValid( "http://www.foo.bar./", null ) );
375+
assertFalse( validator.isValid( "http://.www.foo.bar./", null ) );
376+
assertFalse( validator.isValid( "http://2001:0db8:0a0b:12f0:0000:0000:0000:0001", null ) );
377+
}
297378
}
298379

299380
private void assertValidCharSequenceUrls(ConstraintValidator<URL, CharSequence> validator) {

0 commit comments

Comments
 (0)