Skip to content

Commit

Permalink
allow more generic schemes and add scheme to LinkDestination
Browse files Browse the repository at this point in the history
  • Loading branch information
Simon-Laux committed Sep 8, 2023
1 parent c049958 commit 4f09af5
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 17 deletions.
10 changes: 6 additions & 4 deletions spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,13 @@ Make URLs clickable.

- `.`, `,`, `;`, `:` should not be parsed as an ending char of an inline-link (this rule is only for standalone/inline links)

#### Allowed schemes:
#### Linkified schemes:

- all Common Internet Scheme links (containing `//` after scheme)
- mailto
- news
- all Common Internet Scheme links (containing `//` after scheme),
- `mailto:`, `news:`, `feed:`
- `tel:`, `sms:`, `geo:`, `maps:`
- `bitcoin:`, `bitcoincash:`, `eth:`, `ethereum:`
- `magnet:`

##### `mailto:email@address.example.com`

Expand Down
104 changes: 92 additions & 12 deletions src/parser/link_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ pub struct LinkDestination<'a> {
/// contains data for the punycode warning if punycode was detected
/// (the host part contains non ascii unicode characters)
pub punycode: Option<PunycodeWarning>,
/// scheme of the link without the trailing `:`, e.g. `http` or `mailto`
pub scheme: &'a str,
}

#[derive(Debug, PartialEq, Eq, Serialize)]
Expand All @@ -41,9 +43,23 @@ pub struct PunycodeWarning {
punycode_encoded_url: String,
}

/// determines which schemes get linkified
fn is_allowed_scheme(scheme: &str) -> bool {
matches!(scheme.to_ascii_lowercase().as_ref(), "mailto" | "news")
/// Determines which generic schemes (those without `://`) get linkified.
///
/// `scheme` is the bare scheme name without the trailing `:` (for example
/// `mailto`, not `mailto:`). The comparison is ASCII case-insensitive, so
/// `MAILTO` and `mailto` are treated alike.
///
/// Returns `true` only when `scheme` is one of the entries in `ALLOWED`.
fn is_allowed_generic_scheme(scheme: &str) -> bool {
    /// Generic schemes that are turned into links; all entries are lowercase ASCII.
    const ALLOWED: [&str; 12] = [
        "mailto",
        "news",
        "feed",
        "tel",
        "sms",
        "geo",
        "maps",
        "bitcoin",
        "bitcoincash",
        "eth",
        "ethereum",
        "magnet",
    ];

    // `eq_ignore_ascii_case` compares in place, so no temporary lowercased
    // `String` has to be allocated for every candidate link.
    ALLOWED
        .iter()
        .any(|allowed| scheme.eq_ignore_ascii_case(allowed))
}

impl LinkDestination<'_> {
Expand All @@ -53,11 +69,12 @@ impl LinkDestination<'_> {
input: &str,
) -> IResult<&str, LinkDestination, CustomError<&str>> {
if let Ok((rest, (link, info))) = parse_url(input) {
let (hostname, punycode) = match info {
let (hostname, punycode, scheme) = match info {
UrlInfo::CommonInternetSchemeURL {
has_puny_code_in_host_name,
hostname,
ascii_hostname,
scheme,
} => {
if has_puny_code_in_host_name {
(
Expand All @@ -67,16 +84,17 @@ impl LinkDestination<'_> {
punycode_encoded_url: link.replacen(hostname, &ascii_hostname, 1),
ascii_hostname,
}),
scheme,
)
} else {
(Some(hostname), None)
(Some(hostname), None, scheme)
}
}
UrlInfo::GenericUrl { scheme } => {
if !is_allowed_scheme(scheme) {
if !is_allowed_generic_scheme(scheme) {
return Err(nom::Err::Error(CustomError::InvalidLink));
}
(None, None)
(None, None, scheme)
}
};

Expand All @@ -86,6 +104,7 @@ impl LinkDestination<'_> {
target: link,
hostname,
punycode,
scheme,
},
))
} else {
Expand All @@ -95,11 +114,12 @@ impl LinkDestination<'_> {

pub fn parse(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> {
if let Ok((rest, (link, info))) = parse_url(input) {
let (hostname, punycode) = match info {
let (hostname, punycode, scheme) = match info {
UrlInfo::CommonInternetSchemeURL {
has_puny_code_in_host_name,
hostname,
ascii_hostname,
scheme,
} => {
if has_puny_code_in_host_name {
(
Expand All @@ -109,12 +129,13 @@ impl LinkDestination<'_> {
punycode_encoded_url: link.replacen(hostname, &ascii_hostname, 1),
ascii_hostname,
}),
scheme,
)
} else {
(Some(hostname), None)
(Some(hostname), None, scheme)
}
}
UrlInfo::GenericUrl { .. } => (None, None),
UrlInfo::GenericUrl { scheme, .. } => (None, None, scheme),
};

Ok((
Expand All @@ -123,6 +144,7 @@ impl LinkDestination<'_> {
target: link,
hostname,
punycode,
scheme,
},
))
} else {
Expand All @@ -138,6 +160,7 @@ enum UrlInfo<'a> {
has_puny_code_in_host_name: bool,
hostname: &'a str,
ascii_hostname: String,
scheme: &'a str,
},
GenericUrl {
scheme: &'a str,
Expand Down Expand Up @@ -352,6 +375,7 @@ fn url_intern<'a>(input: &'a str) -> IResult<&'a str, UrlInfo<'a>, LinkParseErro
Ok((
input,
UrlInfo::CommonInternetSchemeURL {
scheme,
hostname: host,
has_puny_code_in_host_name: is_puny,
ascii_hostname: if is_puny {
Expand Down Expand Up @@ -443,7 +467,8 @@ mod test {
UrlInfo::CommonInternetSchemeURL {
hostname: "münchen.de",
has_puny_code_in_host_name: true,
ascii_hostname: "xn--mnchen-3ya.de".to_owned()
ascii_hostname: "xn--mnchen-3ya.de".to_owned(),
scheme: "http"
}
)
);
Expand All @@ -455,9 +480,64 @@ mod test {
UrlInfo::CommonInternetSchemeURL {
hostname: "muenchen.de",
has_puny_code_in_host_name: false,
ascii_hostname: "muenchen.de".to_owned()
ascii_hostname: "muenchen.de".to_owned(),
scheme: "http"
}
)
);
}

#[test]
fn common_schemes() {
    // Both inputs point at the same host and differ only in their scheme,
    // so the expected `UrlInfo` is identical apart from the `scheme` field.
    let cases = [
        ("http://delta.chat", "http"),
        ("https://delta.chat", "https"),
    ];

    for &(url, expected_scheme) in cases.iter() {
        let expected_info = UrlInfo::CommonInternetSchemeURL {
            hostname: "delta.chat",
            has_puny_code_in_host_name: false,
            ascii_hostname: "delta.chat".to_owned(),
            scheme: expected_scheme,
        };
        // The whole input is expected to be consumed as the link text.
        assert_eq!(parse_url(url).unwrap().1, (url, expected_info));
    }
}
#[test]
fn generic_schemes() {
    // (input URL, scheme the parser is expected to report)
    let cases = [
        ("mailto:someone@example.com", "mailto"),
        (
            "bitcoin:bc1qt3xhfvwmdqvxkk089tllvvtzqs8ts06u3u6qka",
            "bitcoin",
        ),
        ("geo:37.786971,-122.399677", "geo"),
    ];

    for &(url, scheme) in cases.iter() {
        // Each URL must be consumed entirely and classified as a generic URL.
        let parsed = parse_url(url).unwrap().1;
        assert_eq!(parsed, (url, UrlInfo::GenericUrl { scheme }));
    }
}
}
3 changes: 2 additions & 1 deletion tests/text_to_ast/desktop_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,8 @@ fn link() {
destination: LinkDestination {
target: _,
punycode: None,
hostname: _
hostname: _,
scheme: _,
}
}
));
Expand Down
1 change: 1 addition & 0 deletions tests/text_to_ast/text_only.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ fn link() {
target: input,
hostname: Some(hostname),
punycode: None,
scheme: "http"
}
}]
);
Expand Down

0 comments on commit 4f09af5

Please sign in to comment.