Skip to content

Commit

Permalink
Separate decoding of encoded characters and merging
Browse files Browse the repository at this point in the history
  • Loading branch information
djc committed Jul 8, 2020
1 parent 89c0ce5 commit e040158
Showing 1 changed file with 26 additions and 13 deletions.
39 changes: 26 additions & 13 deletions idna/src/punycode.rs
Expand Up @@ -52,6 +52,12 @@ pub fn decode_to_string(input: &str) -> Option<String> {
/// Overflow can only happen on inputs that take more than
/// 63 encoded bytes, the DNS limit on domain name labels.
pub fn decode(input: &str) -> Option<Vec<char>> {
    // Any failure reported by `insertions` is collapsed into `None` here.
    let (base, buf) = insertions(input).ok()?;
    // `merge` takes the insertion list as a shared slice, so no mutable
    // binding or `&mut` borrow is needed.
    Some(merge(base, &buf))
}

/// Split the input into its basic (ASCII) part and return it together with a Vec of insertions of decoded characters
fn insertions<'a>(input: &'a str) -> Result<(&'a str, Vec<(usize, char)>), ()> {
// Handle "basic" (ASCII) code points.
// They are encoded as-is before the last delimiter, if any.
let (base, input) = match input.rfind(DELIMITER) {
Expand Down Expand Up @@ -87,10 +93,10 @@ pub fn decode(input: &str) -> Option<Vec<char>> {
byte @ b'0'..=b'9' => byte - b'0' + 26,
byte @ b'A'..=b'Z' => byte - b'A',
byte @ b'a'..=b'z' => byte - b'a',
_ => return None,
_ => return Err(()),
} as u32;
if digit > (u32::MAX - i) / weight {
return None; // Overflow
return Err(()); // Overflow
}
i += digit * weight;
let t = if k <= bias {
Expand All @@ -104,26 +110,26 @@ pub fn decode(input: &str) -> Option<Vec<char>> {
break;
}
if weight > u32::MAX / (BASE - t) {
return None; // Overflow
return Err(()); // Overflow
}
weight *= BASE - t;
k += BASE;
byte = match iter.next() {
None => return None, // End of input before the end of this delta
None => return Err(()), // End of input before the end of this delta
Some(byte) => byte,
};
}
bias = adapt(i - previous_i, length + 1, previous_i == 0);
if i / (length + 1) > u32::MAX - code_point {
return None; // Overflow
return Err(()); // Overflow
}
// i was supposed to wrap around from length+1 to 0,
// incrementing code_point each time.
code_point += i / (length + 1);
i %= length + 1;
let c = match char::from_u32(code_point) {
Some(c) => c,
None => return None,
None => return Err(()),
};

// Move earlier insertions farther out in the string
Expand All @@ -137,16 +143,23 @@ pub fn decode(input: &str) -> Option<Vec<char>> {
i += 1;
}

buf.sort_by_key(|(i, _)| -(*i as i32));
buf.sort_by_key(|(i, _)| *i);
Ok((base, buf))
}

/// Merge base character iterator and decoded character insertions
fn merge(input: &str, insertions: &[(usize, char)]) -> Vec<char> {
let mut insertions = insertions.iter();
let mut position = 0;
let mut output = Vec::with_capacity(input.len());
let mut next = buf.pop();
let mut base = base.chars();
let mut next = insertions.next();
let mut base = input.chars();

loop {
match next {
Some((pos, c)) if pos == position => {
output.push(c);
next = buf.pop();
Some((pos, c)) if *pos == position => {
output.push(*c);
next = insertions.next();
position += 1;
continue;
}
Expand All @@ -160,7 +173,7 @@ pub fn decode(input: &str) -> Option<Vec<char>> {
}
}

Some(output)
output
}

/// Convert a Unicode `str` to Punycode.
Expand Down

0 comments on commit e040158

Please sign in to comment.