Skip to content

Commit

Permalink
Merge pull request #184 from ducaale/custom-encoding
Browse files Browse the repository at this point in the history
Support overwriting response's mime and charset
  • Loading branch information
ducaale committed Nov 8, 2021
2 parents 8097b6b + 8e7fd1f commit 1f0d775
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 68 deletions.
132 changes: 101 additions & 31 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::path::PathBuf;
use std::str::FromStr;
use std::time::Duration;

use anyhow::anyhow;
use encoding_rs::Encoding;
use reqwest::{Method, Url};
use serde::{Deserialize, Serialize};
use structopt::clap::{self, arg_enum, AppSettings, Error, ErrorKind, Result};
Expand Down Expand Up @@ -66,6 +68,20 @@ pub struct Cli {
#[structopt(short = "s", long, value_name = "THEME", possible_values = &Theme::variants(), case_insensitive = true)]
pub style: Option<Theme>,

/// Override the response encoding for terminal display purposes.
///
/// Example: `--response-charset=latin1`
/// {n}{n}{n}
#[structopt(long, value_name = "ENCODING", parse(try_from_str = parse_encoding))]
pub response_charset: Option<&'static Encoding>,

/// Override the response mime type for coloring and formatting for the terminal
///
/// Example: `--response-mime=application/json`
/// {n}{n}{n}
#[structopt(long, value_name = "MIME_TYPE")]
pub response_mime: Option<String>,

/// String specifying what the output should contain.
///
/// Use `H` and `B` for request header and body respectively,
Expand Down Expand Up @@ -794,8 +810,8 @@ impl Print {
}

impl FromStr for Print {
type Err = Error;
fn from_str(s: &str) -> Result<Print> {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Print> {
let mut request_headers = false;
let mut request_body = false;
let mut response_headers = false;
Expand All @@ -807,12 +823,7 @@ impl FromStr for Print {
'B' => request_body = true,
'h' => response_headers = true,
'b' => response_body = true,
char => {
return Err(Error::with_description(
&format!("{:?} is not a valid value", char),
ErrorKind::InvalidValue,
))
}
char => return Err(anyhow!("{:?} is not a valid value", char)),
}
}

Expand All @@ -836,17 +847,12 @@ impl Timeout {
}

impl FromStr for Timeout {
type Err = Error;
type Err = anyhow::Error;

fn from_str(sec: &str) -> Result<Timeout> {
fn from_str(sec: &str) -> anyhow::Result<Timeout> {
let pos_sec: f64 = match sec.parse::<f64>() {
Ok(sec) if sec.is_sign_positive() => sec,
_ => {
return Err(Error::with_description(
"Invalid seconds as connection timeout",
ErrorKind::InvalidValue,
))
}
_ => return Err(anyhow!("Invalid seconds as connection timeout")),
};

let dur = Duration::from_secs_f64(pos_sec);
Expand All @@ -862,35 +868,30 @@ pub enum Proxy {
}

impl FromStr for Proxy {
type Err = Error;
type Err = anyhow::Error;

fn from_str(s: &str) -> Result<Self> {
fn from_str(s: &str) -> anyhow::Result<Self> {
let split_arg: Vec<&str> = s.splitn(2, ':').collect();
match split_arg[..] {
[protocol, url] => {
let url = reqwest::Url::try_from(url).map_err(|e| {
Error::with_description(
&format!(
"Invalid proxy URL '{}' for protocol '{}': {}",
url, protocol, e
),
ErrorKind::InvalidValue,
anyhow!(
"Invalid proxy URL '{}' for protocol '{}': {}",
url,
protocol,
e
)
})?;

match protocol.to_lowercase().as_str() {
"http" => Ok(Proxy::Http(url)),
"https" => Ok(Proxy::Https(url)),
"all" => Ok(Proxy::All(url)),
_ => Err(Error::with_description(
&format!("Unknown protocol to set a proxy for: {}", protocol),
ErrorKind::InvalidValue,
)),
_ => Err(anyhow!("Unknown protocol to set a proxy for: {}", protocol)),
}
}
_ => Err(Error::with_description(
"The value passed to --proxy should be formatted as <PROTOCOL>:<PROXY_URL>",
ErrorKind::InvalidValue,
_ => Err(anyhow!(
"The value passed to --proxy should be formatted as <PROTOCOL>:<PROXY_URL>"
)),
}
}
Expand Down Expand Up @@ -958,6 +959,49 @@ impl FromStr for HttpVersion {
}
}

// HTTPie recognizes some encoding names that encoding_rs doesn't, e.g. utf16 has to be spelled as utf-16.
// There are also some encodings which HTTPie supports but encoding_rs doesn't, e.g. utf-7.
// See https://github.com/ducaale/xh/pull/184#pullrequestreview-787528027
/// Resolves a user-supplied charset name to an `encoding_rs` [`Encoding`].
///
/// The name is lowercased and every character other than an alphanumeric,
/// `_`, `-` or `:` is discarded. The shorthands `u8`/`utf` (UTF-8) and `u16`
/// (UTF-16LE) are answered directly; otherwise several respellings of the
/// cleaned name are offered to [`Encoding::for_label`] in turn.
///
/// # Errors
///
/// Returns an error quoting the original input when none of the respellings
/// is accepted by [`Encoding::for_label`].
fn parse_encoding(encoding: &str) -> anyhow::Result<&'static Encoding> {
    let label: String = encoding
        .to_lowercase()
        .chars()
        .filter(|&c| c.is_alphanumeric() || matches!(c, '_' | '-' | ':'))
        .collect();

    // Shorthands resolved here without consulting `Encoding::for_label`.
    if label == "u8" || label == "utf" {
        return Ok(encoding_rs::UTF_8);
    }
    if label == "u16" {
        return Ok(encoding_rs::UTF_16LE);
    }

    // Respellings, tried in this order: as typed, separators removed,
    // underscores turned into hyphens, hyphens turned into underscores.
    let bare = label.replace(&['-', '_'][..], "");
    let hyphenated = label.replace('_', "-");
    let underscored = label.replace('-', "_");
    let direct_hit = [&label, &bare, &hyphenated, &underscored]
        .iter()
        .find_map(|candidate| Encoding::for_label(candidate.as_bytes()));
    if let Some(found) = direct_hit {
        return Ok(found);
    }

    // Last resort: place a single hyphen in front of the first digit of the
    // separator-free name, e.g. "utf16" -> "utf-16".
    bare.find(|c: char| c.is_ascii_digit())
        .map(|digit_at| format!("{}-{}", &bare[..digit_at], &bare[digit_at..]))
        .and_then(|candidate| Encoding::for_label(candidate.as_bytes()))
        .ok_or_else(|| {
            anyhow::anyhow!(
                "{} is not a supported encoding, please refer to https://encoding.spec.whatwg.org/#names-and-labels \
                 for supported encodings",
                encoding
            )
        })
}

/// Based on the function used by clap to abort
fn safe_exit() -> ! {
let _ = std::io::stdout().lock().flush();
Expand Down Expand Up @@ -1284,4 +1328,30 @@ mod tests {
let cli = parse(&["--no-check-status", "--check-status", ":"]).unwrap();
assert_eq!(cli.check_status, Some(true));
}

#[test]
fn parse_encoding_label() {
    // Each pair is (user input, encoding it must resolve to).
    let accepted = [
        ("~~~~UtF////16@@", encoding_rs::UTF_16LE),
        ("utf16", encoding_rs::UTF_16LE),
        ("utf_16_be", encoding_rs::UTF_16BE),
        ("utf16be", encoding_rs::UTF_16BE),
        ("utf-16-be", encoding_rs::UTF_16BE),
        ("utf_8", encoding_rs::UTF_8),
        ("utf8", encoding_rs::UTF_8),
        ("utf-8", encoding_rs::UTF_8),
        ("u8", encoding_rs::UTF_8),
        ("iso8859_6", encoding_rs::ISO_8859_6),
        ("iso_8859-2:1987", encoding_rs::ISO_8859_2),
        ("l1", encoding_rs::WINDOWS_1252),
        ("elot-928", encoding_rs::ISO_8859_7),
    ];

    for &(label, expected) in accepted.iter() {
        assert_eq!(parse_encoding(label).unwrap(), expected);
    }

    // Unknown names and the empty string must be rejected.
    assert!(parse_encoding("notreal").is_err());
    assert!(parse_encoding("").is_err());
}
}
7 changes: 5 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,9 @@ fn run(args: Cli) -> Result<i32> {
let pretty = args.pretty.unwrap_or_else(|| buffer.guess_pretty());
let mut printer = Printer::new(print.clone(), pretty, args.style, args.stream, buffer);

let response_charset = args.response_charset;
let response_mime = args.response_mime.as_deref();

printer.print_request_headers(&request, &*cookie_jar)?;
printer.print_request_body(&mut request)?;

Expand All @@ -411,7 +414,7 @@ fn run(args: Cli) -> Result<i32> {
if args.all {
client.on_redirect(|prev_response, next_request| {
printer.print_response_headers(&prev_response)?;
printer.print_response_body(prev_response)?;
printer.print_response_body(prev_response, response_charset, response_mime)?;
printer.print_separator()?;
printer.print_request_headers(next_request, &*cookie_jar)?;
printer.print_request_body(next_request)?;
Expand Down Expand Up @@ -450,7 +453,7 @@ fn run(args: Cli) -> Result<i32> {
)?;
}
} else {
printer.print_response_body(response)?;
printer.print_response_body(response, response_charset, response_mime)?;
}
}

Expand Down
87 changes: 52 additions & 35 deletions src/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,12 +393,21 @@ impl Printer {
Ok(())
}

pub fn print_response_body(&mut self, mut response: Response) -> anyhow::Result<()> {
pub fn print_response_body(
&mut self,
mut response: Response,
encoding: Option<&'static Encoding>,
mime: Option<&str>,
) -> anyhow::Result<()> {
if !self.print.response_body {
return Ok(());
}

let content_type = get_content_type(response.headers());
let content_type = mime
.map(ContentType::from)
.unwrap_or_else(|| get_content_type(response.headers()));
let encoding = encoding.unwrap_or_else(|| guess_encoding(&response));

if !self.buffer.is_terminal() {
if (self.color || self.indent_json) && content_type.is_text() {
// The user explicitly asked for formatting even though this is
Expand All @@ -414,9 +423,13 @@ impl Printer {
// Unconditionally decoding is not an option because the body
// might not be text at all
if self.stream {
self.print_body_stream(content_type, &mut decode_stream(&mut response))?;
self.print_body_stream(
content_type,
&mut decode_stream(&mut response, encoding),
)?;
} else {
let text = response.text()?;
let bytes = response.bytes()?;
let (text, _, _) = encoding.decode(&bytes);
self.print_body_text(content_type, &text)?;
}
} else if self.stream {
Expand All @@ -426,7 +439,8 @@ impl Printer {
self.buffer.print(&body)?;
}
} else if self.stream {
match self.print_body_stream(content_type, &mut decode_stream(&mut response)) {
match self.print_body_stream(content_type, &mut decode_stream(&mut response, encoding))
{
Ok(_) => {
self.buffer.print("\n")?;
}
Expand All @@ -436,8 +450,9 @@ impl Printer {
Err(err) => return Err(err.into()),
}
} else {
// Note that .text() behaves like String::from_utf8_lossy()
let text = response.text()?;
// Note that .decode() behaves like String::from_utf8_lossy()
let bytes = response.bytes()?;
let (text, _, _) = encoding.decode(&bytes);
if text.contains('\0') {
self.buffer.print(BINARY_SUPPRESSOR)?;
return Ok(());
Expand Down Expand Up @@ -470,35 +485,39 @@ impl ContentType {
}
}

impl From<&str> for ContentType {
    /// Classifies a MIME type / `Content-Type` value by substring search.
    ///
    /// Matching is ASCII case-insensitive, because media type and subtype
    /// names are case-insensitive (`Application/JSON` is JSON too). The
    /// checks run in a fixed order and the first hit wins, so a value that
    /// contains both "json" and "text" is classified as `Json`. A value
    /// matching none of the known markers becomes `ContentType::Unknown`.
    fn from(content_type: &str) -> Self {
        // Normalize once so every check below ignores letter case.
        let content_type = content_type.to_ascii_lowercase();

        if content_type.contains("json") {
            ContentType::Json
        } else if content_type.contains("html") {
            ContentType::Html
        } else if content_type.contains("xml") {
            ContentType::Xml
        } else if content_type.contains("multipart") {
            ContentType::Multipart
        } else if content_type.contains("x-www-form-urlencoded") {
            ContentType::UrlencodedForm
        } else if content_type.contains("javascript") {
            ContentType::JavaScript
        } else if content_type.contains("css") {
            ContentType::Css
        } else if content_type.contains("text") {
            // We later check if this one's JSON
            // HTTPie checks for "json", "javascript" and "text" in one place:
            // https://github.com/httpie/httpie/blob/a32ad344dd/httpie/output/formatters/json.py#L14
            // We have it more spread out but it behaves more or less the same
            ContentType::Text
        } else {
            ContentType::Unknown
        }
    }
}

pub fn get_content_type(headers: &HeaderMap) -> ContentType {
headers
.get(CONTENT_TYPE)
.and_then(|value| value.to_str().ok())
.and_then(|content_type| {
if content_type.contains("json") {
Some(ContentType::Json)
} else if content_type.contains("html") {
Some(ContentType::Html)
} else if content_type.contains("xml") {
Some(ContentType::Xml)
} else if content_type.contains("multipart") {
Some(ContentType::Multipart)
} else if content_type.contains("x-www-form-urlencoded") {
Some(ContentType::UrlencodedForm)
} else if content_type.contains("javascript") {
Some(ContentType::JavaScript)
} else if content_type.contains("css") {
Some(ContentType::Css)
} else if content_type.contains("text") {
// We later check if this one's JSON
// HTTPie checks for "json", "javascript" and "text" in one place:
// https://github.com/httpie/httpie/blob/a32ad344dd/httpie/output/formatters/json.py#L14
// We have it more spread out but it behaves more or less the same
Some(ContentType::Text)
} else {
None
}
})
.map(ContentType::from)
.unwrap_or(ContentType::Unknown)
}

Expand All @@ -512,9 +531,7 @@ pub fn valid_json(text: &str) -> bool {
/// but it makes no guarantees about outputting valid UTF-8 if the input is
/// invalid UTF-8 (claiming to be UTF-8). So only pass data through here
/// that's going to the terminal, and don't trust its output.
fn decode_stream(response: &mut Response) -> impl Read + '_ {
let encoding = guess_encoding(response);

fn decode_stream<'a>(response: &'a mut Response, encoding: &'static Encoding) -> impl Read + 'a {
DecodeReaderBytesBuilder::new()
.encoding(Some(encoding))
.build(response)
Expand Down
Loading

0 comments on commit 1f0d775

Please sign in to comment.