Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support overwriting response's mime and charset #184

Merged
merged 7 commits into from
Nov 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 101 additions & 31 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::path::PathBuf;
use std::str::FromStr;
use std::time::Duration;

use anyhow::anyhow;
use encoding_rs::Encoding;
use reqwest::{Method, Url};
use serde::{Deserialize, Serialize};
use structopt::clap::{self, arg_enum, AppSettings, Error, ErrorKind, Result};
Expand Down Expand Up @@ -65,6 +67,20 @@ pub struct Cli {
#[structopt(short = "s", long, value_name = "THEME", possible_values = &Theme::variants(), case_insensitive = true)]
pub style: Option<Theme>,

/// Override the response encoding for terminal display purposes.
///
/// Example: `--response-charset=latin1`
/// {n}{n}{n}
#[structopt(long, value_name = "ENCODING", parse(try_from_str = parse_encoding))]
pub response_charset: Option<&'static Encoding>,

/// Override the response mime type for coloring and formatting for the terminal
///
/// Example: `--response-mime=application/json`
/// {n}{n}{n}
#[structopt(long, value_name = "MIME_TYPE")]
pub response_mime: Option<String>,

/// String specifying what the output should contain.
///
/// Use `H` and `B` for request header and body respectively,
Expand Down Expand Up @@ -793,8 +809,8 @@ impl Print {
}

impl FromStr for Print {
type Err = Error;
fn from_str(s: &str) -> Result<Print> {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Print> {
let mut request_headers = false;
let mut request_body = false;
let mut response_headers = false;
Expand All @@ -806,12 +822,7 @@ impl FromStr for Print {
'B' => request_body = true,
'h' => response_headers = true,
'b' => response_body = true,
char => {
return Err(Error::with_description(
&format!("{:?} is not a valid value", char),
ErrorKind::InvalidValue,
))
}
char => return Err(anyhow!("{:?} is not a valid value", char)),
}
}

Expand All @@ -835,17 +846,12 @@ impl Timeout {
}

impl FromStr for Timeout {
type Err = Error;
type Err = anyhow::Error;

fn from_str(sec: &str) -> Result<Timeout> {
fn from_str(sec: &str) -> anyhow::Result<Timeout> {
let pos_sec: f64 = match sec.parse::<f64>() {
Ok(sec) if sec.is_sign_positive() => sec,
_ => {
return Err(Error::with_description(
"Invalid seconds as connection timeout",
ErrorKind::InvalidValue,
))
}
_ => return Err(anyhow!("Invalid seconds as connection timeout")),
};

let dur = Duration::from_secs_f64(pos_sec);
Expand All @@ -861,35 +867,30 @@ pub enum Proxy {
}

impl FromStr for Proxy {
type Err = Error;
type Err = anyhow::Error;

fn from_str(s: &str) -> Result<Self> {
fn from_str(s: &str) -> anyhow::Result<Self> {
let split_arg: Vec<&str> = s.splitn(2, ':').collect();
match split_arg[..] {
[protocol, url] => {
let url = reqwest::Url::try_from(url).map_err(|e| {
Error::with_description(
&format!(
"Invalid proxy URL '{}' for protocol '{}': {}",
url, protocol, e
),
ErrorKind::InvalidValue,
anyhow!(
"Invalid proxy URL '{}' for protocol '{}': {}",
url,
protocol,
e
)
})?;

match protocol.to_lowercase().as_str() {
"http" => Ok(Proxy::Http(url)),
"https" => Ok(Proxy::Https(url)),
"all" => Ok(Proxy::All(url)),
_ => Err(Error::with_description(
&format!("Unknown protocol to set a proxy for: {}", protocol),
ErrorKind::InvalidValue,
)),
_ => Err(anyhow!("Unknown protocol to set a proxy for: {}", protocol)),
}
}
_ => Err(Error::with_description(
"The value passed to --proxy should be formatted as <PROTOCOL>:<PROXY_URL>",
ErrorKind::InvalidValue,
_ => Err(anyhow!(
"The value passed to --proxy should be formatted as <PROTOCOL>:<PROXY_URL>"
)),
}
}
Expand Down Expand Up @@ -957,6 +958,49 @@ impl FromStr for HttpVersion {
}
}

// HTTPie recognizes some encoding names that encoding_rs doesn't, e.g. utf16 has to be spelled as utf-16.
// There are also some encodings which encoding_rs doesn't support but HTTPie does, e.g. utf-7.
// See https://github.com/ducaale/xh/pull/184#pullrequestreview-787528027
/// Resolves a user-supplied encoding label to an `encoding_rs` encoding.
///
/// The label is resolved in these steps, returning on the first success:
///
/// 1. Normalize it: lowercase it and remove every character that is not
///    alphanumeric, `_`, `-` or `:`.
/// 2. Map the aliases `u8`/`utf` to UTF-8 and `u16` to UTF-16LE directly.
/// 3. Ask `Encoding::for_label` about the normalized label, then about the
///    label with all `-`/`_` removed, with `_` replaced by `-`, and with `-`
///    replaced by `_`.
/// 4. Remove all `-`/`_`, insert a single `-` in front of the first digit
///    (so `utf16` becomes `utf-16`) and ask `Encoding::for_label` once more.
///
/// # Errors
///
/// Returns an error that quotes the label exactly as it was passed in when
/// none of the steps above yields an encoding.
fn parse_encoding(encoding: &str) -> anyhow::Result<&'static Encoding> {
// Lowercase the label and drop every character other than alphanumerics, `_`, `-` and `:`.
let normalized_encoding = encoding.to_lowercase().replace(
|c: char| (!c.is_alphanumeric() && c != '_' && c != '-' && c != ':'),
"",
);

// Aliases that are resolved directly, without consulting `Encoding::for_label`.
match normalized_encoding.as_str() {
"u8" | "utf" => return Ok(encoding_rs::UTF_8),
"u16" => return Ok(encoding_rs::UTF_16LE),
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

encoding_rs associates the label utf-16 with UTF_16LE but I am not sure if it is the same in Python.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also don't think that encoding_rs supports utf-32

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On my phone (ARM) it's little-endian:

>>> 'a'.encode('utf_16_be').decode('utf16')
'愀'
>>> 'a'.encode('utf_16_le').decode('utf16')
'a'

I wouldn't be surprised if it depended on the machine's architecture. x86 is also little-endian so we'd agree on most machines.
In any case, encoding-rs is made for the web, so if it disagrees with Python then it's probably Python which is wrong.

_ => (),
}

for encoding in &[
&normalized_encoding,
&normalized_encoding.replace(&['-', '_'][..], ""),
&normalized_encoding.replace('_', "-"),
&normalized_encoding.replace('-', "_"),
] {
if let Some(encoding) = Encoding::for_label(encoding.as_bytes()) {
return Ok(encoding);
}
}

{
let mut encoding = normalized_encoding.replace(&['-', '_'][..], "");
if let Some(first_digit_index) = encoding.find(|c: char| c.is_digit(10)) {
encoding.insert(first_digit_index, '-');
if let Some(encoding) = Encoding::for_label(encoding.as_bytes()) {
return Ok(encoding);
}
}
}

Err(anyhow::anyhow!(
"{} is not a supported encoding, please refer to https://encoding.spec.whatwg.org/#names-and-labels \
for supported encodings",
encoding
))
}

/// Based on the function used by clap to abort
fn safe_exit() -> ! {
let _ = std::io::stdout().lock().flush();
Expand Down Expand Up @@ -1283,4 +1327,30 @@ mod tests {
let cli = parse(&["--no-check-status", "--check-status", ":"]).unwrap();
assert_eq!(cli.check_status, Some(true));
}

#[test]
fn parse_encoding_label() {
    // Each entry pairs a label as a user might type it with the encoding
    // `parse_encoding` is expected to resolve it to.
    let test_cases = vec![
        ("~~~~UtF////16@@", encoding_rs::UTF_16LE),
        ("utf16", encoding_rs::UTF_16LE),
        ("utf_16_be", encoding_rs::UTF_16BE),
        ("utf16be", encoding_rs::UTF_16BE),
        ("utf-16-be", encoding_rs::UTF_16BE),
        ("utf_8", encoding_rs::UTF_8),
        ("utf8", encoding_rs::UTF_8),
        ("utf-8", encoding_rs::UTF_8),
        ("u8", encoding_rs::UTF_8),
        ("iso8859_6", encoding_rs::ISO_8859_6),
        ("iso_8859-2:1987", encoding_rs::ISO_8859_2),
        ("l1", encoding_rs::WINDOWS_1252),
        ("elot-928", encoding_rs::ISO_8859_7),
    ];

    for (input, output) in test_cases {
        // Match instead of `unwrap()` so that a failure names the offending label.
        match parse_encoding(input) {
            Ok(encoding) => assert_eq!(
                encoding, output,
                "label {:?} resolved to the wrong encoding",
                input
            ),
            Err(err) => panic!("label {:?} should have been accepted: {}", input, err),
        }
    }

    // Labels that no normalization step can turn into a known encoding.
    for input in &["notreal", ""] {
        assert!(
            parse_encoding(input).is_err(),
            "label {:?} should have been rejected",
            input
        );
    }
}
}
7 changes: 5 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,9 @@ fn run(args: Cli) -> Result<i32> {
let pretty = args.pretty.unwrap_or_else(|| buffer.guess_pretty());
let mut printer = Printer::new(print.clone(), pretty, args.style, args.stream, buffer);

let response_charset = args.response_charset;
let response_mime = args.response_mime.as_deref();

printer.print_request_headers(&request, &*cookie_jar)?;
printer.print_request_body(&mut request)?;

Expand All @@ -411,7 +414,7 @@ fn run(args: Cli) -> Result<i32> {
if args.all {
client.on_redirect(|prev_response, next_request| {
printer.print_response_headers(&prev_response)?;
printer.print_response_body(prev_response)?;
printer.print_response_body(prev_response, response_charset, response_mime)?;
printer.print_separator()?;
printer.print_request_headers(next_request, &*cookie_jar)?;
printer.print_request_body(next_request)?;
Expand Down Expand Up @@ -450,7 +453,7 @@ fn run(args: Cli) -> Result<i32> {
)?;
}
} else {
printer.print_response_body(response)?;
printer.print_response_body(response, response_charset, response_mime)?;
}
}

Expand Down
87 changes: 52 additions & 35 deletions src/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,12 +393,21 @@ impl Printer {
Ok(())
}

pub fn print_response_body(&mut self, mut response: Response) -> anyhow::Result<()> {
pub fn print_response_body(
&mut self,
mut response: Response,
encoding: Option<&'static Encoding>,
mime: Option<&str>,
) -> anyhow::Result<()> {
if !self.print.response_body {
return Ok(());
}

let content_type = get_content_type(response.headers());
let content_type = mime
.map(ContentType::from)
.unwrap_or_else(|| get_content_type(response.headers()));
let encoding = encoding.unwrap_or_else(|| guess_encoding(&response));

if !self.buffer.is_terminal() {
if (self.color || self.indent_json) && content_type.is_text() {
// The user explicitly asked for formatting even though this is
Expand All @@ -414,9 +423,13 @@ impl Printer {
// Unconditionally decoding is not an option because the body
// might not be text at all
if self.stream {
self.print_body_stream(content_type, &mut decode_stream(&mut response))?;
self.print_body_stream(
content_type,
&mut decode_stream(&mut response, encoding),
)?;
} else {
let text = response.text()?;
let bytes = response.bytes()?;
let (text, _, _) = encoding.decode(&bytes);
self.print_body_text(content_type, &text)?;
}
} else if self.stream {
Expand All @@ -426,7 +439,8 @@ impl Printer {
self.buffer.print(&body)?;
}
} else if self.stream {
match self.print_body_stream(content_type, &mut decode_stream(&mut response)) {
match self.print_body_stream(content_type, &mut decode_stream(&mut response, encoding))
{
Ok(_) => {
self.buffer.print("\n")?;
}
Expand All @@ -436,8 +450,9 @@ impl Printer {
Err(err) => return Err(err.into()),
}
} else {
// Note that .text() behaves like String::from_utf8_lossy()
let text = response.text()?;
// Note that .decode() behaves like String::from_utf8_lossy()
let bytes = response.bytes()?;
let (text, _, _) = encoding.decode(&bytes);
if text.contains('\0') {
self.buffer.print(BINARY_SUPPRESSOR)?;
return Ok(());
Expand Down Expand Up @@ -470,35 +485,39 @@ impl ContentType {
}
}

impl From<&str> for ContentType {
    /// Classifies a MIME type (or a whole `Content-Type` value) by looking for
    /// well-known substrings.
    ///
    /// Media type names are case-insensitive, so the value is ASCII-lowercased
    /// before matching; `Application/JSON` is treated like `application/json`.
    /// The checks run in a fixed order and the first hit wins, e.g.
    /// `application/xhtml+xml` is classified as HTML rather than XML.
    /// Values that match nothing yield `ContentType::Unknown`.
    fn from(content_type: &str) -> Self {
        let content_type = content_type.to_ascii_lowercase();

        if content_type.contains("json") {
            ContentType::Json
        } else if content_type.contains("html") {
            ContentType::Html
        } else if content_type.contains("xml") {
            ContentType::Xml
        } else if content_type.contains("multipart") {
            ContentType::Multipart
        } else if content_type.contains("x-www-form-urlencoded") {
            ContentType::UrlencodedForm
        } else if content_type.contains("javascript") {
            ContentType::JavaScript
        } else if content_type.contains("css") {
            ContentType::Css
        } else if content_type.contains("text") {
            // We later check if this one's JSON
            // HTTPie checks for "json", "javascript" and "text" in one place:
            // https://github.com/httpie/httpie/blob/a32ad344dd/httpie/output/formatters/json.py#L14
            // We have it more spread out but it behaves more or less the same
            ContentType::Text
        } else {
            ContentType::Unknown
        }
    }
}

/// Determines the [`ContentType`] of a message from its `Content-Type` header.
///
/// Returns [`ContentType::Unknown`] when the header is absent or when
/// `to_str()` cannot view its value as a string. Otherwise the header text is
/// classified by the `From<&str>` implementation for [`ContentType`], so the
/// header and an explicitly supplied MIME type are classified by the same rules.
pub fn get_content_type(headers: &HeaderMap) -> ContentType {
    headers
        .get(CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .map(ContentType::from)
        .unwrap_or(ContentType::Unknown)
}

Expand All @@ -512,9 +531,7 @@ pub fn valid_json(text: &str) -> bool {
/// but it makes no guarantees about outputting valid UTF-8 if the input is
/// invalid UTF-8 (claiming to be UTF-8). So only pass data through here
/// that's going to the terminal, and don't trust its output.
fn decode_stream(response: &mut Response) -> impl Read + '_ {
let encoding = guess_encoding(response);

fn decode_stream<'a>(response: &'a mut Response, encoding: &'static Encoding) -> impl Read + 'a {
DecodeReaderBytesBuilder::new()
.encoding(Some(encoding))
.build(response)
Expand Down
Loading