Permalink
Browse files

Merged /httpcomponents/httpclient/branches/branch_4_1:r755593-811107

git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@811110 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
1 parent 9ce0d5d commit d384ec4e3a5daf624a64760955c72251140efc4e @ok2c ok2c committed Sep 3, 2009
View
@@ -1,3 +1,10 @@
+Changes since 4.0
+-------------------
+
+* [HTTPCLIENT-834] Transparent content encoding support.
+ Contributed by James Abley <james.abley at gmail.com>
+
+
Release 4.0
-------------------
@@ -0,0 +1,109 @@
+/*
+ * $HeadURL$
+ * $Revision$
+ * $Date$
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+package org.apache.http.impl.client;
+
+import java.io.IOException;
+
+import org.apache.http.Header;
+import org.apache.http.HeaderElement;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpException;
+import org.apache.http.HttpRequest;
+import org.apache.http.HttpRequestInterceptor;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpResponseInterceptor;
+import org.apache.http.protocol.HttpContext;
+
+/**
+ * Adds transparent support for HTTP content codings. As an {@link HttpRequestInterceptor} it
+ * advertises <code>Accept-Encoding: gzip,deflate</code> on requests that do not already carry
+ * an <code>Accept-Encoding</code> header and records in the {@link HttpContext} that it did so.
+ * As an {@link HttpResponseInterceptor} it only looks at the {@link HttpResponse} when that
+ * record is present, and then wraps the response entity in a decompressing entity matching
+ * the first <code>gzip</code> or <code>deflate</code> element of its Content-Encoding.
+ * <p>
+ * The record is a context attribute keyed by this class' name. It is cleared again whenever a
+ * request arrives with a caller-supplied <code>Accept-Encoding</code> header, so that a context
+ * reused for several requests does not carry a stale marker from an earlier exchange.
+ * <p>
+ * Instances of this class hold no state of their own; all per-exchange state lives in the
+ * {@link HttpContext}.
+ *
+ * @see <a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.5">RFC 2616,
+ *      section 3.5 (Content Codings)</a>
+ */
+class ContentEncodingProcessor implements HttpResponseInterceptor, HttpRequestInterceptor {
+
+    /**
+     * Advertises the supported content codings unless the caller has already chosen its own
+     * <code>Accept-Encoding</code> header.
+     *
+     * @param request
+     *            the request about to be sent
+     * @param context
+     *            the context in which the "request was modified" marker is kept
+     */
+    public void process(
+            HttpRequest request, HttpContext context) throws HttpException, IOException {
+
+        final String marker = ContentEncodingProcessor.class.getName();
+
+        if (request.containsHeader("Accept-Encoding")) {
+            /*
+             * A client of this library has set the header itself; presume that was done for a
+             * reason and leave the response alone. Drop any marker left behind by an earlier
+             * request on the same context, otherwise the response would still be decoded.
+             */
+            context.removeAttribute(marker);
+            return;
+        }
+
+        /* Signal which content codings can be decoded transparently. */
+        // TODO add compress support.
+        request.addHeader("Accept-Encoding", "gzip,deflate");
+
+        /* Remember that the request was modified, so that the response may be handled. */
+        context.setAttribute(marker, Boolean.TRUE);
+    }
+
+    /**
+     * Wraps the response entity in a decompressing entity if this processor modified the
+     * request and the entity declares a <code>gzip</code> or <code>deflate</code> content
+     * coding. Only the first such coding listed is handled; any other coding is left as is.
+     *
+     * @param response
+     *            the response whose entity may be replaced
+     * @param context
+     *            the context carrying the "request was modified" marker
+     */
+    public void process(
+            HttpResponse response, HttpContext context) throws HttpException, IOException {
+
+        if (context.getAttribute(ContentEncodingProcessor.class.getName()) == null) {
+            /* The request was not touched by this processor, so neither is the response. */
+            return;
+        }
+
+        final HttpEntity entity = response.getEntity();
+        if (entity == null) {
+            /* No body at all, e.g. a 304 Not Modified or 204 No Content response. */
+            return;
+        }
+
+        final Header ceheader = entity.getContentEncoding();
+        if (ceheader == null) {
+            return;
+        }
+
+        final HeaderElement[] codecs = ceheader.getElements();
+        for (int i = 0; i < codecs.length; i++) {
+            final String coding = codecs[i].getName();
+            if ("gzip".equalsIgnoreCase(coding)) {
+                response.setEntity(new GzipDecompressingEntity(entity));
+                return;
+            }
+            if ("deflate".equalsIgnoreCase(coding)) {
+                response.setEntity(new DeflateDecompressingEntity(entity));
+                return;
+            }
+            // TODO add compress. identity is a no-op.
+        }
+    }
+
+}
@@ -0,0 +1,174 @@
+/*
+ * $HeadURL$
+ * $Revision$
+ * $Date$
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+package org.apache.http.impl.client;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+import java.util.zip.DataFormatException;
+import java.util.zip.Inflater;
+import java.util.zip.InflaterInputStream;
+
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.entity.HttpEntityWrapper;
+
+/**
+ * {@link HttpEntityWrapper} responsible for handling deflate Content Coded responses. In RFC2616
+ * terms, <code>deflate</code> means a <code>zlib</code> stream as defined in RFC1950. Some server
+ * implementations have misinterpreted RFC2616 to mean that a <code>deflate</code> stream as
+ * defined in RFC1951 should be used (or maybe they did that since that's how IE behaves?). It's
+ * confusing that <code>deflate</code> in HTTP 1.1 means <code>zlib</code> streams rather than
+ * <code>deflate</code> streams. Both types are handled here, since that's what is seen on the
+ * internet. Moral - prefer <code>gzip</code>!
+ */
+class DeflateDecompressingEntity extends HttpEntityWrapper {
+
+    /** Number of leading bytes that are read ahead to sniff the stream format. */
+    private static final int PEEK_SIZE = 6;
+
+    /**
+     * Creates a new {@link DeflateDecompressingEntity} which will wrap the specified
+     * {@link HttpEntity}.
+     *
+     * @param entity
+     *            a non-null {@link HttpEntity} to be wrapped
+     */
+    public DeflateDecompressingEntity(final HttpEntity entity) {
+        super(entity);
+    }
+
+    /**
+     * Returns a stream of the inflated content of the wrapped entity.
+     * <p>
+     * A zlib stream (RFC1950) starts with a header:
+     *
+     * <pre>
+     * CMF | FLG [| DICTID ] | ...compressed data | ADLER32 |
+     * </pre>
+     *
+     * where CMF and FLG are one byte each and DICTID is four bytes, only present if FLG.FDICT
+     * is set (see RFC1950, section 2.2). A raw deflate stream (RFC1951) has no such header.
+     * The first few bytes of the content are read ahead and handed to a throw-away
+     * {@link Inflater}; if it accepts them the content is treated as a zlib stream, otherwise
+     * as a raw deflate stream. The bytes read ahead are pushed back, so the returned stream
+     * starts at the very beginning of the content.
+     *
+     * @return a stream yielding the uncompressed content
+     * @throws IOException
+     *             if the wrapped content is empty, cannot be read, or the probe ends
+     *             unexpectedly
+     */
+    @Override
+    public InputStream getContent() throws IOException {
+        InputStream wrapped = this.wrappedEntity.getContent();
+
+        /* We read a small buffer to sniff the content. */
+        byte[] peeked = new byte[PEEK_SIZE];
+        PushbackInputStream pushback = new PushbackInputStream(wrapped, peeked.length);
+
+        /*
+         * A single read() may return fewer bytes than requested, so keep reading until the
+         * buffer is full or the content ends.
+         */
+        int headerLength = 0;
+        while (headerLength < peeked.length) {
+            int count = pushback.read(peeked, headerLength, peeked.length - headerLength);
+            if (count == -1) {
+                break;
+            }
+            headerLength += count;
+        }
+
+        if (headerLength == 0) {
+            throw new IOException("Unable to read the response");
+        }
+
+        boolean zlib = looksLikeZlib(peeked, headerLength);
+
+        /* Hand back an unused stream: everything read for sniffing is pushed back first. */
+        pushback.unread(peeked, 0, headerLength);
+        if (zlib) {
+            return new InflaterInputStream(pushback);
+        }
+
+        /* Presume that it's an RFC1951 deflate stream rather than an RFC1950 zlib stream. */
+        return new InflaterInputStream(pushback, new Inflater(true));
+    }
+
+    /**
+     * Probes whether the given leading bytes are accepted by a zlib {@link Inflater}.
+     * <p>
+     * The bytes are fed to the inflater exactly once and only as many as were actually read.
+     * If the inflater merely asks for more input or for a preset dictionary, no format error
+     * was found in the bytes supplied and the stream is taken to be zlib.
+     *
+     * @param header
+     *            buffer holding the leading bytes of the content
+     * @param length
+     *            number of valid bytes in <code>header</code>
+     * @return <code>true</code> if the bytes were accepted as the start of a zlib stream,
+     *         <code>false</code> if the inflater rejected them
+     * @throws IOException
+     *             if the inflater reports the end of the stream without producing any output
+     */
+    private static boolean looksLikeZlib(final byte[] header, final int length)
+            throws IOException {
+
+        /* We try to read the first uncompressed byte. */
+        byte[] dummy = new byte[1];
+        Inflater inf = new Inflater();
+        try {
+            inf.setInput(header, 0, length);
+            if (inf.inflate(dummy) == 0 && inf.finished()) {
+
+                /* Not expecting this, so fail loudly. */
+                throw new IOException("Unable to read the response");
+            }
+            return true;
+        } catch (DataFormatException e) {
+            return false;
+        } finally {
+
+            /* The probing inflater is not used again; release its resources right away. */
+            inf.end();
+        }
+    }
+
+    /**
+     * Returns <code>null</code>, because this wrapper has dealt with the Content-Encoding.
+     */
+    @Override
+    public Header getContentEncoding() {
+        return null;
+    }
+
+    /**
+     * Returns <code>-1</code>, because the length of the inflated content is unknown.
+     */
+    @Override
+    public long getContentLength() {
+        return -1;
+    }
+
+}
@@ -0,0 +1,90 @@
+/*
+ * $HeadURL$
+ * $Revision$
+ * $Date$
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+package org.apache.http.impl.client;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.entity.HttpEntityWrapper;
+
+/**
+ * {@link HttpEntityWrapper} that transparently un-gzips the content of the entity it wraps.
+ */
+class GzipDecompressingEntity extends HttpEntityWrapper {
+
+    /**
+     * Creates a new {@link GzipDecompressingEntity} around the given {@link HttpEntity}.
+     *
+     * @param entity
+     *            the non-null {@link HttpEntity} to be wrapped
+     */
+    public GzipDecompressingEntity(final HttpEntity entity) {
+        super(entity);
+    }
+
+    /**
+     * Returns the content of the wrapped entity, decoded through a {@link GZIPInputStream}.
+     * Whether the content can be obtained more than once is decided by the wrapped entity's
+     * own <code>getContent()</code>.
+     */
+    @Override
+    public InputStream getContent() throws IOException, IllegalStateException {
+        return new GZIPInputStream(wrappedEntity.getContent());
+    }
+
+    /**
+     * Returns <code>null</code>, because this wrapper has dealt with the Content-Encoding.
+     */
+    @Override
+    public Header getContentEncoding() {
+        return null;
+    }
+
+    /**
+     * Returns <code>-1</code>, because the length of the ungzipped content is not known.
+     */
+    @Override
+    public long getContentLength() {
+        return -1;
+    }
+
+}
Oops, something went wrong.

0 comments on commit d384ec4

Please sign in to comment.