From a2771dc0d1f551b8dd1e07609ce978251a05f34a Mon Sep 17 00:00:00 2001
From: Sebastian Nagel
Date: Mon, 11 Jun 2018 13:35:53 +0200
Subject: [PATCH] NUTCH-2560 protocol-http throws an error when an http header spans over multiple lines

- add unit test to verify that multi-line headers are correctly parsed
---
 .../protocol/http/TestBadServerResponses.java | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestBadServerResponses.java b/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestBadServerResponses.java
index 77bc701572..51c7930b60 100644
--- a/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestBadServerResponses.java
+++ b/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestBadServerResponses.java
@@ -19,6 +19,7 @@
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
@@ -62,6 +63,7 @@ public void setUp() throws Exception {
     conf = new Configuration();
     conf.addResource("nutch-default.xml");
     conf.addResource("nutch-site-test.xml");
+    conf.setBoolean("store.http.headers", true);
 
     http = new Http();
     http.setConf(conf);
@@ -239,6 +241,23 @@ public void testNoStatusLine() throws Exception {
         new String(fetched.getContent(), StandardCharsets.UTF_8));
   }
 
+  /**
+   * NUTCH-2560 protocol-http throws an error when an http header spans over
+   * multiple lines
+   */
+  @Test
+  public void testMultiLineHeader() throws Exception {
+    setUp();
+    launchServer(responseHeader
+        + "Set-Cookie: UserID=JohnDoe;\r\n Max-Age=3600;\r\n Version=1\r\n"
+        + simpleContent);
+    Response fetched = fetchPage("/", 200);
+    LOG.info("Headers: {}", fetched.getHeaders());
+    assertNotNull("Failed to set multi-line \"Set-Cookie\" header.", fetched.getHeader("Set-Cookie"));
+    assertTrue("Failed to set multi-line \"Set-Cookie\" header.",
+        fetched.getHeader("Set-Cookie").contains("Version=1"));
+  }
+
   /**
    * NUTCH-2561 protocol-http can be made to read arbitrarily large HTTP
    * responses