From f7891af8cb8606af26b9cba6af3d29bdacf8e3bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=A8=E5=B0=9A=E5=B7=9D?= Date: Wed, 17 Apr 2013 21:40:57 +0800 Subject: [PATCH 1/4] fix coding bug --- src/web/jsp/cached.jsp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/web/jsp/cached.jsp b/src/web/jsp/cached.jsp index 6968bea0be..ccee8acef6 100644 --- a/src/web/jsp/cached.jsp +++ b/src/web/jsp/cached.jsp @@ -49,6 +49,10 @@ // but I don't know how to emit 'byte sequence' in JSP. // out.getOutputStream().write(bean.getContent(details)) may work, // but I'm not sure. + + // Fix: CharEncodingForConversion is stored in ParseData's ParseMeta, not in ParseData's ContentMeta. + metaData = bean.getParseData(details).getParseMeta(); + String encoding = (String) metaData.get("CharEncodingForConversion"); if (encoding != null) { try { @@ -59,8 +63,11 @@ content = new String(bean.getContent(details), "windows-1252"); } } - else - content = new String(bean.getContent(details)); + else { + // Fix: if the HTTP response's Content-Type header reports the wrong encoding, detect the encoding from the page's original content. + encoding=org.apache.nutch.protocol.Content.getEncoding(bean.getContent(details), nutchConf.get("parser.character.encoding.default")); + content = new String(bean.getContent(details),encoding); + } } %>