Skip to content

Commit

Permalink
android.text.Html instead of own html parser
Browse files Browse the repository at this point in the history
  • Loading branch information
geometer committed May 8, 2011
1 parent cee7d52 commit 9d0ca05
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 197 deletions.
22 changes: 11 additions & 11 deletions src/org/geometerplus/fbreader/network/atom/ATOMXMLReader.java
Expand Up @@ -301,7 +301,7 @@ public boolean startElementHandler(
case F_TITLE:
case F_SUBTITLE:
myHtmlToString.appendText(bufferContent);
myHtmlToString.processTextContent(false, tag, attributes);
myHtmlToString.appendStartTag(tag, attributes);
break;
default:
break;
Expand Down Expand Up @@ -379,26 +379,26 @@ public boolean endElementHandler(final String ns, final String tag,
myHtmlToString.appendText(bufferContent);
if (ns == XMLNamespaces.Atom && tag == TAG_TITLE) {
// TODO:implement ATOMTextConstruct & ATOMTitle
final String title = myHtmlToString.finishTextContent();
final String title = myHtmlToString.getText();
if (myFeed != null) {
myFeed.Title = title;
}
myState = FEED;
} else {
myHtmlToString.processTextContent(true, tag, null);
myHtmlToString.appendEndTag(tag);
}
break;
case F_SUBTITLE:
myHtmlToString.appendText(bufferContent);
if (ns == XMLNamespaces.Atom && tag == TAG_SUBTITLE) {
// TODO:implement ATOMTextConstruct & ATOMSubtitle
final String subtitle = myHtmlToString.finishTextContent();
final String subtitle = myHtmlToString.getText();
if (myFeed != null) {
myFeed.Subtitle = subtitle;
}
myState = FEED;
} else {
myHtmlToString.processTextContent(true, tag, null);
myHtmlToString.appendEndTag(tag);
}
break;
case F_UPDATED:
Expand Down Expand Up @@ -504,30 +504,30 @@ public boolean endElementHandler(final String ns, final String tag,
myHtmlToString.appendText(bufferContent);
if (ns == XMLNamespaces.Atom && tag == TAG_SUMMARY) {
// TODO:implement ATOMTextConstruct & ATOMSummary
myEntry.Summary = myHtmlToString.finishTextContent();
myEntry.Summary = myHtmlToString.getText();
myState = F_ENTRY;
} else {
myHtmlToString.processTextContent(true, tag, null);
myHtmlToString.appendEndTag(tag);
}
break;
case FE_CONTENT:
myHtmlToString.appendText(bufferContent);
if (ns == XMLNamespaces.Atom && tag == TAG_CONTENT) {
// TODO:implement ATOMContent
myEntry.Content = myHtmlToString.finishTextContent();
myEntry.Content = myHtmlToString.getText();
myState = F_ENTRY;
} else {
myHtmlToString.processTextContent(true, tag, null);
myHtmlToString.appendEndTag(tag);
}
break;
case FE_TITLE:
myHtmlToString.appendText(bufferContent);
if (ns == XMLNamespaces.Atom && tag == TAG_TITLE) {
// TODO:implement ATOMTextConstruct & ATOMTitle
myEntry.Title = myHtmlToString.finishTextContent();
myEntry.Title = myHtmlToString.getText();
myState = F_ENTRY;
} else {
myHtmlToString.processTextContent(true, tag, null);
myHtmlToString.appendEndTag(tag);
}
break;
case FE_UPDATED:
Expand Down
196 changes: 15 additions & 181 deletions src/org/geometerplus/fbreader/network/atom/HtmlToString.java
Expand Up @@ -19,13 +19,9 @@

package org.geometerplus.fbreader.network.atom;

import java.util.HashMap;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import android.text.Html;

import org.geometerplus.zlibrary.core.html.*;
import org.geometerplus.zlibrary.core.util.MimeType;
import org.geometerplus.zlibrary.core.xml.ZLXMLProcessor;
import org.geometerplus.zlibrary.core.xml.ZLStringMap;

import org.geometerplus.fbreader.formats.xhtml.XHTMLReader;
Expand All @@ -35,8 +31,6 @@ public class HtmlToString {
private String myTextType;
private StringBuilder myTextContent = new StringBuilder();

private HtmlToStringReader myHtmlToStringReader = new HtmlToStringReader();

public void setupTextContent(String type) {
if (type == null) {
myTextType = ATOMConstants.TYPE_DEFAULT;
Expand All @@ -52,7 +46,7 @@ public void appendText(String text) {
}
}

public String finishTextContent() {
public String getText() {
char[] contentArray = myTextContent.toString().trim().toCharArray();
String result;
if (contentArray.length == 0) {
Expand All @@ -65,189 +59,29 @@ public String finishTextContent() {
ATOMConstants.TYPE_XHTML.equals(myTextType) ||
MimeType.TEXT_HTML.Name.equals(myTextType) ||
MimeType.TEXT_XHTML.Name.equals(myTextType)) {
myHtmlToStringReader.readFromString(result);
result = myHtmlToStringReader.getString();
result = Html.fromHtml(new String(contentArray)).toString();
}
}
myTextType = null;
myTextContent.delete(0, myTextContent.length());
return result;
}

public void processTextContent(boolean closeTag, String tag, ZLStringMap attributes) {
if (ATOMConstants.TYPE_XHTML.equals(myTextType) ||
MimeType.TEXT_XHTML.Name.equals(myTextType)) {
if (closeTag) {
myTextContent.append("</").append(tag).append(">");
} else {
StringBuilder buffer = new StringBuilder("<").append(tag);
for (int i = 0; i < attributes.getSize(); ++i) {
final String key = attributes.getKey(i);
final String value = attributes.getValue(key);
buffer.append(" ").append(key).append("=\"");
if (value != null) {
buffer.append(value);
}
buffer.append("\"");
}
buffer.append(" >");
myTextContent.append(buffer.toString());
/**
 * Appends an opening tag with all of its attributes to the accumulated markup.
 *
 * Each attribute is written as {@code key="value"}; an attribute whose value
 * is {@code null} is written as {@code key=""}.
 *
 * NOTE(review): values are written verbatim, without any escaping -- confirm
 * that the attribute map never holds quotes or markup characters.
 *
 * @param tag        name of the element being opened
 * @param attributes attributes of the element; must not be {@code null}
 */
public void appendStartTag(String tag, ZLStringMap attributes) {
	myTextContent.append("<");
	myTextContent.append(tag);

	int index = 0;
	while (index < attributes.getSize()) {
		final String name = attributes.getKey(index);
		final String attributeValue = attributes.getValue(name);

		myTextContent.append(" ");
		myTextContent.append(name);
		myTextContent.append("=\"");
		// A missing value still produces the surrounding quotes.
		myTextContent.append(attributeValue != null ? attributeValue : "");
		myTextContent.append("\"");

		++index;
	}

	myTextContent.append(">");
}

private static class HtmlToStringReader implements ZLHtmlReader {

private StringBuilder myBuffer = new StringBuilder();
private byte[] myByteData;
private int myByteDataLength;
private HashMap<String,char[]> myEntityMap;

/**
 * Wraps the given HTML fragment into a complete XHTML document, encodes it
 * as UTF-8 and runs it through {@code ZLHtmlProcessor} with this object as
 * the reader.
 *
 * @param htmlString HTML fragment to place inside the document body
 */
public void readFromString(String htmlString) {
	final String document =
		"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"
		+ "<html><head>"
		+ "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"
		+ "<title></title>"
		+ "</head><body>"
		+ htmlString
		+ "</body></html>";

	final byte[] utf8;
	try {
		utf8 = document.getBytes("UTF-8");
	} catch (UnsupportedEncodingException ex) {
		throw new RuntimeException("It's impossible!!! UTF-8 charset is not supported!!!", ex);
	}

	ZLHtmlProcessor.read(this, new ByteArrayInputStream(utf8));
}

/**
 * Returns the text collected so far with leading and trailing whitespace
 * removed, as a newly allocated string.
 */
public String getString() {
	final String trimmed = myBuffer.toString().trim();
	return new String(trimmed.toCharArray());
}


/**
 * Resets the reader state at the start of a document: discards any
 * previously collected text and any pending, not yet decoded bytes.
 */
public void startDocumentHandler() {
	myByteDataLength = 0;
	myBuffer.setLength(0);
}

/**
 * Called at the end of the document; flushes any byte data still pending
 * into the text buffer.
 */
public void endDocumentHandler() {
processByteData();
}

/**
 * Handles an opening tag. Pending byte data is flushed first; then, if some
 * text has already been collected, line breaks are emitted:
 * {@code br} appends one newline, {@code hr} makes sure the buffer ends with
 * a newline and then appends one more. All other tags are ignored.
 *
 * @param tag        tag name, compared case-insensitively
 * @param offset     not used by this reader
 * @param attributes not used by this reader
 */
public void startElementHandler(String tag, int offset, ZLHtmlAttributeMap attributes) {
	processByteData();

	final String lowerCaseTag = tag.toLowerCase();
	if (myBuffer.length() == 0) {
		// Nothing collected yet, so there is nothing to separate.
		return;
	}

	if ("br".equals(lowerCaseTag)) {
		myBuffer.append('\n');
	} else if ("hr".equals(lowerCaseTag)) {
		final char last = myBuffer.charAt(myBuffer.length() - 1);
		if (last != '\n') {
			myBuffer.append('\n');
		}
		myBuffer.append('\n');
	}
}

/**
 * Handles a closing tag. Pending byte data is flushed first; a closing
 * {@code p} then appends a newline, provided some text has already been
 * collected. All other tags are ignored.
 *
 * @param tag tag name, compared case-insensitively
 */
public void endElementHandler(String tag) {
	processByteData();

	final boolean isParagraph = "p".equals(tag.toLowerCase());
	if (isParagraph && myBuffer.length() > 0) {
		myBuffer.append('\n');
	}
}

/**
 * Decodes the pending bytes as UTF-8 and appends the resulting text to the
 * buffer with whitespace normalised: leading and trailing whitespace of the
 * chunk is dropped and every inner whitespace run becomes a single space.
 * A separating space is inserted first when the buffer is non-empty and does
 * not already end with whitespace.
 */
private void processByteData() {
	if (myByteDataLength == 0) {
		return;
	}

	final String text;
	try {
		text = new String(myByteData, 0, myByteDataLength, "UTF-8");
	} catch (UnsupportedEncodingException ex) {
		throw new RuntimeException("It's impossible!!! UTF-8 charset is not supported!!!", ex);
	}
	myByteDataLength = 0;

	final int textLength = text.length();
	if (textLength == 0) {
		return;
	}

	// Keep this chunk apart from whatever was collected before it.
	final int collected = myBuffer.length();
	if (collected > 0 && !Character.isWhitespace(myBuffer.charAt(collected - 1))) {
		myBuffer.append(' ');
	}

	int start = 0;
	while (start < textLength && Character.isWhitespace(text.charAt(start))) {
		++start;
	}

	// A space is only written once a non-whitespace character follows it,
	// which is what drops trailing whitespace.
	boolean pendingSpace = false;
	for (int i = start; i < textLength; ++i) {
		final char current = text.charAt(i);
		if (Character.isWhitespace(current)) {
			pendingSpace = true;
			continue;
		}
		if (pendingSpace) {
			myBuffer.append(' ');
			pendingSpace = false;
		}
		myBuffer.append(current);
	}
}

/**
 * Handles an entity reference (the text between {@code &} and {@code ;}).
 *
 * Pending byte data is flushed first. The entity is then resolved through
 * the XHTML entity map; entities missing from the map are treated as numeric
 * character references ({@code #NNN} or {@code #xHHH}). Every resolution,
 * including a failed one (empty replacement), is cached in the map.
 *
 * @param entity entity name without the surrounding {@code &} and {@code ;}
 */
public void entityDataHandler(String entity) {
	processByteData();

	if (entity.length() == 0) {
		return;
	}

	if (myEntityMap == null) {
		myEntityMap = new HashMap<String,char[]>(ZLXMLProcessor.getEntityMap(XHTMLReader.xhtmlDTDs()));
	}
	char[] data = myEntityMap.get(entity);
	if (data == null) {
		data = decodeNumericEntity(entity);
		if (data == null) {
			// Unknown or malformed entity: contributes no text.
			data = new char[0];
		}
		myEntityMap.put(entity, data);
	}
	myBuffer.append(data);
}

/**
 * Decodes a numeric character reference such as {@code #65} or {@code #x41}.
 *
 * @return the UTF-16 encoding of the referenced code point, or {@code null}
 *         when the entity is not a well-formed reference to a valid code point
 */
private static char[] decodeNumericEntity(String entity) {
	// A lone "#" has no digits; checking the length also keeps charAt(1) in range.
	if (entity.length() < 2 || entity.charAt(0) != '#') {
		return null;
	}
	try {
		final int codePoint;
		if (entity.charAt(1) == 'x') {
			codePoint = Integer.parseInt(entity.substring(2), 16);
		} else {
			codePoint = Integer.parseInt(entity.substring(1));
		}
		if (!Character.isValidCodePoint(codePoint)) {
			return null;
		}
		// toChars yields a surrogate pair for code points above U+FFFF
		// instead of truncating them to a single char.
		return Character.toChars(codePoint);
	} catch (NumberFormatException ignored) {
		// Not a number: the caller treats the entity as unknown.
		return null;
	}
}

/**
 * Accumulates a chunk of raw bytes until the next flush.
 *
 * The chunk is copied into the internal byte array, which is allocated on
 * first use and reallocated to exactly the required size whenever it is too
 * small. Calls with a non-positive length are ignored.
 *
 * @param data   source array
 * @param start  index of the first byte to copy from {@code data}
 * @param length number of bytes to copy
 */
public void byteDataHandler(byte[] data, int start, int length) {
	if (length <= 0) {
		return;
	}

	if (myByteData == null) {
		// First chunk: allocate a buffer of exactly the chunk size.
		myByteData = new byte[length];
		System.arraycopy(data, start, myByteData, 0, length);
		myByteDataLength = length;
		return;
	}

	final int required = myByteDataLength + length;
	if (required > myByteData.length) {
		final byte[] previous = myByteData;
		myByteData = new byte[required];
		System.arraycopy(previous, 0, myByteData, 0, myByteDataLength);
	}
	System.arraycopy(data, start, myByteData, myByteDataLength, length);
	myByteDataLength = required;
}
/**
 * Appends the closing tag {@code </tag>} to the accumulated markup.
 *
 * @param tag name of the element being closed
 */
public void appendEndTag(String tag) {
	myTextContent.append("</");
	myTextContent.append(tag);
	myTextContent.append(">");
}
}
Expand Up @@ -184,7 +184,7 @@ public boolean startElementHandler(String tag, ZLStringMap attributes) {
break;
case ANNOTATION:
myHtmlToString.appendText(bufferContent);
myHtmlToString.processTextContent(false, tag, attributes);
myHtmlToString.appendStartTag(tag, attributes);
break;
}
return false;
Expand Down Expand Up @@ -316,10 +316,10 @@ public boolean endElementHandler(String tag) {
case ANNOTATION:
myHtmlToString.appendText(bufferContent);
if (TAG_ANNOTATION == tag) {
mySummary = myHtmlToString.finishTextContent();
mySummary = myHtmlToString.getText();
myState = TITLE_INFO;
} else {
myHtmlToString.processTextContent(true, tag, null);
myHtmlToString.appendEndTag(tag);
}
break;
case DATE:
Expand Down
4 changes: 2 additions & 2 deletions src/org/geometerplus/fbreader/network/opds/OPDSXMLReader.java
Expand Up @@ -138,7 +138,7 @@ public boolean startElementHandler(final String ns, final String tag,
break;
case FE_CONTENT:
myHtmlToString.appendText(bufferContent);
myHtmlToString.processTextContent(false, tag, attributes);
myHtmlToString.appendStartTag(tag, attributes);
// FIXME: HACK: html handling must be implemented neatly
if (tag == TAG_HACK_SPAN || attributes.getValue("class") == "price") {
myState = FEC_HACK_SPAN;
Expand Down Expand Up @@ -175,7 +175,7 @@ public boolean endElementHandler(final String ns, final String tag,
case FEC_HACK_SPAN:
// FIXME: HACK
myHtmlToString.appendText(bufferContent);
myHtmlToString.processTextContent(true, tag, null);
myHtmlToString.appendEndTag(tag);
if (bufferContent != null) {
getOPDSEntry().addAttribute(KEY_PRICE, bufferContent.intern());
}
Expand Down

0 comments on commit 9d0ca05

Please sign in to comment.