Skip to content

Commit

Permalink
Rewrite bibtexml importer with JAXB parser (JabRef#1666)
Browse files Browse the repository at this point in the history
* rewrite bibtexml importer with jaxb parser

* address comments

* remove unused import

* include feedback

* fix import order, log and add testfile
  • Loading branch information
tschechlovdev authored and ayanai1 committed Sep 5, 2016
1 parent c9a81a1 commit f0ba0eb
Show file tree
Hide file tree
Showing 16 changed files with 1,288 additions and 147 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Expand Up @@ -32,7 +32,7 @@ apply plugin: 'checkstyle'

apply from: 'eclipse.gradle'
apply from: 'localization.gradle'
apply from: 'medline.gradle'
apply from: 'xjc.gradle'

group = "net.sf.jabref"
version = "3.6dev"
Expand Down
Expand Up @@ -17,22 +17,34 @@

import java.io.BufferedReader;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import javax.xml.datatype.XMLGregorianCalendar;

import net.sf.jabref.importer.fileformat.bibtexml.Entry;
import net.sf.jabref.importer.fileformat.bibtexml.File;
import net.sf.jabref.importer.fileformat.bibtexml.Inbook;
import net.sf.jabref.importer.fileformat.bibtexml.Incollection;
import net.sf.jabref.logic.importer.ParserResult;
import net.sf.jabref.logic.importer.util.BibTeXMLHandler;
import net.sf.jabref.logic.util.FileExtensions;
import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.model.entry.FieldName;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.xml.sax.InputSource;

/**
* Importer for the BibTeXML format.
Expand All @@ -46,6 +58,10 @@ public class BibTeXMLImporter extends ImportFormat {

private static final Pattern START_PATTERN = Pattern.compile("<(bibtex:)?file .*");

private static final List<String> IGNORED_METHODS = Arrays.asList("getClass", "getAnnotate", "getContents",
"getPrice", "getSize", "getChapter");


@Override
public String getFormatName() {
return "BibTeXML";
Expand Down Expand Up @@ -79,33 +95,169 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException {

List<BibEntry> bibItems = new ArrayList<>();

// Obtain a factory object for creating SAX parsers
SAXParserFactory parserFactory = SAXParserFactory.newInstance();
// Configure the factory object to specify attributes of the parsers it
// creates
// parserFactory.setValidating(true);
parserFactory.setNamespaceAware(true);
// Now create a SAXParser object

try {
SAXParser parser = parserFactory.newSAXParser(); //May throw exceptions
BibTeXMLHandler handler = new BibTeXMLHandler();
// Start the parser. It reads the file and calls methods of the handler.
parser.parse(new InputSource(reader), handler);
// When you're done, report the results stored by your handler object
bibItems.addAll(handler.getItems());

} catch (javax.xml.parsers.ParserConfigurationException e) {
JAXBContext context = JAXBContext.newInstance("net.sf.jabref.importer.fileformat.bibtexml");
Unmarshaller unmarshaller = context.createUnmarshaller();
File file = (File) unmarshaller.unmarshal(reader);

List<Entry> entries = file.getEntry();
Map<String, String> fields = new HashMap<>();

for (Entry entry : entries) {
BibEntry bibEntry = new BibEntry();
if (entry.getArticle() != null) {
bibEntry.setType("article");
parse(entry.getArticle(), fields);
} else if (entry.getBook() != null) {
bibEntry.setType("book");
parse(entry.getBook(), fields);
} else if (entry.getBooklet() != null) {
bibEntry.setType("booklet");
parse(entry.getBooklet(), fields);
} else if (entry.getConference() != null) {
bibEntry.setType("conference");
parse(entry.getConference(), fields);
} else if (entry.getInbook() != null) {
bibEntry.setType("inbook");
parseInbook(entry.getInbook(), fields);
} else if (entry.getIncollection() != null) {
bibEntry.setType("incollection");
Incollection incollection = entry.getIncollection();
if (incollection.getChapter() != null) {
fields.put(FieldName.CHAPTER, String.valueOf(incollection.getChapter()));
}
parse(incollection, fields);
} else if (entry.getInproceedings() != null) {
bibEntry.setType("inproceedings");
parse(entry.getInproceedings(), fields);
} else if (entry.getManual() != null) {
bibEntry.setType("manual");
parse(entry.getManual(), fields);
} else if (entry.getMastersthesis() != null) {
bibEntry.setType("mastersthesis");
parse(entry.getMastersthesis(), fields);
} else if (entry.getMisc() != null) {
bibEntry.setType("misc");
parse(entry.getMisc(), fields);
} else if (entry.getPhdthesis() != null) {
bibEntry.setType("phdthesis");
parse(entry.getPhdthesis(), fields);
} else if (entry.getProceedings() != null) {
bibEntry.setType("proceedings");
parse(entry.getProceedings(), fields);
} else if (entry.getTechreport() != null) {
bibEntry.setType("techreport");
parse(entry.getTechreport(), fields);
} else if (entry.getUnpublished() != null) {
bibEntry.setType("unpublished");
parse(entry.getUnpublished(), fields);
}

if (entry.getId() != null) {
bibEntry.setCiteKey(entry.getId());
}
bibEntry.setField(fields);
bibItems.add(bibEntry);
}
} catch (JAXBException e) {
LOGGER.error("Error with XML parser configuration", e);
return ParserResult.fromErrorMessage(e.getLocalizedMessage());
} catch (org.xml.sax.SAXException e) {
LOGGER.error("Error during XML parsing", e);
return ParserResult.fromErrorMessage(e.getLocalizedMessage());
} catch (IOException e) {
LOGGER.error("Error during file import", e);
return ParserResult.fromErrorMessage(e.getLocalizedMessage());
}
return new ParserResult(bibItems);
}

/**
 * Copies the values exposed by the getters of a JAXB entry object into {@code fields}.
 * <p>
 * A single reflective method is used for all entry types instead of one hand-written method per type.
 * Only the methods declared directly on the runtime class of {@code entryType} are inspected:
 * <ul>
 * <li><Code>getYear</Code> is handed to {@code putYear} and <Code>getNumber</Code> to {@code putNumber}.</li>
 * <li>Methods listed as ignorable (see {@code isMethodToIgnore}) are skipped.</li>
 * <li>Every other method whose name starts with <Code>get</Code> is invoked; a non-null String result is stored
 * under the method name with the leading <Code>get</Code> removed, so <Code>getAbstract</Code> yields the key
 * "Abstract". NOTE(review): any lower-casing of the key is presumably done by the consumer of the map - confirm.</li>
 * </ul>
 * Getters that return something other than a String are logged and skipped instead of aborting the import with a
 * {@link ClassCastException}. Reflection failures are logged and the affected method is skipped.
 *
 * @param entryType The JAXB object of one entry type; its getters provide the field values.
 * @param fields    A map that receives the name and value of every field found on the entry.
 */
private <T> void parse(T entryType, Map<String, String> fields) {
    Method[] declaredMethods = entryType.getClass().getDeclaredMethods();
    for (Method method : declaredMethods) {
        String methodName = method.getName();
        try {
            if ("getYear".equals(methodName)) {
                putYear(fields, (XMLGregorianCalendar) method.invoke(entryType));
            } else if ("getNumber".equals(methodName)) {
                putNumber(fields, (BigInteger) method.invoke(entryType));
            } else if (isMethodToIgnore(methodName)) {
                continue;
            } else if (methodName.startsWith("get")) {
                Object value = method.invoke(entryType);
                if ((value == null) || (value instanceof String)) {
                    // Strip only the leading "get"; String.replace would also remove any later "get"
                    // inside the name and corrupt the key.
                    putIfValueNotNull(fields, methodName.substring("get".length()), (String) value);
                } else {
                    LOGGER.info("Skipping non-String value returned by " + methodName);
                }
            }
        } catch (IllegalArgumentException | InvocationTargetException | IllegalAccessException e) {
            LOGGER.error("Could not invoke method", e);
        }
    }
}

/**
 * Tells whether a method must be left out when mapping getters to fields.
 *
 * @param methodName The name of the method to check
 * @return true if the name is contained in <Code>IGNORED_METHODS</Code>, else false
 */
private boolean isMethodToIgnore(String methodName) {
    for (String ignoredName : IGNORED_METHODS) {
        if (ignoredName.equals(methodName)) {
            return true;
        }
    }
    return false;
}

/**
 * Maps the content of an <Code>Inbook</Code> entry to fields. Inbook is handled separately because
 * <Code>inbook.getContent()</Code> hands back a list of <Code>JAXBElements</Code>, so the values are read from
 * that list and dispatched on their runtime type:
 * String values are stored under the element's local name, BigInteger values only for number and chapter,
 * and calendar values only for year. Everything else is reported in the log, except BigInteger elements with
 * another name, which are dropped without a log entry.
 *
 * @param inbook The unmarshalled inbook entry
 * @param fields A map that receives the name and value of every recognised field
 */
private void parseInbook(Inbook inbook, Map<String, String> fields) {
    for (JAXBElement<?> child : inbook.getContent()) {
        String fieldName = child.getName().getLocalPart();
        Object raw = child.getValue();

        if (raw instanceof String) {
            putIfValueNotNull(fields, fieldName, (String) raw);
            continue;
        }

        if (raw instanceof BigInteger) {
            // instanceof already guarantees a non-null value here
            String asText = String.valueOf(raw);
            if (FieldName.NUMBER.equals(fieldName)) {
                fields.put(FieldName.NUMBER, asText);
            } else if (FieldName.CHAPTER.equals(fieldName)) {
                fields.put(FieldName.CHAPTER, asText);
            }
            continue;
        }

        if ((raw instanceof XMLGregorianCalendar) && FieldName.YEAR.equals(fieldName)) {
            putYear(fields, (XMLGregorianCalendar) raw);
            continue;
        }

        // Either a calendar value that is not the year or a value of an unsupported type
        LOGGER.info("Unexpected field was found");
    }
}

/**
 * Stores the string form of {@code year} under the year field; does nothing if {@code year} is null.
 *
 * @param fields The map that receives the value
 * @param year   The year value, may be null
 */
private void putYear(Map<String, String> fields, XMLGregorianCalendar year) {
    if (year == null) {
        return;
    }
    fields.put(FieldName.YEAR, year.toString());
}

/**
 * Stores the string form of {@code number} under the number field; does nothing if {@code number} is null.
 *
 * @param fields The map that receives the value
 * @param number The number value, may be null
 */
private void putNumber(Map<String, String> fields, BigInteger number) {
    if (number == null) {
        return;
    }
    fields.put(FieldName.NUMBER, number.toString());
}

/**
 * Puts {@code value} into {@code fields} under {@code key}, unless {@code value} is null.
 *
 * @param fields The map that receives the entry
 * @param key    The field name to store the value under
 * @param value  The field value, may be null
 */
private void putIfValueNotNull(Map<String, String> fields, String key, String value) {
    if (value == null) {
        return;
    }
    fields.put(key, value);
}
}
107 changes: 0 additions & 107 deletions src/main/java/net/sf/jabref/logic/importer/util/BibTeXMLHandler.java

This file was deleted.

0 comments on commit f0ba0eb

Please sign in to comment.