Skip to content

Commit

Permalink
Merge e2c84b1 into 251c8d5
Browse files Browse the repository at this point in the history
  • Loading branch information
dickschoeller authored Jan 1, 2018
2 parents 251c8d5 + e2c84b1 commit 807daf3
Show file tree
Hide file tree
Showing 19 changed files with 4,020 additions and 1,130 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.schoellerfamily.gedbrowser.persistence.mongo.domain.RootDocumentMongo;
import org.schoellerfamily.gedbrowser.persistence.mongo.gedconvert.GedObjectToGedDocumentMongoConverter;
import org.schoellerfamily.gedbrowser.persistence.mongo.repository.RepositoryManagerMongo;
import org.schoellerfamily.gedbrowser.reader.CharsetScanner;
import org.schoellerfamily.gedbrowser.reader.GedFile;
import org.schoellerfamily.gedbrowser.reader.GedLineToGedObjectTransformer;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -100,10 +101,10 @@ protected RootDocument loadRepository(final String dbName) {
Root root;

final String filename = buildFileName(dbName);

final String charset = new CharsetScanner().charset(filename);
final File file = new File(filename);
try (FileInputStream fis = new FileInputStream(file);
Reader reader = new InputStreamReader(fis, "UTF-8");
Reader reader = new InputStreamReader(fis, charset);
BufferedReader bufferedReader = new BufferedReader(reader);) {
final GedFile gedFile = new GedFile(filename, dbName, finder,
bufferedReader);
Expand Down
5 changes: 5 additions & 0 deletions gedbrowser-reader/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
<groupId>org.schoellerfamily.gedbrowser</groupId>
<artifactId>gedbrowser-datamodel</artifactId>
</dependency>
<dependency>
<groupId>org.clojars.rayne</groupId>
<artifactId>anselcharset</artifactId>
<version>${anselcharset.version}</version>
</dependency>

<dependency>
<groupId>org.springframework</groupId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package org.schoellerfamily.gedbrowser.reader;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.schoellerfamily.gedbrowser.datamodel.Attribute;
import org.schoellerfamily.gedbrowser.datamodel.GedObject;
import org.schoellerfamily.gedbrowser.datamodel.Root;

/**
* Reads the top of a GEDCOM file, looking for the CHAR tag to determine how
* to read the file.
*
* @author Dick Schoeller
*/
public class CharsetScanner {
    /** Logger. */
    private final Log logger = LogFactory.getLog(getClass());

    /**
     * Java charset name returned whenever the GEDCOM charset is missing,
     * unreadable or not recognized.
     */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Prefix of the GEDCOM line that names the file's charset. */
    private static final String CHAR_LINE_PREFIX = "1 CHAR";

    /**
     * Holds the mapping between GEDCOM known charsets and Java known charsets.
     * Keys are lower case; lookups lower-case the GEDCOM name first.
     */
    private static final Map<String, String> CHARSET_MAP = new HashMap<>();
    static {
        CHARSET_MAP.put("ansel", "ANSEL");
        CHARSET_MAP.put("ansi", "Cp1252");
        CHARSET_MAP.put("cp1252", "Cp1252");
        CHARSET_MAP.put("unicode", "UTF-16");
        CHARSET_MAP.put("utf-8", "UTF-8");
        CHARSET_MAP.put("utf8", "UTF-8");
        CHARSET_MAP.put("ascii", "ASCII");
    }

    /**
     * Scans the named file line by line, decoding it as ASCII, and returns
     * the Java charset for the first line that starts with "1 CHAR".
     *
     * @param filename the name of the file to scan
     * @return the Java charset name; UTF-8 if the file can't be read, has no
     *         CHAR line, or names a charset that is not recognized
     */
    public String charset(final String filename) {
        try (InputStream fis = new StreamManager(filename).getInputStream()) {
            // Defensive: a classpath lookup can yield null instead of
            // throwing, and InputStreamReader rejects a null stream with a
            // NullPointerException that the catch below would not handle.
            if (fis == null) {
                logger.warn("Could not read file: " + filename);
                return DEFAULT_CHARSET;
            }
            try (Reader reader = new InputStreamReader(fis, "ASCII");
                    BufferedReader bufferedReader =
                            new BufferedReader(reader)) {
                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    if (isCharset(line)) {
                        return extractCharsetFromLine(line);
                    }
                }
            }
        } catch (IOException e) {
            // Keep the cause so the log shows why the read failed.
            logger.warn("Could not read file: " + filename, e);
        }
        return DEFAULT_CHARSET;
    }

    /**
     * @param line the input line
     * @return true if this line is the charset line
     */
    private boolean isCharset(final String line) {
        return line.startsWith(CHAR_LINE_PREFIX);
    }

    /**
     * Takes the last space-separated token of the line as the GEDCOM charset
     * name. The line is trimmed first so that trailing white space does not
     * produce an empty token.
     *
     * @param line the input line
     * @return the Java charset for the charset named on the line
     */
    private String extractCharsetFromLine(final String line) {
        final String trimmed = line.trim();
        final int start = trimmed.lastIndexOf(' ') + 1;
        return gedcomCharsetToJava(trimmed.substring(start));
    }

    /**
     * Determines the charset from an already loaded data set by looking at
     * its first attribute, which is used only if it is the header.
     *
     * @param root the root of the dataset that we are working with
     * @return the Java charset name; UTF-8 if the data set has no attributes
     *         or does not start with a header
     */
    public String charset(final Root root) {
        if (root.getAttributes().isEmpty()) {
            return DEFAULT_CHARSET;
        }
        final GedObject gob = root.getAttributes().get(0);
        if ("Header".equals(gob.getString())) {
            return gedcomCharsetToJava(findCharsetInHeader(gob));
        }
        return DEFAULT_CHARSET;
    }

    /**
     * Find the GEDCOM charset in the attributes of the header.
     *
     * @param gob the header ged object
     * @return the GEDCOM charset; UTF-8 if the header has no "Character Set"
     *         attribute
     */
    private String findCharsetInHeader(final GedObject gob) {
        for (final GedObject hgob : gob.getAttributes()) {
            // Only an Attribute carries a tail; skip anything else rather
            // than failing the cast.
            if ("Character Set".equals(hgob.getString())
                    && hgob instanceof Attribute) {
                return ((Attribute) hgob).getTail();
            }
        }
        return DEFAULT_CHARSET;
    }

    /**
     * Translates a GEDCOM charset name to a Java charset name. The lookup
     * ignores case and surrounding white space.
     *
     * @param charset the GEDCOM charset name, may be null
     * @return the Java charset name; UTF-8 if the name is null or unknown
     */
    public String gedcomCharsetToJava(final String charset) {
        if (charset == null) {
            return DEFAULT_CHARSET;
        }
        final String javaCharset = CHARSET_MAP
                .get(charset.trim().toLowerCase(Locale.ENGLISH));
        if (javaCharset == null) {
            return DEFAULT_CHARSET;
        }
        return javaCharset;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public ReaderSource(final BufferedReader reader) {
@Override
public AbstractGedLine createGedLine(final AbstractGedLine parent)
throws IOException {
return createGedLine(parent, reader.readLine());
final String line = reader.readLine();
return createGedLine(parent, line);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package org.schoellerfamily.gedbrowser.reader;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;

/**
* Can open a stream either in an absolute file location or in the classpath.
*
* @author Dick Schoeller
*/
public class StreamManager {

    /**
     * Location inside JARs where GEDCOMs might be found.
     */
    private static final String DATA_DIR =
            "/org/schoellerfamily/gedbrowser/reader/data/";

    /**
     * Holds the name of the file that we are opening.
     */
    private final String filename;

    /**
     * Constructor.
     *
     * @param filename the name of the file that we are opening
     */
    public StreamManager(final String filename) {
        this.filename = filename;
    }

    /**
     * Opens the file. A name that starts with '/' or that the platform
     * considers absolute is opened from the file system; any other name is
     * looked up on the classpath under the GEDCOM data directory.
     *
     * @return the input stream, never null
     * @throws FileNotFoundException if the name is null or empty, or if the
     *         file or classpath resource can't be opened
     */
    public InputStream getInputStream() throws FileNotFoundException {
        if (filename == null || filename.isEmpty()) {
            throw new FileNotFoundException("No file name given");
        }
        if (isFileSystemPath()) {
            return new FileInputStream(filename);
        }
        final String resource = DATA_DIR + filename;
        final InputStream stream = getClass().getResourceAsStream(resource);
        if (stream == null) {
            // getResourceAsStream reports a missing resource as null; turn
            // that into the exception this method documents.
            throw new FileNotFoundException(
                    "Resource not found on classpath: " + resource);
        }
        return stream;
    }

    /**
     * Decides whether the name refers to the file system. The leading '/'
     * test is kept as is; File.isAbsolute() adds platform specific absolute
     * forms such as drive letter paths.
     *
     * @return true if the name should be opened as a file
     */
    private boolean isFileSystemPath() {
        return filename.charAt(0) == '/'
                || new java.io.File(filename).isAbsolute();
    }
}
Loading

0 comments on commit 807daf3

Please sign in to comment.