-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
19 changed files
with
4,020 additions
and
1,130 deletions.
There are no files selected for viewing
549 changes: 274 additions & 275 deletions
549
...wser-analytics/src/test/resources/org/schoellerfamily/gedbrowser/reader/data/gl120368.ged
Large diffs are not rendered by default.
Oops, something went wrong.
549 changes: 274 additions & 275 deletions
549
...er-geographics/src/test/resources/org/schoellerfamily/gedbrowser/reader/data/gl120368.ged
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
118 changes: 118 additions & 0 deletions
118
gedbrowser-reader/src/main/java/org/schoellerfamily/gedbrowser/reader/CharsetScanner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
package org.schoellerfamily.gedbrowser.reader; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.InputStreamReader; | ||
import java.io.Reader; | ||
import java.util.HashMap; | ||
import java.util.Locale; | ||
import java.util.Map; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
import org.schoellerfamily.gedbrowser.datamodel.Attribute; | ||
import org.schoellerfamily.gedbrowser.datamodel.GedObject; | ||
import org.schoellerfamily.gedbrowser.datamodel.Root; | ||
|
||
/** | ||
* Reads the top of a GEDCOM file, looking for the CHAR tag to determine how | ||
* to read the file. | ||
* | ||
* @author Dick Schoeller | ||
*/ | ||
public class CharsetScanner { | ||
/** Logger. */ | ||
private final Log logger = LogFactory.getLog(getClass()); | ||
|
||
/** | ||
* Holds the mapping between GEDCOM known charsets and Java known charsets. | ||
*/ | ||
private static final Map<String, String> CHARSET_MAP = new HashMap<>(); | ||
static { | ||
CHARSET_MAP.put("ansel", "ANSEL"); | ||
CHARSET_MAP.put("ansi", "Cp1252"); | ||
CHARSET_MAP.put("cp1252", "Cp1252"); | ||
CHARSET_MAP.put("unicode", "UTF-16"); | ||
CHARSET_MAP.put("utf-8", "UTF-8"); | ||
CHARSET_MAP.put("utf8", "UTF-8"); | ||
CHARSET_MAP.put("ascii", "ASCII"); | ||
} | ||
|
||
/** | ||
* @param filename the name of the file to scan | ||
* @return the Java charset name | ||
*/ | ||
public String charset(final String filename) { | ||
try (InputStream fis = new StreamManager(filename).getInputStream(); | ||
Reader reader = new InputStreamReader(fis, "ASCII"); | ||
BufferedReader bufferedReader = new BufferedReader(reader)) { | ||
String line; | ||
while ((line = bufferedReader.readLine()) != null) { | ||
if (isCharset(line)) { | ||
return extractCharsetFromLine(line); | ||
} | ||
} | ||
} catch (IOException e) { | ||
logger.warn("Could not read file: " + filename); | ||
} | ||
return "UTF-8"; | ||
} | ||
|
||
/** | ||
* @param line the input line | ||
* @return true if this line is the charset line | ||
*/ | ||
private boolean isCharset(final String line) { | ||
return line.startsWith("1 CHAR"); | ||
} | ||
|
||
/** | ||
* @param line the input line | ||
* @return the charset found there | ||
*/ | ||
private String extractCharsetFromLine(final String line) { | ||
final int space = line.lastIndexOf(' ') + 1; | ||
return gedcomCharsetToJava(line.substring(space)); | ||
} | ||
|
||
/** | ||
* @param root the root of the dataset that we are working with | ||
* @return the Java charset name | ||
*/ | ||
public String charset(final Root root) { | ||
final GedObject gob = root.getAttributes().get(0); | ||
if ("Header".equals(gob.getString())) { | ||
return gedcomCharsetToJava(findCharsetInHeader(gob)); | ||
} | ||
return "UTF-8"; | ||
} | ||
|
||
/** | ||
* Find the GEDCOM charset in the attributes of the header. | ||
* | ||
* @param gob the header ged object | ||
* @return the GEDCOM charset | ||
*/ | ||
private String findCharsetInHeader(final GedObject gob) { | ||
for (final GedObject hgob : gob.getAttributes()) { | ||
if ("Character Set".equals(hgob.getString())) { | ||
return ((Attribute) hgob).getTail(); | ||
} | ||
} | ||
return "UTF-8"; | ||
} | ||
|
||
/** | ||
* @param charset the GEDCOM charset name | ||
* @return the Java charset name | ||
*/ | ||
public String gedcomCharsetToJava(final String charset) { | ||
final String javaCharset = CHARSET_MAP | ||
.get(charset.toLowerCase(Locale.ENGLISH)); | ||
if (javaCharset == null) { | ||
return "UTF-8"; | ||
} | ||
return javaCharset; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
gedbrowser-reader/src/main/java/org/schoellerfamily/gedbrowser/reader/StreamManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package org.schoellerfamily.gedbrowser.reader; | ||
|
||
import java.io.FileInputStream; | ||
import java.io.FileNotFoundException; | ||
import java.io.InputStream; | ||
|
||
/**
 * Can open a stream either in an absolute file location or in the classpath.
 * A name starting with '/' is opened from the file system; any other name is
 * looked up on the classpath below the GEDCOM data directory.
 *
 * @author Dick Schoeller
 */
public class StreamManager {

    /**
     * Location inside JARs where GEDCOMs might be found.
     */
    private static final String DATA_DIR =
            "/org/schoellerfamily/gedbrowser/reader/data/";

    /**
     * Holds the name of the file that we are opening.
     */
    private final String filename;

    /**
     * Constructor.
     *
     * @param filename the name of the file that we are opening
     */
    public StreamManager(final String filename) {
        this.filename = filename;
    }

    /**
     * Open the stream for the file named at construction.
     *
     * @return the input stream, never null
     * @throws FileNotFoundException if the name is null or empty, or the
     *         file or classpath resource can't be opened
     */
    public InputStream getInputStream() throws FileNotFoundException {
        if (filename == null || filename.isEmpty()) {
            // Without this guard charAt(0) fails with an unchecked exception.
            throw new FileNotFoundException("No file name provided");
        }
        if (filename.charAt(0) == '/') {
            return new FileInputStream(filename);
        }
        final String resource = DATA_DIR + filename;
        final InputStream stream = getClass().getResourceAsStream(resource);
        if (stream == null) {
            // getResourceAsStream reports a missing resource as null; turn
            // that into the documented exception so callers never get null.
            throw new FileNotFoundException(
                    "Resource not found on classpath: " + resource);
        }
        return stream;
    }
}
Oops, something went wrong.