Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JS-179, JS-192, JS-250 Update and fix all lang names. #33

Merged
merged 10 commits into from Feb 13, 2013
508 changes: 465 additions & 43 deletions src/main/java/org/crosswire/common/util/Language.java

Large diffs are not rendered by default.

204 changes: 122 additions & 82 deletions src/main/java/org/crosswire/common/util/Languages.java
Expand Up @@ -14,115 +14,59 @@
* 59 Temple Place - Suite 330
* Boston, MA 02111-1307, USA
*
* Copyright: 2005
* Copyright: 2005-2013
* The copyright to this program is held by its authors.
*
* ID: $Id$
*/
package org.crosswire.common.util;

import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import java.util.Set;

import org.crosswire.jsword.book.Books;
import org.crosswire.jsword.internationalisation.LocaleProviderManager;

/**
* A utility class that converts ISO-639 codes or locales to their "friendly"
* language name.
* A utility class that converts BCP-47 codes, as supported by {@link Language}, to their
* localized language name.
*
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by its authors.
* @author DM Smith [dmsmith555 at yahoo dot com]
*/
public class Languages {

/**
* Make the class a true utility class by having a private constructor.
*/
private Languages() {
}

/**
* Determine whether the language code is valid. The code is valid if it is
* null or empty. The code is valid if it is in iso639.properties. If a
* locale is used for the iso639Code, it will use the part before the '_'.
* Thus, this code does not support dialects, except as found in the iso639.
* Get the language name for the BCP-47 specification of the language.
*
* @param iso639Code
* @return true if the language is valid.
*/
public static boolean isValidLanguage(String iso639Code) {
try {
String code = getLanguageCode(iso639Code);
if (DEFAULT_LANG_CODE.equals(code) || UNKNOWN_LANG_CODE.equals(code)) {
return true;
}
getLocalisedCommonLanguages().getString(code);
return true;
} catch (MissingResourceException e) {
return false;
}
}

/**
* Get the language name from the language code. If the code is null or
* empty then it is considered to be DEFAULT_LANG_CODE (that is, English).
* If it starts with x- or is too long then it will return unknown. If the
* code's name cannot be found, it will return the code. If a locale is used
* for the iso639Code, it will use the part before the '_'. Thus, this code
* does not support dialects, except as found in the iso639.
*
* @param iso639Code
* @param code the BCP-47 specification for the language
* @return the name of the language
*/
public static String getLanguageName(String iso639Code) {
String code = getLanguageCode(iso639Code);
public static String getName(String code) {
// Returning the code is the fallback for lookup
String name = code;
try {
return getLocalisedCommonLanguages().getString(code);
} catch (MissingResourceException e) {
try {
return allLangs.getString(code);
} catch (MissingResourceException e1) {
return code;
ResourceBundle langs = getLocalisedCommonLanguages();
if (langs != null) {
name = langs.getString(code);
}
} catch (MissingResourceException e) {
// This is allowed
}
}

/**
* Get the language code from the input. If the code is null or empty then
* it is considered to be DEFAULT_LANG_CODE (that is, English). If a locale
* is used for the iso639Code, it will use the part before the '_'. Thus,
* this code does not support dialects, except as found in the iso639. If it
* is known to be unknown then return unknown. Otherwise, return the 2 or 3
* letter code. Note: it might not be valid.
*
* @param input
* @return the code for the language
*/
public static String getLanguageCode(String input) {
String lookup = input;
if (lookup == null || lookup.length() == 0) {
return DEFAULT_LANG_CODE;
}

if (lookup.indexOf('_') != -1) {
String[] locale = StringUtil.split(lookup, '_');
// We need to check what stands before the _, it might be empty or
// unknown.
return getLanguageCode(locale[0]);
}

// These are not uncommon. Looking for them prevents exceptions
// and provides the same result.
if (lookup.startsWith("x-") || lookup.startsWith("X-") || lookup.length() > 3)
{
return UNKNOWN_LANG_CODE;
}

return lookup;
return name;
}

/**
Expand All @@ -140,7 +84,9 @@ private static ResourceBundle getLocalisedCommonLanguages() {
langs = localisedCommonLanguages.get(locale);
if (langs == null) {
langs = initLanguages(locale);
localisedCommonLanguages.put(locale, langs);
if (langs != null) {
localisedCommonLanguages.put(locale, langs);
}
}
}
}
Expand All @@ -151,17 +97,111 @@ private static ResourceBundle initLanguages(Locale locale) {
try {
return ResourceBundle.getBundle("iso639", locale, CWClassLoader.instance());
} catch (MissingResourceException e) {
// try the iso 639 full
log.info("Unable to find language in iso639 bundle", e);
}
return null;
}

/**
 * Fallback name lookup backed by the complete language list.
 * The basic list covers a few hundred languages, while the full list
 * has over 7000. Because it is only a fallback, the full list is not
 * internationalized.
 */
public static class AllLanguages {
    /**
     * Not instantiable: every member of this class is static.
     */
    private AllLanguages() { }

    /**
     * Look up the name of a language by its code.
     *
     * @param languageCode the code to look up
     * @return the name found for the code, or the code itself when the
     *         full list could not be loaded or has no entry for it
     */
    public static String getName(String languageCode) {
        // The full list is optional; without it the code is the best answer.
        if (instance == null) {
            return languageCode;
        }

        String found = instance.get(languageCode);
        return found == null ? languageCode : found;
    }

    /**
     * The contents of the full language list, populated by the static
     * initializer below. Remains null when the list cannot be loaded,
     * which is an accepted condition.
     */
    private static PropertyMap instance;

    /**
     * Load the full language list once, when this class is initialized.
     */
    static {
        try {
            instance = ResourceUtil.getProperties("iso639full");
            log.debug("Loading iso639full.properties file");
        } catch (IOException e) {
            // Absence of the file is tolerated; getName falls back to the code.
            log.info("Unable to load iso639full.properties", e);
        }
    }
}

// this is incorrect but see JS-195
return ResourceBundle.getBundle("iso639full", locale, CWClassLoader.instance());
/**
 * Lookup of Right-to-Left scripts and languages. The set of known
 * Right-to-Left codes is read from the entries listed in rtol.txt.
 */
public static class RtoL {
    /**
     * Not instantiable: every member of this class is static.
     */
    private RtoL() { }

    /**
     * Determine whether this language is a Left-to-Right or a Right-to-Left
     * language. If the language has a script, it is used for the determination.
     * Otherwise, check the language.
     * <p>
     * Note: This is problematic. Languages do not have direction.
     * Scripts do. Further, there are over 7000 living languages, many of which
     * are written in Right-to-Left scripts and are not listed here.
     * </p>
     *
     * @param script the iso15924 script code, must be in Title case; may be null
     * @param lang the iso639 language code, must be lower case; may be null
     * @return true if the script (or, when no script is given, the language)
     *         is listed as Right-to-Left; false otherwise, including when
     *         both arguments are null
     */
    public static boolean isRtoL(String script, String lang) {
        // A script, when given, decides alone; the language is not consulted.
        if (script != null) {
            return rtol.contains(script);
        }
        if (lang != null) {
            return rtol.contains(lang);
        }
        return false;
    }

    /**
     * The codes listed as Right-to-Left. Stays empty when rtol.txt cannot
     * be loaded, in which case nothing is reported as Right-to-Left.
     */
    private static Set<String> rtol = new HashSet<String>();

    /**
     * Load the Right-to-Left data once, when this class is initialized.
     */
    static {
        try {
            URL index = ResourceUtil.getResource(Translations.class, "rtol.txt");
            String[] list = NetUtil.listByIndexFile(NetUtil.toURI(index));
            log.debug("Loading rtol.txt file");
            for (String entry : list) {
                rtol.add(entry);
            }
        } catch (IOException ex) {
            // Absence of the file is tolerated; isRtoL then always answers false.
            log.info("Unable to load rtol.txt", ex);
        }
    }
}

public static final String DEFAULT_LANG_CODE = "en";
private static final String UNKNOWN_LANG_CODE = "und";
// Log under this class's own category rather than that of Books.
private static final Logger log = Logger.getLogger(Languages.class);
private static/* final */ResourceBundle allLangs;
private static Map<Locale, ResourceBundle> localisedCommonLanguages = new HashMap<Locale, ResourceBundle>();
}
33 changes: 25 additions & 8 deletions src/main/java/org/crosswire/common/util/NetUtil.java
Expand Up @@ -534,13 +534,17 @@ public static String[] listByIndexFile(URI index) throws IOException {
}

/**
* List all the files specified by the index file passed in. To be
* acceptable it:
* List all the files specified by the index file passed in.
* <p>Each line is pre-processed:</p>
* <ul>
* <li>Ignore comments (# to end of line)</li>
* <li>Trim spaces from line.</li>
* <li>Ignore blank lines.</li>
* </ul>
*
* To be acceptable it:
* <ul>
* <li> must be a non-0 length string,</li>
* <li> not commented with #,</li>
* <li> not the index file itself</li>
* <li> and acceptable by the filter.</li>
* <li> cannot be the index file itself</li>
* <li> and must be acceptable by the filter.</li>
* </ul>
*
* @return String[] Matching results.
Expand All @@ -565,11 +569,24 @@ public static String[] listByIndexFile(URI index, URIFilter filter) throws IOExc
break;
}

String name = line;

// Strip comments from the line
int len = name.length();
int commentPos;
for (commentPos = 0; commentPos < len && name.charAt(commentPos) != '#'; ++commentPos) {
continue; // test does the work
}

if (commentPos < len) {
name = name.substring(0, commentPos);
}

// we need to trim extraneous whitespace on the line
String name = line.trim();
name = name.trim();

// Is it acceptable?
if (name.length() > 0 && name.charAt(0) != '#' && !name.equals(INDEX_FILE) && filter.accept(name)) {
if (name.length() > 0 && !name.equals(INDEX_FILE) && filter.accept(name)) {
list.add(name);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/crosswire/common/util/Translations.java
Expand Up @@ -227,7 +227,7 @@ private void loadSupportedTranslations() {
}

public String toString(String translationCode) {
StringBuilder currentTranslation = new StringBuilder(Languages.getLanguageName(translationCode));
StringBuilder currentTranslation = new StringBuilder(Languages.getName(translationCode));

if (translationCode.indexOf('_') != -1) {
String[] locale = StringUtil.split(translationCode, '_');
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/org/crosswire/jsword/book/BookData.java
Expand Up @@ -218,7 +218,9 @@ private Element getOsisContent() throws BookException {
Book book = books[i];
cell = OSISUtil.factory().createCell();
Language lang = (Language) book.getProperty(BookMetaData.KEY_XML_LANG);
cell.setAttribute(OSISUtil.OSIS_ATTR_LANG, lang.getCode(), Namespace.XML_NAMESPACE);
if (lang != null) {
cell.setAttribute(OSISUtil.OSIS_ATTR_LANG, lang.getCode(), Namespace.XML_NAMESPACE);
}

row.addContent(cell);

Expand Down
Expand Up @@ -77,7 +77,7 @@ public DefaultBookMetaData(BookDriver driver, String name, BookCategory type) {
setDriver(driver);
setName(name);
setBookCategory(type);
setLanguage(new Language(null)); // Default language
setLanguage(Language.DEFAULT_LANG); // Default language
}

/*
Expand Down