Skip to content

Commit

Permalink
Merge branch 'master' into improvement/spring-configuration-overrides
Browse files Browse the repository at this point in the history
Conflicts:
	web/src/main/java/org/fao/geonet/kernel/search/spatial/OrSpatialFilter.java
	web/src/main/webapp/WEB-INF/config-security-mapping.xml
  • Loading branch information
Jesse Eichar committed Jan 31, 2013
2 parents 59ce197 + 9a8f0d2 commit 51f957e
Show file tree
Hide file tree
Showing 51 changed files with 404 additions and 754 deletions.
Expand Up @@ -1038,7 +1038,7 @@ GeoNetwork.util.SearchFormTools = {
var defaultCodeList = [['dataset', OpenLayers.i18n('dataset')],
['series', OpenLayers.i18n('series')],
['service', OpenLayers.i18n('service')],
['model', OpenLayers.i18n('featureCat')]],
['featureCatalog', OpenLayers.i18n('featureCat')]],
config = {
name: 'E_type',
mode: 'local',
Expand Down
Expand Up @@ -41,9 +41,14 @@
import org.fao.geonet.kernel.harvest.harvester.Privileges;
import org.fao.geonet.kernel.harvest.harvester.RecordInfo;
import org.fao.geonet.kernel.harvest.harvester.UUIDMapper;
import org.fao.geonet.kernel.search.LuceneSearcher;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.xpath.XPath;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

//=============================================================================
Expand Down Expand Up @@ -384,6 +389,13 @@ private Element retrieveMetadata(String uuid)
return null;
}
}

if(params.rejectDuplicateResource) {
if (foundDuplicateForResource(uuid, response)) {
return null;
}
}

return response;
}
catch(Exception e)
Expand All @@ -397,6 +409,71 @@ private Element retrieveMetadata(String uuid)
}
}

/**
 * Checks whether the catalog already contains a record, with a different UUID,
 * that carries the same resource identifier as the harvested record.
 *
 * The same dataset (same MD_metadata/../identificationInfo/../identifier/../code),
 * e.g. a NMA layer for roads, may be described in two or more catalogs under
 * different metadata UUIDs. The descriptions may differ slightly depending on the
 * author, but the resource is the same. When harvesting, some users want to
 * exclude such "duplicate" descriptions of one dataset.
 *
 * The check searches the identifier field of the index using
 * {@link LuceneSearcher#getAllMetadataFromIndexFor(String, String, String, Set, boolean)}.
 * It is only performed for schemas whose name starts with "iso19139"; for any
 * other schema the method returns false. Errors raised during the lookup are
 * logged and treated as "no duplicate found".
 *
 * @param uuid     the metadata unique identifier of the harvested record
 * @param response the XML document to check
 * @return true if a record with the same resource identifier and a different
 *         UUID is found (the duplicatedResource counter is incremented in that
 *         case); false otherwise
 */
private boolean foundDuplicateForResource(String uuid, Element response) {
    String schema = dataMan.autodetectSchema(response);

    // The XPath below only applies to iso19139-based schemas; a null schema is
    // treated like any other unsupported schema instead of raising an NPE.
    if (schema == null || !schema.startsWith("iso19139")) {
        return false;
    }

    String resourceIdentifierXPath = "gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:identifier/*/gmd:code/gco:CharacterString";
    String resourceIdentifierLuceneIndexField = "identifier";
    String defaultLanguage = "eng";

    try {
        // Extract resource identifier
        XPath xp = XPath.newInstance (resourceIdentifierXPath);
        xp.addNamespace("gmd", "http://www.isotc211.org/2005/gmd");
        xp.addNamespace("gco", "http://www.isotc211.org/2005/gco");
        // The expression selects gco:CharacterString elements only, so the
        // untyped result list holds Element instances.
        @SuppressWarnings("unchecked")
        List<Element> resourceIdentifiers = xp.selectNodes(response);
        if (resourceIdentifiers.isEmpty()) {
            return false;
        }

        // Check if the metadata to import has a resource identifier
        // existing in current catalog for a record with a different UUID
        log.debug(" - Resource identifiers found : " + resourceIdentifiers.size());

        for (Element identifierNode : resourceIdentifiers) {
            String identifier = identifierNode.getTextTrim();
            if (identifier.length() == 0) {
                // An empty code cannot identify a resource; do not look it up.
                continue;
            }
            log.debug(" - Searching for duplicates for resource identifier: " + identifier);

            Map<String, Map<String,String>> values = LuceneSearcher.getAllMetadataFromIndexFor(defaultLanguage, resourceIdentifierLuceneIndexField,
                    identifier, Collections.singleton("_uuid"), true);
            log.debug(" - Number of resources with same identifier: " + values.size());
            for (Map<String, String> recordFieldValues : values.values()) {
                String indexRecordUuid = recordFieldValues.get("_uuid");
                // Skip index entries without a stored _uuid rather than letting a
                // NullPointerException abort the check for the remaining identifiers.
                if (indexRecordUuid == null || indexRecordUuid.equals(uuid)) {
                    continue;
                }
                log.debug(" - UUID " + indexRecordUuid + " in index does not match harvested record UUID " + uuid);
                log.warning(" - Duplicates found. Skipping record with UUID " + uuid + " and resource identifier " + identifier);

                result.duplicatedResource ++;
                return true;
            }
        }
    } catch (Exception e) {
        // Best effort: a failed lookup must not stop the harvest of this record.
        log.warning(" - Error when searching for resource duplicate " + uuid + ". Error is: " + e.getMessage());
        // NOTE(review): prefer a logger call that accepts the Throwable if the
        // harvester logger offers one; kept so the stack trace is not lost.
        e.printStackTrace();
    }
    return false;
}

//--------------------------------------------------------------------------
//---
//--- Variables
Expand Down
Expand Up @@ -149,6 +149,7 @@ protected void storeNodeExtra(Dbms dbms, AbstractParams p, String path,

settingMan.add(dbms, "id:"+siteId, "capabUrl", params.capabUrl);
settingMan.add(dbms, "id:"+siteId, "icon", params.icon);
settingMan.add(dbms, "id:"+siteId, "rejectDuplicateResource", params.rejectDuplicateResource);

//--- store search nodes

Expand Down Expand Up @@ -206,7 +207,8 @@ protected Element getResult() {
add(res, "added", result.addedMetadata);
add(res, "updated", result.updatedMetadata);
add(res, "unchanged", result.unchangedMetadata);
add(res, "unknownSchema",result.unknownSchema);
add(res, "duplicatedResource",result.duplicatedResource);
add(res, "unknownSchema",result.unknownSchema);
add(res, "removed", result.locallyRemoved);
add(res, "unretrievable",result.unretrievable);
add(res, "doesNotValidate",result.doesNotValidate);
Expand Down Expand Up @@ -247,6 +249,7 @@ class CswResult
public int updatedMetadata;
public int unchangedMetadata;
public int locallyRemoved;
public int duplicatedResource;
public int unknownSchema;
public int unretrievable;
public int doesNotValidate;
Expand Down
Expand Up @@ -65,7 +65,8 @@ public void create(Element node) throws BadInputEx
Element searches = node.getChild("searches");

capabUrl = Util.getParam(site, "capabilitiesUrl", "");

rejectDuplicateResource = Util.getParam(site, "rejectDuplicateResource", false);

try {
capabUrl = URLDecoder.decode(capabUrl, "UTF-8");
}
Expand Down Expand Up @@ -93,7 +94,8 @@ public void update(Element node) throws BadInputEx
Element searches = node.getChild("searches");

capabUrl = Util.getParam(site, "capabilitiesUrl", capabUrl);

rejectDuplicateResource = Util.getParam(site, "rejectDuplicateResource", rejectDuplicateResource);

try {
capabUrl = URLDecoder.decode(capabUrl, "UTF-8");
}
Expand Down Expand Up @@ -133,6 +135,7 @@ public CswParams copy()

copy.capabUrl = capabUrl;
copy.icon = icon;
copy.rejectDuplicateResource = rejectDuplicateResource;

for (Search s : alSearches)
copy.alSearches.add(s.copy());
Expand Down Expand Up @@ -168,6 +171,7 @@ private void addSearches(Element searches)

public String capabUrl;
public String icon;
public boolean rejectDuplicateResource;

private List<Search> alSearches = new ArrayList<Search>();
}
Expand Down
Expand Up @@ -25,8 +25,10 @@

import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import jeeves.exceptions.BadParameterEx;
Expand All @@ -51,8 +53,10 @@
import org.fao.geonet.csw.common.requests.GetRecordsRequest;
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.harvest.harvester.RecordInfo;
import org.fao.geonet.kernel.search.LuceneSearcher;
import org.fao.geonet.lib.Lib;
import org.jdom.Element;
import org.jdom.xpath.XPath;

//=============================================================================

Expand Down Expand Up @@ -547,7 +551,7 @@ private RecordInfo getRecordInfo(Element record)
log.warning("Record doesn't have a uuid : "+ name);
return null; // skip this one
}

String modified = dm.extractDateModified(schema, record);
if (modified.length() == 0) modified = null;
if(log.isDebugEnabled())
Expand Down
67 changes: 42 additions & 25 deletions web/src/main/java/org/fao/geonet/kernel/search/LuceneSearcher.java
Expand Up @@ -1546,57 +1546,64 @@ else if (isTemplate.equals("s")) {
/**
* Searches in the Lucene index and returns a Lucene index field value. The metadata record is retrieved based on its uuid.
*
* @param webappName
* @param priorityLang
* @param id
* @param fieldname
* @return
* @throws Exception
*/
public static String getMetadataFromIndex(String webappName, String priorityLang, String id, String fieldname) throws Exception {
return LuceneSearcher.getMetadataFromIndex(webappName, priorityLang, id, Collections.singleton(fieldname)).get(fieldname);
public static String getMetadataFromIndex(String priorityLang, String id, String fieldname) throws Exception {
return LuceneSearcher.getMetadataFromIndex(priorityLang, id, Collections.singleton(fieldname)).get(fieldname);
}

/**
* TODO javadoc.
*
* @param webappName
* @param priorityLang
* @param webappNameg
* @param id
* @param fieldname
* @return
* @throws Exception
*/
public static String getMetadataFromIndexById(String webappName, String priorityLang, String id, String fieldname) throws Exception {
return LuceneSearcher.getMetadataFromIndex(webappName, priorityLang, "_id", id, Collections.singleton(fieldname)).get(fieldname);
public static String getMetadataFromIndexById(String priorityLang, String id, String fieldname) throws Exception {
return LuceneSearcher.getMetadataFromIndex(priorityLang, "_id", id, Collections.singleton(fieldname)).get(fieldname);
}

/**
* TODO javadoc.
*
* @param webappName
* @param priorityLang
* @param uuid
* @param fieldnames
* @return
* @throws Exception
*/
private static Map<String,String> getMetadataFromIndex(String webappName, String priorityLang, String uuid, Set<String> fieldnames) throws Exception {
return LuceneSearcher.getMetadataFromIndex(webappName, priorityLang, "_uuid", uuid, fieldnames);
private static Map<String,String> getMetadataFromIndex(String priorityLang, String uuid, Set<String> fieldnames) throws Exception {
return LuceneSearcher.getMetadataFromIndex(priorityLang, "_uuid", uuid, fieldnames);
}

/**
 * Returns the requested index fields of the single record matching a field value.
 *
 * Delegates to
 * {@link LuceneSearcher#getAllMetadataFromIndexFor(String, String, String, Set, boolean)}
 * with checkAllHits set to false.
 *
 * @param priorityLang preferred index language, passed through to the lookup
 * @param idField      index field to search on (eg. _uuid or _id)
 * @param id           value to search for in idField
 * @param fieldnames   names of the index fields to return
 * @return the returned field values keyed by field name, or an empty map when
 *         the lookup does not yield exactly one record
 * @throws Exception if the index lookup fails
 */
public static Map<String,String> getMetadataFromIndex(String priorityLang, String idField, String id, Set<String> fieldnames) throws Exception {
    Map<String, Map<String, String>> results = LuceneSearcher.getAllMetadataFromIndexFor(priorityLang, idField, id, fieldnames, false);
    if (results.size() != 1) {
        return new HashMap<String, String>();
    }
    // Exactly one entry: take it through the iterator, which keeps the element
    // type and avoids the array copy and unchecked cast of toArray()[0].
    return results.values().iterator().next();
}

/**
* TODO javadoc.
* Get Lucene index fields for matching records
*
* @param priorityLang Preferred index language to use.
* @param field Field to search for (eg. _uuid)
* @param value Value to search for
* @param returnFields Fields to return
* @param checkAllHits If false, only the first match is analyzed for returned fields; if true, all matches are.
* Set to false when searching on the uuid field, where only one record is expected.
*
* @param webappName
* @param priorityLang
* @param idField
* @param id
* @param fieldnames
* @return
* @throws Exception
*/
private static Map<String,String> getMetadataFromIndex(String webappName, String priorityLang, String idField, String id, Set<String> fieldnames) throws Exception {
public static Map<String,Map<String,String>> getAllMetadataFromIndexFor(String priorityLang, String field, String value, Set<String> returnFields, boolean checkAllHits) throws Exception {
final IndexAndTaxonomy indexAndTaxonomy;
final SearchManager searchmanager;
ServiceContext context = ServiceContext.get();
Expand All @@ -1611,27 +1618,37 @@ private static Map<String,String> getMetadataFromIndex(String webappName, String
}
IndexSearcher searcher = new IndexSearcher(reader);

Map<String, String> values = new HashMap<String, String>();
Map<String, Map<String, String>> records = new HashMap<String, Map<String, String>>();

try {
TermQuery query = new TermQuery(new Term(idField, id));
TermQuery query = new TermQuery(new Term(field, value));
SettingInfo settingInfo = _sm.get_settingInfo();
boolean sortRequestedLanguageOnTop = settingInfo.getRequestedLanguageOnTop();
if(Log.isDebugEnabled(Geonet.LUCENE))
Log.debug(Geonet.LUCENE, "sortRequestedLanguageOnTop: " + sortRequestedLanguageOnTop);

int numberOfHits = 1;
int counter = 0;
if (checkAllHits) {
numberOfHits = Integer.MAX_VALUE;
}
Sort sort = LuceneSearcher.makeSort(Collections.<Pair<String, Boolean>>emptyList(), priorityLang, sortRequestedLanguageOnTop);
Filter filter = NoFilterFilter.instance();
TopDocs tdocs = searcher.search(query, filter, 1, sort);

TopDocs tdocs = searcher.search(query, filter, numberOfHits, sort);
for( ScoreDoc sdoc : tdocs.scoreDocs ) {
DocumentStoredFieldVisitor docVisitor = new DocumentStoredFieldVisitor(fieldnames);
Map<String, String> values = new HashMap<String, String>();

DocumentStoredFieldVisitor docVisitor = new DocumentStoredFieldVisitor(returnFields);
reader.document(sdoc.doc, docVisitor);
Document doc = docVisitor.getDocument();

for( String fieldname : fieldnames ) {
for( String fieldname : returnFields ) {
values.put(fieldname, doc.get(fieldname));
}

records.put(String.valueOf(counter), values);
counter ++;
}

} catch (CorruptIndexException e) {
Expand All @@ -1644,7 +1661,7 @@ private static Map<String,String> getMetadataFromIndex(String webappName, String
} finally {
searchmanager.releaseIndexReader(indexAndTaxonomy);
}
return values;
return records;
}

/**
Expand Down
Expand Up @@ -28,15 +28,19 @@ public OrSpatialFilter(Query query, int numHits, Envelope bounds,
this.filters = filters;
}

private static final long serialVersionUID = 1L;

@Override
public Filter createGeomFilter(FilterFactory2 filterFactory, PropertyName geomPropertyName, Literal geomExpression) {
protected Filter createFilter(FeatureSource<SimpleFeatureType, SimpleFeature> source) {
List<Filter> ops = new ArrayList<Filter>(filters.size());

for (SpatialFilter sfilter : filters) {
ops.add(sfilter.createGeomFilter(filterFactory, geomPropertyName, geomExpression));
ops.add(sfilter.createFilter(source));
}
return filterFactory.or(ops);
return _filterFactory.or(ops);

}

/**
 * Not supported by this filter: the method always throws.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Filter createGeomFilter(FilterFactory2 filterFactory, PropertyName geomPropertyName, Literal geomExpression) {
    // A message makes the failure self-explanatory in logs and stack traces.
    throw new UnsupportedOperationException("createGeomFilter is not supported by OrSpatialFilter");
}
}
6 changes: 2 additions & 4 deletions web/src/main/java/org/fao/geonet/util/XslUtil.java
Expand Up @@ -179,12 +179,11 @@ public static String posListToWktCoords(Object coords, Object dim){
* @return metadata title or an empty string if Lucene index or uuid could not be found
*/
public static String getIndexField(Object appName, Object uuid, Object field, Object lang) {
String webappName = appName.toString();
String id = uuid.toString();
String fieldname = field.toString();
String language = (lang.toString().equals("") ? null : lang.toString());
try {
String fieldValue = LuceneSearcher.getMetadataFromIndex(webappName, language, id, fieldname);
String fieldValue = LuceneSearcher.getMetadataFromIndex(language, id, fieldname);
if(fieldValue == null) {
return getIndexFieldById(appName,uuid,field,lang);
}
Expand All @@ -196,11 +195,10 @@ public static String getIndexField(Object appName, Object uuid, Object field, Ob
}

public static String getIndexFieldById(Object appName, Object id, Object field, Object lang) {
String webappName = appName.toString();
String fieldname = field.toString();
String language = (lang.toString().equals("") ? null : lang.toString());
try {
String fieldValue = LuceneSearcher.getMetadataFromIndexById(webappName, language, id.toString(), fieldname);
String fieldValue = LuceneSearcher.getMetadataFromIndexById(language, id.toString(), fieldname);
return fieldValue == null ? "" : fieldValue;
} catch (Exception e) {
Log.error(Geonet.GEONETWORK, "Failed to get index field value caused by " + e.getMessage());
Expand Down
Expand Up @@ -12,3 +12,4 @@ ALTER TABLE usergroups ADD PRIMARY KEY (userid, profile, groupid);
ALTER TABLE Metadata ALTER COLUMN harvestUri varchar(512);

ALTER TABLE HarvestHistory ADD elapsedTime int;
UPDATE HarvestHistory SET elapsedTime = 0 WHERE elapsedTime IS NULL;

0 comments on commit 51f957e

Please sign in to comment.