Skip to content

Commit

Permalink
Local File System Harvester and batch import / Improvements
Browse files Browse the repository at this point in the history
Batch import:
* Add recurse option (like in LFS harvester)

Local file system harvester:
* Add option to compare the file's last-modified date with the catalog record to decide whether the record needs updating
* Replace System.out by log
* Fix harvester report which was not saved and displayed
* Also import files with an upper-case extension
  • Loading branch information
François Prunayre committed Jun 17, 2013
1 parent 39d92d0 commit f47a03d
Show file tree
Hide file tree
Showing 39 changed files with 228 additions and 128 deletions.
40 changes: 40 additions & 0 deletions jeeves/src/main/java/jeeves/utils/IO.java
Expand Up @@ -26,11 +26,14 @@
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

import jeeves.constants.Jeeves;
import jeeves.server.context.ServiceContext;
Expand Down Expand Up @@ -163,6 +166,43 @@ public static void closeQuietly(Statement stmt) {
}
}
}

/**
 * Returns the files found in a directory; if recurse is true, the
 * subdirectories are processed too.
 *
 * The entries are obtained with {@code directory.listFiles(filter)} and the
 * same filter is used at every level, so a subdirectory is only visited when
 * the filter accepts it. Directories themselves are never added to the result.
 *
 * @param directory the directory to list
 * @param recurse   if true, accepted subdirectories are listed recursively
 * @param filter    filter handed to {@link File#listFiles(FilenameFilter)}
 * @return the accepted, readable non-directory entries; never null
 * @throws IOException if the directory does not exist, cannot be read, is not
 *         a directory or cannot be listed, or if an accepted file cannot be read
 */
public static List<File> getFilesInDirectory(File directory, boolean recurse, FilenameFilter filter) throws IOException {
    if (!directory.exists()) {
        throw new IOException("Directory does not exist: " + directory.getAbsolutePath());
    }
    if (!directory.canRead()) {
        throw new IOException("Cannot read directory: " + directory.getAbsolutePath());
    }
    if (!directory.isDirectory()) {
        throw new IOException("Directory is not a directory: " + directory.getAbsolutePath());
    }

    // listFiles returns null (not an empty array) when an I/O error occurs;
    // report that as an IOException instead of failing with a NullPointerException.
    File[] entries = directory.listFiles(filter);
    if (entries == null) {
        throw new IOException("Cannot list directory: " + directory.getAbsolutePath());
    }

    List<File> fileList = new ArrayList<File>();
    for (File file : entries) {
        if (file.isDirectory()) {
            if (recurse) {
                fileList.addAll(getFilesInDirectory(file, recurse, filter));
            }
        } else if (!file.canRead()) {
            throw new IOException("Cannot read file " + file.getAbsolutePath());
        } else {
            fileList.add(file);
        }
    }
    return fileList;
}
}

//=============================================================================
Expand Down
1 change: 1 addition & 0 deletions web/src/main/java/org/fao/geonet/constants/Params.java
Expand Up @@ -29,6 +29,7 @@
public final class Params {
public static final String ACCESS = "access";
public static final String BTN = "btn";
public static final String RECURSE = "recurse";
public static final String CATEGORY = "category";
public static final String CHOICE = "choice";
public static final String JURISDICTION = "jurisdiction";
Expand Down
Expand Up @@ -440,7 +440,7 @@ private String getCqlConstraint(Search s)
}
}
} else {
System.out.println("no search criterion specified, harvesting all ... ");
log.debug("no search criterion specified, harvesting all ... ");
}

/*
Expand Down
Expand Up @@ -22,12 +22,22 @@
//==============================================================================
package org.fao.geonet.kernel.harvest.harvester.localfilesystem;

import java.io.File;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.UUID;

import jeeves.exceptions.BadInputEx;
import jeeves.interfaces.Logger;
import jeeves.resources.dbms.Dbms;
import jeeves.server.context.ServiceContext;
import jeeves.server.resources.ResourceManager;
import jeeves.utils.IO;
import jeeves.utils.Xml;

import org.fao.geonet.constants.Geonet;
import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester;
import org.fao.geonet.kernel.harvest.harvester.AbstractParams;
Expand All @@ -41,13 +51,6 @@
import org.jdom.Element;
import org.jdom.JDOMException;

import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

/**
* Harvester for local filesystem.
*
Expand All @@ -57,7 +60,6 @@
public class LocalFilesystemHarvester extends AbstractHarvester {

private LocalFilesystemParams params;
private HarvestResult result;

/**
 * Static init method for this harvester. The body is empty, so calling it
 * currently does nothing.
 *
 * @param context service context; not used by the empty body
 * @throws Exception declared by the signature; the empty body throws nothing
 */
public static void init(ServiceContext context) throws Exception {
}
Expand All @@ -66,11 +68,12 @@ public static void init(ServiceContext context) throws Exception {
protected void storeNodeExtra(Dbms dbms, AbstractParams params, String path, String siteId, String optionsId) throws SQLException {
    // Register the harvester-specific params, then store each local-filesystem
    // option as a setting under the "id:<siteId>" node.
    final LocalFilesystemParams fsParams = (LocalFilesystemParams) params;
    super.setParams(fsParams);

    final String siteKey = "id:" + siteId;
    settingMan.add(dbms, siteKey, "icon", fsParams.icon);
    settingMan.add(dbms, siteKey, "recurse", fsParams.recurse);
    settingMan.add(dbms, siteKey, "directory", fsParams.directoryname);
    settingMan.add(dbms, siteKey, "nodelete", fsParams.nodelete);
    settingMan.add(dbms, siteKey, "checkFileLastModifiedForUpdate", fsParams.checkFileLastModifiedForUpdate);
}

@Override
Expand All @@ -93,57 +96,18 @@ protected String doAdd(Dbms dbms, Element node) throws BadInputEx, SQLException
return id;
}

/**
 * Returns the absolute paths of the files found in a directory - if recurse
 * is true, processes all subdirectories too.
 *
 * Entries are selected with an XMLExtensionFilenameFilter created with
 * ACCEPT_DIRECTORIES; directories themselves are never added to the result.
 *
 * @param directory the directory to list
 * @param recurse   if true, subdirectories are listed recursively
 * @return absolute paths of the accepted, readable non-directory entries
 * @throws IOException if the directory does not exist, cannot be read, is not
 *         a directory or cannot be listed, or if an accepted file cannot be read
 */
private List<String> harvestFromDirectory(File directory, boolean recurse) throws IOException {
    System.out.println("LocalFilesystem harvesting: directory " + directory.getAbsolutePath());
    List<String> results = new ArrayList<String>();
    if (!directory.exists()) {
        throw new IOException("directory does not exist: " + directory.getAbsolutePath());
    }
    if (!directory.canRead()) {
        throw new IOException("cannot read directory: " + directory.getAbsolutePath());
    }
    if (!directory.isDirectory()) {
        throw new IOException("directory is not a directory: " + directory.getAbsolutePath());
    }

    // listFiles returns null (not an empty array) when an I/O error occurs;
    // report that as an IOException instead of failing with a NullPointerException.
    File[] entries = directory.listFiles(new XMLExtensionFilenameFilter(XMLExtensionFilenameFilter.ACCEPT_DIRECTORIES));
    if (entries == null) {
        throw new IOException("cannot list directory: " + directory.getAbsolutePath());
    }

    for (File file : entries) {
        if (file.isDirectory()) {
            if (recurse) {
                results.addAll(harvestFromDirectory(file, recurse));
            }
        } else if (!file.canRead()) {
            throw new IOException("cannot read file " + file.getAbsolutePath());
        } else {
            System.out.println("adding file: " + file.getName());
            results.add(file.getAbsolutePath());
        }
    }
    return results;
}

/**
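* Aligns new results from filesystem harvesting. A file is inserted, or replaces the existing
* record with the same uuid, if it appears to be in a supported schema. Unless
* params.checkFileLastModifiedForUpdate is set, no checks on modification date are done; when
* it is set, an existing record is only updated if its change date is older than the file's
* last-modified date.
* NOTE(review): this comment used to state that, contrary to practice in e.g. CSW Harvesting,
* files removed from the harvesting source are NOT removed from the database. The method does
* call dataMan.deleteMetadata for records not seen in the current run - presumably depending on
* params.nodelete; confirm.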
* @param results
* @param listOfFiles
* @param rm
* @throws Exception
*/
private void align(List<String> results, ResourceManager rm) throws Exception {
System.out.println("Start of alignment for : "+ params.name);
this.result = new HarvestResult();
private HarvestResult align(List<File> listOfFiles, ResourceManager rm) throws Exception {
log.debug("Start of alignment for : "+ params.name);
result = new HarvestResult();
Dbms dbms = (Dbms) rm.open(Geonet.Res.MAIN_DB);

boolean transformIt = false;
Expand All @@ -163,19 +127,21 @@ private void align(List<String> results, ResourceManager rm) throws Exception {
//-----------------------------------------------------------------------
//--- insert/update new metadata

for(String xmlFile : results) {
for(File file : listOfFiles) {
result.totalMetadata++;
Element xml;
String filePath = file.getCanonicalPath();

try {
System.out.println("reading file: " + xmlFile);
xml = Xml.loadFile(xmlFile);
log.debug("reading file: " + filePath);
xml = Xml.loadFile(file);
} catch (JDOMException e) { // JDOM problem
System.out.println("Error loading XML from file " + xmlFile +", ignoring");
log.debug("Error loading XML from file " + filePath +", ignoring");
e.printStackTrace();
result.badFormat++;
continue; // skip this one
} catch (Exception e) { // some other error
System.out.println("Error retrieving XML from file " + xmlFile +", ignoring");
log.debug("Error retrieving XML from file " + filePath +", ignoring");
e.printStackTrace();
result.unretrievable++;
continue; // skip this one
Expand All @@ -186,7 +152,7 @@ private void align(List<String> results, ResourceManager rm) throws Exception {
try {
Xml.validate(xml);
} catch (Exception e) {
System.out.println("Cannot validate XML from file " + xmlFile +", ignoring. Error was: "+e.getMessage());
log.debug("Cannot validate XML from file " + filePath +", ignoring. Error was: "+e.getMessage());
result.doesNotValidate++;
continue; // skip this one
}
Expand All @@ -197,7 +163,7 @@ private void align(List<String> results, ResourceManager rm) throws Exception {
try {
xml = Xml.transform(xml, thisXslt);
} catch (Exception e) {
System.out.println("Cannot transform XML from file " + xmlFile+", ignoring. Error was: "+e.getMessage());
log.debug("Cannot transform XML from file " + filePath+", ignoring. Error was: "+e.getMessage());
result.badFormat++;
continue; // skip this one
}
Expand All @@ -215,14 +181,40 @@ private void align(List<String> results, ResourceManager rm) throws Exception {
else {
String id = dataMan.getMetadataId(dbms, uuid);
if (id == null) {
System.out.println("adding new metadata");
id = addMetadata(xml, uuid, dbms, schema, localGroups, localCateg);
// For new record change date will be the time
// the record was harvested
String createDate = new ISODate().toString();
// or the last modified date of the file
if (params.checkFileLastModifiedForUpdate) {
createDate = new ISODate(file.lastModified()).toString();
}


log.debug("adding new metadata");
id = addMetadata(xml, uuid, dbms, schema, localGroups, localCateg, createDate);
result.addedMetadata++;
}
else {
System.out.println("updating existing metadata, id is: " + id);
updateMetadata(xml, id, dbms, localGroups, localCateg);
result.updatedMetadata++;
} else {
// Check last modified date of the file with the record change date
// to check if an update is required
if (params.checkFileLastModifiedForUpdate) {
Date fileDate = new Date(file.lastModified());
String modified = dataMan.getMetadataInfo(dbms, id).changeDate;
Date recordDate = new SimpleDateFormat(ISODate.ISO_DATE_FORMAT).parse(modified);

log.debug(" File date is: " + fileDate.toString() + " / record date is: " + modified);
if (recordDate.before(fileDate)) {
log.debug(" Db record is older than file. Updating record with id: " + id);
updateMetadata(xml, id, dbms, localGroups, localCateg);
result.updatedMetadata ++;
} else {
log.debug(" Db record is not older than last modified date of file. No need for update.");
result.unchangedMetadata ++;
}
} else {
log.debug(" updating existing metadata, id is: " + id);
updateMetadata(xml, id, dbms, localGroups, localCateg);
result.updatedMetadata++;
}
}
idsForHarvestingResult.add(id);
}
Expand All @@ -238,16 +230,18 @@ private void align(List<String> results, ResourceManager rm) throws Exception {
for(Element existingId : existingMetadata) {
String ex$ = existingId.getChildText("id");
if(!idsForHarvestingResult.contains(ex$)) {
log.debug(" Removing: " + ex$);
dataMan.deleteMetadata(context, dbms, ex$);
result.locallyRemoved++;
}
}
}
System.out.println("End of alignment for : "+ params.name);
log.debug("End of alignment for : "+ params.name);
return result;
}

private void updateMetadata(Element xml, String id, Dbms dbms, GroupMapper localGroups, CategoryMapper localCateg) throws Exception {
System.out.println(" - Updating metadata with id: "+ id);
log.debug(" - Updating metadata with id: "+ id);

//
// update metadata
Expand Down Expand Up @@ -277,14 +271,14 @@ private void updateMetadata(Element xml, String id, Dbms dbms, GroupMapper local
* @param schema
* @param localGroups
* @param localCateg
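* @param createDate date string (as produced by ISODate.toString()) recorded for the new metadata; the caller passes the harvest time, or the file's last-modified date when checkFileLastModifiedForUpdate is set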
* @throws Exception
*/
private String addMetadata(Element xml, String uuid, Dbms dbms, String schema, GroupMapper localGroups, CategoryMapper localCateg) throws Exception {
System.out.println(" - Adding metadata with remote uuid: "+ uuid);
private String addMetadata(Element xml, String uuid, Dbms dbms, String schema, GroupMapper localGroups, CategoryMapper localCateg, String createDate) throws Exception {
log.debug(" - Adding metadata with remote uuid: "+ uuid);

String source = params.uuid;
String createDate = new ISODate().toString();


//
// insert metadata
//
Expand All @@ -307,11 +301,11 @@ private String addMetadata(Element xml, String uuid, Dbms dbms, String schema, G

@Override
protected void doHarvest(Logger l, ResourceManager rm) throws Exception {
System.out.println("LocalFilesystem doHarvest: top directory is " + params.directoryname + ", recurse is " + params.recurse);
log.debug("LocalFilesystem doHarvest: top directory is " + params.directoryname + ", recurse is " + params.recurse);
File directory = new File(params.directoryname);
List<String> results = harvestFromDirectory(directory, params.recurse);
System.out.println("LocalFilesystem doHarvest: found #" + results.size() + " results");
align(results, rm);
List<File> results = IO.getFilesInDirectory(directory, params.recurse, new XMLExtensionFilenameFilter(XMLExtensionFilenameFilter.ACCEPT_DIRECTORIES));
log.debug("LocalFilesystem doHarvest: found #" + results.size() + " XML files.");
this.result = align(results, rm);
}

@Override
Expand Down
Expand Up @@ -27,6 +27,7 @@
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.harvest.harvester.AbstractParams;
import org.jdom.Element;
import org.jfree.util.Log;

/**
* Params for local filesystem harvesting.
Expand All @@ -39,6 +40,7 @@ public class LocalFilesystemParams extends AbstractParams {
public String icon;
public String directoryname;
public boolean recurse;
public boolean checkFileLastModifiedForUpdate;
public boolean nodelete;

public LocalFilesystemParams(DataManager dm) {
Expand Down Expand Up @@ -80,7 +82,9 @@ private void createOrUpdate(Element node) {
recurse = (recurseString.equals("on") || recurseString.equals("true"));
String nodeleteString = Util.getParam(site, "nodelete", "true");
nodelete = (nodeleteString.equals("on") || nodeleteString.equals("true"));
System.out.println("recurse: " + recurse + " nodelete: " + nodelete);
String checkFileLastModifiedForUpdateString = Util.getParam(site, "checkFileLastModifiedForUpdate", "true");
checkFileLastModifiedForUpdate = (checkFileLastModifiedForUpdateString.equals("on") || checkFileLastModifiedForUpdateString.equals("true"));

}

public LocalFilesystemParams copy() {
Expand All @@ -90,6 +94,7 @@ public LocalFilesystemParams copy() {
copy.directoryname = directoryname;
copy.recurse = recurse;
copy.nodelete = nodelete;
return copy;
copy.checkFileLastModifiedForUpdate = checkFileLastModifiedForUpdate;
return copy;
}
}

0 comments on commit f47a03d

Please sign in to comment.