Skip to content

Commit

Permalink
Initial commit that parses exported or storage foxml.
Browse files Browse the repository at this point in the history
  • Loading branch information
mikedurbin committed Feb 3, 2015
0 parents commit 675c33c
Show file tree
Hide file tree
Showing 35 changed files with 2,705 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
target/
.idea
64 changes: 64 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Maven coordinates of this module; the build output is a jar. -->
<groupId>org.fcrepo.migration</groupId>
<artifactId>migration-utils</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>

<name>migration-utils</name>
<url>http://maven.apache.org</url>

<!-- Source files are read as UTF-8 by the build plugins. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<!-- No scope is declared on the first five dependencies, so Maven's default scope applies to them. -->
<dependencies>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.10</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.10.3</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.10.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.3.6</version>
</dependency>

<!-- Test-scoped dependency: only on the test classpath. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>

<!-- Compile with Java 1.7 source and target levels. -->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
39 changes: 39 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
A framework to support migration of data from Fedora 3 to Fedora 4 repositories.

# Overview

The basic program allows for a configuration to define one or more Fedora Object Handlers and
a repository source. The handlers will in turn be provided information about each object
in the repository under the theory that one or more Handler implementations may be written to
achieve whatever complex data migration or analysis is desired.

# Status

Currently this application is far from ready for use, but the basic framework is presented to allow:

1. testing to ensure that it does indeed completely and accurately parse FOXML from various fedora versions and contexts
2. the framework to be discussed and improved before complex mappings and migration scenarios are implemented

# Usage

To get basic output for a directory of FOXML that have been produced using the REST API's export with the "archive" context:

``` mvn clean compile exec:java -Dexec.mainClass=org.fcrepo.migration.Migrator -Dexec.args="path/to/exported/foxml" ```

To get basic output for a directory of FOXML read from fedora's storage:

``` mvn clean compile exec:java -Dexec.mainClass=org.fcrepo.migration.Migrator -Dexec.args="path/to/fedora/data/objectStore path/to/fedora/data/datastreamStore work/directory" ```

# Development

To do something more sophisticated (like actually migrating or analyzing content) implement
FedoraObjectHandler and update Migrator before running.

# Additional Documentation
* [wiki](https://wiki.duraspace.org/display/FF/Fedora+3+to+4+Data+Migration)






18 changes: 18 additions & 0 deletions src/main/java/org/fcrepo/migration/ContentDigest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package org.fcrepo.migration;

/**
 * Read-only view of the content digest recorded for a fedora datastream.
 */
public interface ContentDigest {

    /**
     * Returns the digest type, which is one of the several types defined
     * in the fedora foxml schema.
     */
    String getType();

    /**
     * Returns the value of the content digest.
     */
    String getDigest();
}
40 changes: 40 additions & 0 deletions src/main/java/org/fcrepo/migration/DatastreamInfo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package org.fcrepo.migration;

/**
 * Read-only view of the information describing a fedora datastream.
 */
public interface DatastreamInfo {

    /**
     * Returns the information about the object that owns this datastream.
     */
    ObjectInfo getObjectInfo();

    /**
     * Returns this datastream's identifier, which is unique within its
     * object.
     */
    String getDatastreamId();

    /**
     * Returns the control group of this datastream; the expected values
     * are "M", "X", "R" or "E".
     */
    String getControlGroup();

    /**
     * Returns the fedora URI of this datastream.
     */
    String getFedoraURI();

    /**
     * Returns the state of this datastream; the expected values are
     * "A", "I" or "D".
     */
    String getState();

    /**
     * Returns true if this datastream was/is versionable.
     */
    boolean getVersionable();
}
69 changes: 69 additions & 0 deletions src/main/java/org/fcrepo/migration/DatastreamVersion.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package org.fcrepo.migration;

import java.io.IOException;
import java.io.InputStream;

/**
 * An interface defining access to information about a single version of a
 * fedora datastream.
 */
public interface DatastreamVersion {

    /**
     * Gets the information about the datastream for which this is a
     * version (which in turn can be queried to get information about the
     * object).
     */
    DatastreamInfo getDatastreamInfo();

    /**
     * Gets the id for this version.
     */
    String getVersionId();

    /**
     * Gets the mime type for this version.
     */
    String getMimeType();

    /**
     * Gets the label for this version.
     */
    String getLabel();

    /**
     * Gets the date when this version was created, as a String.
     */
    String getCreated();

    /**
     * Gets the altIDs value for this version.
     */
    String getAltIds();

    /**
     * Gets the format URI for this version.
     */
    String getFormatUri();

    /**
     * Gets the size (in bytes) of the content of this datastream version.
     */
    long getSize();

    /**
     * Gets the content digest (if available) for this version.
     */
    ContentDigest getContentDigest();

    /**
     * Gets access to the content of this datastream version. When the
     * content is text, the encoding can be expected to be UTF-8.
     *
     * @return a stream over the content
     * @throws IllegalStateException if invoked outside of the call to
     *         {@link FedoraObjectHandler#processDatastreamVersion}
     * @throws IOException when unable to access the stream
     */
    InputStream getContent() throws IOException;

}
27 changes: 27 additions & 0 deletions src/main/java/org/fcrepo/migration/DefaultContentDigest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.fcrepo.migration;

/**
 * A default implementation of ContentDigest that accepts its values at
 * construction time. Both fields are final and there are no setters, so
 * an instance never changes after it has been built.
 */
public class DefaultContentDigest implements ContentDigest {

    private final String type;

    private final String digest;

    /**
     * Creates a digest holding the given values. The arguments are stored
     * as-is; no validation is performed.
     *
     * @param type the digest type returned by {@link #getType()}
     * @param digest the digest value returned by {@link #getDigest()}
     */
    public DefaultContentDigest(String type, String digest) {
        this.type = type;
        this.digest = digest;
    }

    @Override
    public String getType() {
        return type;
    }

    @Override
    public String getDigest() {
        return digest;
    }
}
30 changes: 30 additions & 0 deletions src/main/java/org/fcrepo/migration/DefaultObjectInfo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.fcrepo.migration;

import java.io.InputStream;

/**
 * A default implementation of ObjectInfo that accepts its values at
 * construction time. Both fields are final and there are no setters, so
 * an instance never changes after it has been built.
 */
public class DefaultObjectInfo implements ObjectInfo {

    private final String pid;

    private final String uri;

    /**
     * Creates an object description holding the given values. The
     * arguments are stored as-is; no validation is performed.
     *
     * @param pid the value returned by {@link #getPid()}
     * @param uri the value returned by {@link #getFedoraURI()}
     */
    public DefaultObjectInfo(String pid, String uri) {
        this.pid = pid;
        this.uri = uri;
    }

    @Override
    public String getPid() {
        return pid;
    }

    @Override
    public String getFedoraURI() {
        return uri;
    }

}
44 changes: 44 additions & 0 deletions src/main/java/org/fcrepo/migration/FedoraObjectHandler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package org.fcrepo.migration;

import java.io.IOException;

/**
 * Callback interface whose methods are meant to be invoked while a
 * fedora 3 object is processed, such that every bit of information in
 * that object is exposed to the implementing instance.
 *
 * An instance is expected to be used for a single fedora object, and
 * method calls should not require implementations to maintain state.
 */
public interface FedoraObjectHandler {

    /**
     * Hook for the start of an object.
     * NOTE(review): presumably called before the other callbacks for the
     * same object; confirm against the FedoraObjectProcessor implementations.
     * @param object information about the object
     */
    void beginObject(ObjectInfo object);

    /**
     * Called so that this handler can process the object's properties.
     * @param properties the properties for the object
     */
    void processObjectProperties(ObjectProperties properties);

    /**
     * Called so that this handler can process one datastream version.
     * @param dsVersion an encapsulation of the datastream version. Implementations must not
     *                  retain it or reference it outside of this method, because attached
     *                  resources (cached files, references to streams) may be updated and
     *                  no longer valid.
     */
    void processDatastreamVersion(DatastreamVersion dsVersion);

    /**
     * Hook called once the object has been completely processed; may be useful for cleanup
     * or finalization routines. Once this invocation completes, any references provided to
     * prior calls will no longer be in scope.
     * @param object information about the object
     */
    void completeObject(ObjectInfo object);

    /**
     * Called if processing of the object failed for some reason.
     * @param object information about the object
     */
    void abortObject(ObjectInfo object);
}
15 changes: 15 additions & 0 deletions src/main/java/org/fcrepo/migration/FedoraObjectProcessor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package org.fcrepo.migration;

import javax.xml.stream.XMLStreamException;

/**
 * An interface that encapsulates a single object for processing. It
 * exposes a method to query basic information about that object and a
 * method to process it with an arbitrary FedoraObjectHandler.
 */
public interface FedoraObjectProcessor {

    /**
     * Returns basic information about the object this processor represents.
     */
    ObjectInfo getObjectInfo();

    /**
     * Processes the object with the supplied handler.
     * @param handler the handler to process the object with
     * @throws XMLStreamException declared by this signature.
     *         NOTE(review): presumably raised when the object's XML cannot
     *         be read; confirm against implementations.
     */
    void processObject(FedoraObjectHandler handler) throws XMLStreamException;
}
Loading

0 comments on commit 675c33c

Please sign in to comment.