Skip to content

Commit

Permalink
rfe11121: Support transactional duplicate deletion
Browse files Browse the repository at this point in the history
<release-note>
rfe11121: Support transactional duplicate deletion

With this change, the Java client now provides the method
AGRepositoryConnection#deleteDuplicates(comparisonMode) for
deleting spog and spo duplicates in a transactional manner.
See the javadoc for more details.
</release-note>

Added DeleteDuplicatesTests to the prepush tests.
`make prepush` passes.
`make javadoc` runs clean.

Change-Id: I4012f86efc3617c68d7da03ac1c6ff12ca65b83e
Reviewed-on: https://gerrit.franz.com:9080/1786
Reviewed-by: John O'Rourke <jor@franz.com>
Reviewed-by: Ahmon Dancy <dancy@franz.com>
Tested-by: Kevin Layer <layer@franz.com>
  • Loading branch information
Bill Millar authored and dklayer committed Nov 22, 2011
1 parent 28e2a84 commit 188efd9
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 3 deletions.
31 changes: 29 additions & 2 deletions src/com/franz/agraph/http/AGHttpRepoClient.java
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -863,8 +863,8 @@ public synchronized void close() throws AGHttpException {
/** /**
* Creates a new freetext index with the given parameters. * Creates a new freetext index with the given parameters.
* *
* See documentation here: * See also the protocol documentation for
* <a href="http://www.franz.com/agraph/support/documentation/current/http-protocol.html#put-freetext-index">put-freetext-index</a> * <a href="http://www.franz.com/agraph/support/documentation/current/http-protocol.html#put-freetext-index">freetext index parameters</a>.
*/ */
public void createFreetextIndex(String name, List<String> predicates, boolean indexLiterals, List<String> indexLiteralTypes, String indexResources, List<String> indexFields, int minimumWordSize, List<String> stopWords, List<String> wordFilters, List<String> innerChars, List<String> borderChars, String tokenizer) public void createFreetextIndex(String name, List<String> predicates, boolean indexLiterals, List<String> indexLiteralTypes, String indexResources, List<String> indexFields, int minimumWordSize, List<String> stopWords, List<String> wordFilters, List<String> innerChars, List<String> borderChars, String tokenizer)
throws AGHttpException { throws AGHttpException {
Expand Down Expand Up @@ -1530,6 +1530,33 @@ public String[] getBlankNodes(int blankNodeAmount) throws AGHttpException {
return getHTTPClient().getBlankNodes(getRoot(), blankNodeAmount); return getHTTPClient().getBlankNodes(getRoot(), blankNodeAmount);
} }


/**
 * Removes duplicate statements from the store.
 * <p>
 * What counts as a "duplicate" is controlled by {@code comparisonMode}:
 * <ul>
 * <li>"spog" - all four quad parts (s,p,o,g) are compared;</li>
 * <li>"spo" - only the (s,p,o) parts are compared, so the same triple
 * stored in different graphs is treated as a duplicate.</li>
 * </ul>
 * <p>
 * The request is a delete against the statements location of this
 * repository with "/duplicates" appended. When {@code comparisonMode} is
 * null no "mode" parameter is sent at all, leaving the choice of mode to
 * the server.
 * <p>
 * See also the protocol documentation for
 * <a href="http://www.franz.com/agraph/support/documentation/current/http-protocol.html#delete-statements-duplicates">deleting duplicates</a>.
 *
 * @param comparisonMode determines what is a duplicate; may be null
 * @throws AGHttpException if the underlying HTTP client call reports a failure
 */
public void deleteDuplicates(String comparisonMode) throws AGHttpException {
    final String duplicatesUrl = Protocol.getStatementsLocation(getRoot()) + "/duplicates";
    final Header[] noHeaders = new Header[0];
    // Send the "mode" query parameter only when the caller actually chose one.
    final NameValuePair[] query;
    if (comparisonMode == null) {
        query = new NameValuePair[0];
    } else {
        query = new NameValuePair[] { new NameValuePair("mode", comparisonMode) };
    }
    getHTTPClient().delete(duplicatesUrl, noHeaders, query);
}

public void optimizeIndices(Boolean wait, int level) throws AGHttpException { public void optimizeIndices(Boolean wait, int level) throws AGHttpException {
String url = repoRoot + "/indices/optimize"; String url = repoRoot + "/indices/optimize";
Header[] headers = {}; Header[] headers = {};
Expand Down
21 changes: 21 additions & 0 deletions src/com/franz/agraph/repository/AGRepositoryConnection.java
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -1591,4 +1591,25 @@ public void putSpinMagicProperty(AGSpinMagicProperty fn) throws OpenRDFException
getHttpRepoClient().putSpinMagicProperty(fn); getHttpRepoClient().putSpinMagicProperty(fn);
} }


/**
* Deletes all duplicates from the store.
* <p>
* The comparisonMode determines what will be deemed a "duplicate".
* <p>
* If comparisonMode is "spog", quad parts (s,p,o,g) will all be
* compared when looking for duplicates.
* <p>
* If comparisonMode is "spo", only the (s,p,o) parts will be
* compared; the same triple in different graphs will thus be deemed
* duplicates.
* <p>
* This method only delegates: the comparisonMode is handed unchanged to
* the deleteDuplicates method of the object returned by
* getHttpRepoClient().
* <p>
* See also the protocol documentation for
* <a href="http://www.franz.com/agraph/support/documentation/current/http-protocol.html#delete-statements-duplicates">deleting duplicates</a>.
*
* @param comparisonMode determines what is a duplicate ("spog" or "spo")
* @throws RepositoryException if the delegated call on the HTTP repository
*         client fails
*/
public void deleteDuplicates(String comparisonMode) throws RepositoryException {
getHttpRepoClient().deleteDuplicates(comparisonMode);
}

} }
102 changes: 102 additions & 0 deletions src/test/DeleteDuplicatesTests.java
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,102 @@
/******************************************************************************
** Copyright (c) 2008-2011 Franz Inc.
** All rights reserved. This program and the accompanying materials
** are made available under the terms of the Eclipse Public License v1.0
** which accompanies this distribution, and is available at
** http://www.eclipse.org/legal/epl-v10.html
******************************************************************************/

package test;

import java.io.File;

import junit.framework.Assert;

import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.openrdf.model.vocabulary.RDF;

import com.franz.agraph.repository.AGRDFFormat;
import com.franz.agraph.repository.AGRepositoryConnection;

/**
 * Tests for {@code AGRepositoryConnection#deleteDuplicates(String)}.
 * <p>
 * Every test starts by loading the N-Quads fixture {@code src/test/example.nq}
 * through the inherited {@code conn} connection; the assertions expect that
 * file to contribute 10 statements per load.
 * <p>
 * Tests that open a second connection close it in a {@code finally} block so
 * that it is released even when an assertion fails.
 */
public class DeleteDuplicatesTests extends AGAbstractTest {

    /** Path of the N-Quads fixture loaded by every test. */
    private static final String EXAMPLE_NQ = "src/test/example.nq";

    /** Loads the example N-Quads fixture once through {@code conn}. */
    private void addExampleQuads() throws Exception {
        conn.add(new File(EXAMPLE_NQ), null, AGRDFFormat.NQUADS);
    }

    /**
     * Adds one statement, without a context argument, that the tests treat
     * as an spo (but not spog) duplicate of a statement in the fixture.
     */
    private void addSpoDuplicate() throws Exception {
        conn.add(vf.createURI("http://example.org/alice/foaf.rdf#me"),
                RDF.TYPE, vf.createURI("http://xmlns.com/foaf/0.1/Person"));
    }

    /** Loading the fixture twice and deleting "spog" duplicates leaves 15 statements. */
    @Test
    @Category(TestSuites.Prepush.class)
    public void testSPOG() throws Exception {
        addExampleQuads();
        Assert.assertEquals("expected size 10", 10, conn.size());
        addExampleQuads();
        Assert.assertEquals("expected size 20", 20, conn.size());
        conn.deleteDuplicates("spog");
        // Note: this doesn't result in 10 triples, due to blank nodes.
        Assert.assertEquals("expected size 15", 15, conn.size());
    }

    /** An spo duplicate survives "spog" deletion but is removed by "spo" deletion. */
    @Test
    @Category(TestSuites.Prepush.class)
    public void testSPO() throws Exception {
        addExampleQuads();
        Assert.assertEquals("expected size 10", 10, conn.size());
        // add an spo duplicate
        addSpoDuplicate();
        Assert.assertEquals("expected size 11", 11, conn.size());
        conn.deleteDuplicates("spog");
        // there are no spog duplicates
        Assert.assertEquals("expected size 11", 11, conn.size());
        conn.deleteDuplicates("spo");
        Assert.assertEquals("expected size 10", 10, conn.size());
    }

    /**
     * Passing null is expected to behave like "spog".
     * <p>
     * NOTE(review): this test is in the Broken category rather than Prepush,
     * so the null/default behaviour is presumably not working yet - confirm
     * before relying on it.
     */
    @Test
    @Category(TestSuites.Broken.class)
    public void testDefault() throws Exception {
        addExampleQuads();
        Assert.assertEquals("expected size 10", 10, conn.size());
        // add an spo duplicate
        addSpoDuplicate();
        Assert.assertEquals("expected size 11", 11, conn.size());
        // null is the default, "spog"
        conn.deleteDuplicates(null);
        // there are no spog duplicates
        Assert.assertEquals("expected size 11", 11, conn.size());
    }

    /**
     * With autocommit off, the effect of deleteDuplicates is seen by a
     * second connection only after commit.
     */
    @Test
    @Category(TestSuites.Prepush.class)
    public void testCommit() throws Exception {
        addExampleQuads();
        Assert.assertEquals("expected size 10", 10, conn.size());
        AGRepositoryConnection conn2 = repo.getConnection();
        try {
            conn.setAutoCommit(false);
            addExampleQuads();
            Assert.assertEquals("expected size 20", 20, conn.size());
            conn.deleteDuplicates("spog");
            // Note: this doesn't result in 10 triples, due to blank nodes.
            Assert.assertEquals("expected size 15", 15, conn.size());
            // conn2 still sees only what was committed before autocommit was turned off
            Assert.assertEquals("expected conn2 size 10", 10, conn2.size());
            conn.commit();
            Assert.assertEquals("expected size 15", 15, conn.size());
            Assert.assertEquals("expected conn2 size 15", 15, conn2.size());
        } finally {
            // conn2 was opened by this test, so release it here even if an assertion failed
            conn2.close();
        }
    }

    /**
     * With autocommit off, rolling back after deleteDuplicates restores the
     * deleted statements.
     */
    @Test
    @Category(TestSuites.Prepush.class)
    public void testRollback() throws Exception {
        addExampleQuads();
        Assert.assertEquals("expected size 10", 10, conn.size());
        addExampleQuads();
        Assert.assertEquals("expected size 20", 20, conn.size());
        AGRepositoryConnection conn2 = repo.getConnection();
        try {
            conn.setAutoCommit(false);
            conn.deleteDuplicates("spog");
            // Note: this doesn't result in 10 triples, due to blank nodes.
            Assert.assertEquals("expected size 15", 15, conn.size());
            // the uncommitted deletion is not visible to conn2
            Assert.assertEquals("expected conn2 size 20", 20, conn2.size());
            conn.rollback();
            Assert.assertEquals("expected size 20", 20, conn.size());
        } finally {
            // conn2 was opened by this test, so release it here even if an assertion failed
            conn2.close();
        }
    }

}
3 changes: 2 additions & 1 deletion src/test/TestSuites.java
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ public static class Temp {}
MappingsTests.class, MappingsTests.class,
DynamicCatalogTests.class, DynamicCatalogTests.class,
SpinTest.class, SpinTest.class,
FreetextTests.class FreetextTests.class,
DeleteDuplicatesTests.class
}) })
public static class Prepush {} public static class Prepush {}


Expand Down

0 comments on commit 188efd9

Please sign in to comment.