Skip to content
Browse files

Improved SPARQL matcher

  • Loading branch information...
1 parent 1992a58 commit c562cbddf7a481ea3e027c5c3e844900a0d25d3f @cgueret committed Apr 16, 2012
View
182 src/main/java/nl/vu/queryfinder/services/impl/SPARQLMatcher.java
@@ -2,37 +2,42 @@
import java.io.IOException;
import java.net.URI;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;
import nl.vu.queryfinder.model.Query;
import nl.vu.queryfinder.services.EndPoint;
import nl.vu.queryfinder.services.EndPoint.EndPointType;
import nl.vu.queryfinder.services.Service;
import nl.vu.queryfinder.util.PaginatedQueryExec;
+import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.vocabulary.OWL;
-import org.openrdf.model.vocabulary.RDF;
-import org.openrdf.model.vocabulary.RDFS;
+import org.openrdf.repository.RepositoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class SPARQLMatcher extends Service {
// Logger
private static final Logger logger = LoggerFactory.getLogger(SPARQLMatcher.class);
- // Property types
- private static final Value[] propertyTypes = { RDF.PROPERTY, OWL.DATATYPEPROPERTY, OWL.OBJECTPROPERTY };
+ // Property types to look for (RDF.PROPERTY is not part of this list)
+ private static final Value[] PROP_TYPES = { OWL.DATATYPEPROPERTY, OWL.OBJECTPROPERTY,
+ OWL.FUNCTIONALPROPERTY };
// The end point to query
- private EndPoint endPoint;
+ private final PaginatedQueryExec exec;
+ private final EndPoint endPoint;
/**
+ * Creates a matcher for a single end point: remembers the end point and
+ * builds the PaginatedQueryExec stored in the exec field.
+ *
* @param endPoint
+ *            the end point to query
+ * @throws RepositoryException
+ *             declared because the PaginatedQueryExec constructor declares it
*/
- public SPARQLMatcher(EndPoint endPoint) {
+ public SPARQLMatcher(EndPoint endPoint) throws RepositoryException {
+ // Connect to the end point
this.endPoint = endPoint;
+ exec = new PaginatedQueryExec(endPoint);
}
/*
@@ -50,24 +55,24 @@ public Query process(Query inputQuery) {
* @param keyword
* @return
*/
- private Set<Value> getClasses(String keyword) {
- Set<Value> results = new HashSet<Value>();
+ protected List<Value> getClasses(String keyword) {
+ List<Value> results = new ArrayList<Value>();
// Build the query
String query = "SELECT DISTINCT ?c WHERE {";
query += "?c a <http://www.w3.org/2002/07/owl#Class>.";
if (endPoint.getType().equals(EndPointType.VIRTUOSO)) {
query += "?c <http://www.w3.org/2000/01/rdf-schema#label> ?l.";
- query += "?l bif:contains 'KEYWORD'.";
+ query += "?l bif:contains 'KEYWORD'.} ORDER BY DESC ( <LONG::IRI_RANK> (?c) )";
+ keyword = keyword.replace(" ", " and ");
}
if (endPoint.getType().equals(EndPointType.OWLIM)) {
- query += "?c <http://www.ontotext.com/owlim/lucene#> 'KEYWORD'.";
+ query += "?c <http://www.ontotext.com/owlim/lucene#> 'KEYWORD'.}";
}
- query += "}";
- query.replace("KEYWORD", keyword);
+ query = query.replace("KEYWORD", keyword);
// Process the query
- results.addAll(PaginatedQueryExec.process(endPoint, query, "c"));
+ results.addAll(exec.process(query, "c"));
logger.info(String.format("[class] \"%s\" -> %d", keyword, results.size()));
@@ -78,31 +83,25 @@ public Query process(Query inputQuery) {
* @param keyword
* @return
*/
- private Set<Value> getProperties(String keyword) {
- Set<Value> results = new HashSet<Value>();
-
- for (EndPoint endPoint : endPoints) {
- Value var = Value.createVariable("r");
- Value label = Value.createVariable("l");
- Query query = QueryFactory.create();
- query.setQuerySelectType();
- query.setDistinct(true);
- query.addResultVar(var);
-
- for (Value propertyType : propertyTypes) {
- ElementGroup group = new ElementGroup();
- group.addTriplePattern(new Triple(var, RDF.type.asValue(), propertyType));
- if (endPoint.getType().equals(EndPointType.VIRTUOSO)) {
- String text = "'" + keyword + "'";
- group.addTriplePattern(new Triple(var, RDFS.label.asValue(), label));
- group.addTriplePattern(new Triple(label, Value.createURI("bif:contains"), Value.createLiteral(text)));
- } else if (endPoint.getType().equals(EndPointType.OWLIM)) {
- group.addTriplePattern(new Triple(var, Value.createURI("http://www.ontotext.com/owlim/lucene#"),
- Value.createLiteral(keyword)));
- }
- query.setQueryPattern(group);
- results.addAll(PaginatedQueryExec.process(endPoint, query, var));
+ protected List<Value> getProperties(String keyword) {
+ List<Value> results = new ArrayList<Value>();
+
+ for (Value propertyType : PROP_TYPES) {
+ // Build the query
+ String query = "SELECT DISTINCT ?c WHERE {";
+ query += "?c a <" + propertyType + ">.";
+ if (endPoint.getType().equals(EndPointType.VIRTUOSO)) {
+ query += "?c <http://www.w3.org/2000/01/rdf-schema#label> ?l.";
+ query += "?l bif:contains 'KEYWORD'.} ORDER BY DESC ( <LONG::IRI_RANK> (?c) )";
+ keyword = keyword.replace(" ", " and ");
}
+ if (endPoint.getType().equals(EndPointType.OWLIM)) {
+ query += "?c <http://www.ontotext.com/owlim/lucene#> 'KEYWORD'.}";
+ }
+ query = query.replace("KEYWORD", keyword);
+
+ // Process the query
+ results.addAll(exec.process(query, "c"));
}
logger.info(String.format("[property] \"%s\" -> %d", keyword, results.size()));
@@ -116,52 +115,87 @@ public Query process(Query inputQuery) {
* @param context
* @return
*/
- private Set<Value> getResources(String keyword, Triple context) {
- Set<Value> results = new HashSet<Value>();
-
- for (EndPoint endPoint : endPoints) {
- Value var = Value.createVariable("r");
- Value label = Value.createVariable("l");
- Query query = QueryFactory.create();
- query.setQuerySelectType();
- query.setDistinct(true);
- query.addResultVar(var);
- ElementGroup group = new ElementGroup();
- group.addTriplePattern(new Triple(var, RDF.type.asValue(), Value.createAnon()));
- if (context != null)
- group.addTriplePattern(context);
- if (endPoint.getType().equals(EndPointType.VIRTUOSO)) {
- // String text = StringUtils.join(keyword.split(" "), " and ");
- String text = "'" + keyword + "'";
- group.addTriplePattern(new Triple(var, RDFS.label.asValue(), label));
- group.addTriplePattern(new Triple(label, Value.createURI("bif:contains"), Value.createLiteral(text)));
- } else if (endPoint.getType().equals(EndPointType.OWLIM)) {
- group.addTriplePattern(new Triple(var, Value.createURI("http://www.ontotext.com/owlim/lucene#"), Value
- .createLiteral(keyword)));
- }
- query.setQueryPattern(group);
- results.addAll(PaginatedQueryExec.process(endPoint, query, var));
+ protected List<Value> getResources(String keyword, Statement context) {
+ List<Value> results = new ArrayList<Value>();
+
+ // Connect to the end point (NOTE(review): this local exec hides the class-level exec field)
+ PaginatedQueryExec exec = null;
+ try {
+ exec = new PaginatedQueryExec(endPoint);
+ } catch (RepositoryException e) {
+ e.printStackTrace(); // NOTE(review): reported on stderr rather than through the class logger
+ return results; // still empty here, so a connection failure yields an empty list
+ }
+
+ // Build the query: ?c must be typed <OWL.NAMESPACE + "Thing"> and match the keyword
+ String query = "SELECT DISTINCT ?c WHERE {";
+ query += "?c a <" + OWL.NAMESPACE + "Thing>.";
+ if (endPoint.getType().equals(EndPointType.VIRTUOSO)) { // Virtuoso: rdfs:label matched with bif:contains, ordered by IRI_RANK
+ query += "?c <http://www.w3.org/2000/01/rdf-schema#label> ?l.";
+ query += "?l bif:contains 'KEYWORD'.} ORDER BY DESC ( <LONG::IRI_RANK> (?c) )";
+ keyword = keyword.replace(" ", " and "); // multi-word keywords are joined with " and "
}
+ if (endPoint.getType().equals(EndPointType.OWLIM)) { // OWLIM: matched through the lucene# predicate
+ query += "?c <http://www.ontotext.com/owlim/lucene#> 'KEYWORD'.}";
+ } // NOTE(review): for any other end point type the WHERE clause is never closed
+ query = query.replace("KEYWORD", keyword); // fill the KEYWORD placeholder; the keyword is inserted as-is, without escaping
+
+ // Process the query and collect the values bound to ?c
+ results.addAll(exec.process(query, "c"));
- logger.info(String.format("[resource] \"%s\" -> %d (%s)", keyword, results.size(), context.getPredicate()));
+ if (context != null) // context is only read for this log message in the visible code
+ logger.info(String.format("[resource] \"%s\" -> %d (%s)", keyword, results.size(), context.getPredicate()));
+ else
+ logger.info(String.format("[resource] \"%s\" -> %d", keyword, results.size()));
return results;
}
/**
+ * Manual check: builds a matcher for the DBpedia end point, looks up the
+ * classes for "artist" and the properties for "field", and logs the number
+ * of results followed by each value.
+ *
* @param args
+ *            not used
* @throws IOException
+ * @throws RepositoryException
*/
- public static void main(String[] args) throws IOException {
- // new EndPoint("http://dbpedia.org/sparql", "http://dbpedia.org",
- // EndPointType.VIRTUOSO);
+ public static void main(String[] args) throws IOException, RepositoryException {
+ EndPoint endPoint = new EndPoint(URI.create("http://dbpedia.org/sparql"), "http://dbpedia.org",
+ EndPointType.VIRTUOSO);
- EndPoint endPoint = new EndPoint(URI.create("http://factforge.net/sparql"), null, EndPointType.OWLIM);
+ // Alternative OWLIM end point, currently disabled:
+ // EndPoint endPoint = new
+ // EndPoint(URI.create("http://factforge.net/sparql"), null,
+ // EndPointType.OWLIM);
SPARQLMatcher me = new SPARQLMatcher(endPoint);
- logger.info("artist : " + me.getClasses("artist").size());
- logger.info("field : " + me.getProperties("field").size());
- logger.info("birth : " + me.getProperties("birth").size());
- logger.info("amsterdam : " + me.getResources("amsterdam", null).size());
- logger.info("Netherlands : " + me.getResources("Netherlands", null).size());
+ List<Value> res = null;
+
+ res = me.getClasses("artist"); // class lookup
+ logger.info("artist : " + res.size());
+ for (Value v : res)
+ logger.info(v.toString());
+
+ res = me.getProperties("field"); // property lookup
+ logger.info("field : " + res.size());
+ for (Value v : res)
+ logger.info(v.toString());
+
+ /* Disabled lookups (one more property query and three resource queries):
+ res = me.getProperties("birth");
+ logger.info("birth : " + res.size());
+ for (Value v : res)
+ logger.info(v.toString());
+
+ res = me.getResources("amsterdam", null);
+ logger.info("amsterdam : " + res.size());
+ for (Value v : res)
+ logger.info(v.toString());
+
+ res = me.getResources("Netherlands", null);
+ logger.info("Netherlands : " + res.size());
+ for (Value v : res)
+ logger.info(v.toString());
+
+ res = me.getResources("hip hop", null);
+ logger.info("hip hop : " + res.size());
+ for (Value v : res)
+ logger.info(v.toString());
+ */
}
}
View
21 src/main/java/nl/vu/queryfinder/util/PaginatedQueryExec.java
@@ -2,8 +2,8 @@
import java.net.URI;
import java.net.URISyntaxException;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;
import nl.vu.queryfinder.services.EndPoint;
import nl.vu.queryfinder.services.EndPoint.EndPointType;
@@ -20,7 +20,8 @@
public class PaginatedQueryExec {
protected static final Logger logger = LoggerFactory.getLogger(PaginatedQueryExec.class);
- private final static int PAGE_SIZE = 1000;
+ private final static int PAGE_SIZE = 600;
+ private final static int HARD_LIMIT = 50;
SPARQLRepository repository;
/**
@@ -39,8 +40,8 @@ public PaginatedQueryExec(EndPoint endPoint) throws RepositoryException {
* @return
* @throws RepositoryException
*/
- public Set<Value> process(String query, String varName) {
- Set<Value> results = new HashSet<Value>();
+ public List<Value> process(String query, String varName) {
+ List<Value> results = new ArrayList<Value>();
try {
@@ -49,12 +50,14 @@ public PaginatedQueryExec(EndPoint endPoint) throws RepositoryException {
int offset = 0;
RepositoryConnection conn = repository.getConnection();
- while (morePages) {
+ while (morePages && results.size() < HARD_LIMIT) {
long count = 0;
String queryPage = query;
- queryPage += "LIMIT " + limit + " OFFSET " + offset;
- TupleQuery tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryPage);
+ queryPage += " LIMIT " + limit + " OFFSET " + offset;
+
+ logger.info("Query \n" + queryPage);
+ TupleQuery tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryPage);
TupleQueryResult res = tupleQuery.evaluate();
while (res.hasNext()) {
results.add(res.next().getValue(varName));
@@ -83,7 +86,7 @@ public static void main(String[] args) throws URISyntaxException, RepositoryExce
String query = "Select distinct ?o where {<http://dbpedia.org/resource/Amsterdam> ?p ?o}";
EndPoint endPoint = new EndPoint(new URI("http://dbpedia.org/sparql"), null, EndPointType.VIRTUOSO);
PaginatedQueryExec exec = new PaginatedQueryExec(endPoint);
- Set<Value> r = exec.process(query, "o");
+ List<Value> r = exec.process(query, "o");
logger.info(r.toString());
exec.shutDown();
logger.info("ok");

0 comments on commit c562cbd

Please sign in to comment.
Something went wrong with that request. Please try again.