minor fixes
cost formula description
apotocki committed Oct 12, 2016
import com.fluidops.fedx.Config;
import com.fluidops.fedx.FedXFactory;
import com.fluidops.fedx.exception.FedXException;

* Quetzal configuration setup. Must be run once at startup, before any query is executed.
* @author Saleem
Expand All @@ -41,6 +42,10 @@ public static enum Mode {
public static Mode mode; // Index_dominant , ASK_dominant. In first type of mode we make use of sbj, obj authorities to find relevant sources for triple patterns with bound subject or objects e.g ?s owl:sameAs <>, we will perform index lookup for predicate owl:sameAs and objAuthority <> and all the qualifying sources will be added to the set of capable sources for that triple pattern.
// In hybrid mode we make use of SPARQL ASK queries for bound subjects or objects of a common predicate such as owl:sameAs. If Predicate is not common then we use index sbj ,obj authorities parts as explained above

* Quetzal Configurations. Must call this method once before starting source selection.
* mode can be either set to Index_dominant or ASK_dominant. See details in FedSum paper.
public static void initialize()
try {
if (mode == Mode.ASK_DOMINANT)
if (mode == Mode.ASK_DOMINANT) {
} catch (IOException e) {
throw new FedXException(e);
Expand All @@ -65,29 +71,6 @@ public static void reset()
* Quetzal Configurations. Must call this method once before starting source selection.
* mode can be either set to Index_dominant or ASK_dominant. See details in FedSum paper.
* @param inputCommonPredThreshold Threshold value between common and normal predicates
* @param inputMode Source Selection mode i.e. Index_dominant or ASK_dominant
* @param InputFedSummaries Summaries of all the available data sources
* @throws Exception Errors
public static void initialize(String inputFedSummaries, Mode inputMode, double inputCommonPredThreshold) throws Exception
mode = inputMode; //{ASK_dominant, Index_dominant}
commonPredThreshold = inputCommonPredThreshold; //considered a predicate as common predicate if it is present in 33% available data sources
//long startTime = System.currentTimeMillis();
if (mode == Mode.ASK_DOMINANT)

* Initialize the list of SPARQL endpoints from the FedSummaries.
* @throws Exception Errors
Expand All @@ -109,23 +92,25 @@ public static void loadDataSources()
public static void loadCommonPredList() {

String queryString = "Prefix ds:<> "
String queryString = "Prefix ds:<> "
+ " WHERE {?s ds:predicate ?p. }";

TupleQuery tupleQuery = con.prepareTupleQuery(QueryLanguage.SPARQL, queryString);
TupleQueryResult result = tupleQuery.evaluate();
ArrayList<String> FedSumPredicates = new ArrayList<String>();
ArrayList<String> fedSumPredicates = new ArrayList<String>();
for (String predicate : FedSumPredicates)
int dscount = summary.lookupSources(null, null, null).size();

for (String predicate : fedSumPredicates)
int count = 0;
queryString = "Prefix ds:<> "
queryString = "Prefix ds:<> "
+ "SELECT Distinct ?url "
+ " WHERE {?s ds:url ?url. "
+ " ?s ds:capability ?cap. "
Expand All @@ -138,7 +123,7 @@ public static void loadCommonPredList() {;
double threshold = (double) count/dataSources.size();
double threshold = (double) count/dscount;
Expand Up @@ -181,7 +181,7 @@ public int compare(CardinalityVisitor.CardPair cpl, CardinalityVisitor.CardPair

long threads = Config.getConfig().getWorkerThreads();

double hashCost = rightArg.nd.card * C_TRANSFER_TUPLE + (2 + threads - 1)/threads * C_TRANSFER_QUERY + (rightArg.nd.card + rightArg.nd.card) * C_HANDLE_TUPLE;
double hashCost = rightArg.nd.card * C_TRANSFER_TUPLE + (2 + threads - 1)/threads * C_TRANSFER_QUERY + (leftArg.nd.card + rightArg.nd.card) * C_HANDLE_TUPLE;

long bsize = Config.getConfig().getBoundJoinBlockSize();

Expand Up @@ -44,7 +44,7 @@ public static void main(String[] args) throws Exception {
// String FedSummaries = "C://slices/Linked-SQ-DBpedia-Aidan.ttl";
QuetzalConfig.Mode mode = QuetzalConfig.Mode.ASK_DOMINANT;; //{ASK_DOMINANT, INDEX_DOMINANT}
double commonPredThreshold = 0.33 ; //considered a predicate as common predicate if it is presenet in 33% available data sources
QuetzalConfig.initialize(FedSummaries, mode, commonPredThreshold); // must call this function only one time at the start to load configuration information. Please specify the FedSum mode.
QuetzalConfig.initialize();//FedSummaries, mode, commonPredThreshold); // must call this function only one time at the start to load configuration information. Please specify the FedSum mode.
System.out.println("One time configuration loading time : "+ (System.currentTimeMillis()-strtTime));
FedX fed = FederationManager.getInstance().getFederation();
List<Endpoint> members = fed.getMembers();
Expand Up @@ -51,7 +51,7 @@ public static void main(String[] args) throws Exception {
//String FedSummaries = "C://slices/Linked-SQ-DBpedia-Aidan.ttl";
QuetzalConfig.Mode mode = QuetzalConfig.Mode.ASK_DOMINANT;; //{ASK_DOMINANT, INDEX_DOMINANT}
double commonPredThreshold = 0.33 ; //considered a predicate as common predicate if it is presenet in 33% available data sources
QuetzalConfig.initialize(theFedSummaries, mode, commonPredThreshold); // must call this function only one time at the start to load configuration information. Please specify the FedSum mode.
QuetzalConfig.initialize();//theFedSummaries, mode, commonPredThreshold); // must call this function only one time at the start to load configuration information. Please specify the FedSum mode.
System.out.println("One time configuration loading time : "+ (System.currentTimeMillis()-strtTime));
FedX fed = FederationManager.getInstance().getFederation();
List<Endpoint> members = fed.getMembers();
Binary file added desc.pdf
Binary file not shown.
115 changes: 115 additions & 0 deletions desc.tex
@@ -0,0 +1,115 @@
% !TEX TS-program = pdflatex
% !TEX encoding = UTF-8 Unicode

% This is a simple template for a LaTeX document using the "article" class.
% See "book", "report", "letter" for other types of document.

\documentclass[11pt]{article} % use larger type; default would be 10pt

\usepackage[utf8]{inputenc} % set input encoding (not needed with XeLaTeX)

%%% Examples of Article customizations
% These packages are optional, depending whether you want the features they provide.
% See the LaTeX Companion or other references for full information.

\usepackage{geometry} % to change the page dimensions
\geometry{a4paper} % or letterpaper (US) or a5paper or....
% \geometry{margin=2in} % for example, change the margins to 2 inches all round
% \geometry{landscape} % set up the page for landscape
% read geometry.pdf for detailed page layout information

\usepackage{graphicx} % support the \includegraphics command and options

% \usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent

\usepackage{booktabs} % for much better looking tables
\usepackage{array} % for better arrays (eg matrices) in maths
\usepackage{amsmath} % provides \text, which the MVK definition below relies on
\usepackage{paralist} % very flexible & customisable lists (eg. enumerate/itemize, etc.)
\usepackage{verbatim} % adds environment for commenting out blocks of text & for better verbatim
\usepackage{subfig} % make it possible to include more than one captioned figure/table in a single float
% These packages are all incorporated in the memoir class to one degree or another...

\usepackage{fancyhdr} % This should be set AFTER setting up the page geometry
\pagestyle{fancy} % options: empty , plain , fancy
\renewcommand{\headrulewidth}{0pt} % customise the layout...

\usepackage{sectsty} % provides \allsectionsfont
\allsectionsfont{\sffamily\mdseries\upshape} % (See the fntguide.pdf for font help)
% (This matches ConTeXt defaults)

%%% ToC (table of contents) APPEARANCE
\usepackage[nottoc,notlof,notlot]{tocbibind} % Put the bibliography in the ToC
\usepackage[titles,subfigure]{tocloft} % Alter the style of the Table of Contents
\renewcommand{\cftsecpagefont}{\rmfamily\mdseries\upshape} % No bold!

%%% END Article customizations

%%% The "real" document content comes below...

\title{Previous Formulas}
%%% \author{The Author}


\section{Previous version}

\begin{equation}
Card(E1 \bowtie E2) = MIN(Card(E1), Card(E2))
\end{equation}

\begin{equation}
Cost(E1 \bowtie_h E2) = Card(E1) * CRT + Card(E2) * CRT + 2 * CSQ
\end{equation}

\begin{equation}
Cost(E1 \bowtie_b E2) = Card(E1) * CRT + \frac{Card(E1)}{BSZ} * CSQ + Card(E1 \bowtie E2) * CRT
\end{equation}

where the cost for sending a SPARQL query is CSQ, the cost for receiving a single result tuple is CRT and BSZ is the size of a bound join block.

In our experiments CSQ=50, CRT=0.02, BSZ=20

\section{Modified version}
\begin{equation}
Card(E1 \bowtie E2) = MIN(Card(E1), Card(E2)) * MVK(E1) * MVK(E2)
\end{equation}
where MVK(E1) and MVK(E2) are the multi-value multipliers of E1 and E2 respectively.
\[
MVK(E)= \left\{
\begin{array}{ll}
\frac{1}{\sqrt{2}} &: \text{E is a triple pattern like ?s $<$p$>$ $<$o$>$ . }\\
\frac{\text{triple count for given predicate}}{\text{distinct subject count}} &: \text{E is a triple pattern like ?s $<$p$>$ ?o. The join variable is ?s.} \\
\frac{\text{triple count for given predicate}}{\text{distinct object count}} &: \text{E is a triple pattern like ?s $<$p$>$ ?o. The join variable is ?o.} \\
\text{1} &: \text{other cases}
\end{array}
\right.
\]

\begin{equation}
Cost(E1 \bowtie_h E2) = \frac{1 + TC}{TC} * CSQ + Card(E2) * CRT + (Card(E1) + Card(E2)) * CHT
\end{equation}
where Card(E1) $<$ Card(E2), TC is the number of threads used to query SPARQL endpoints, CSQ is the cost of sending a SPARQL query, CRT is the cost of receiving a single result tuple, and CHT is the cost of handling a received tuple.\\\\
Description: the hash join algorithm sends 2 requests, one for E1 and one for E2, using TC threads (cost = the first summand in (5)). It then receives the results for E1 and E2 in parallel, so this cost is Max(Card(E1), Card(E2)) * CRT = Card(E2) * CRT. Finally, all the received tuples are
handled: the internal implementation stores the data in a hash map with synchronized access, so this cost can be estimated as (Card(E1) + Card(E2)) * CHT.

\begin{equation}
Cost(E1 \bowtie_b E2) = CSQ + Card(E1) * CRT + \frac{\frac{Card(E1) + BSZ - 1}{BSZ} + TC - 1}{TC} * CSQ
\end{equation}

The bind join algorithm first sends the request for E1 (cost = CSQ) and receives the results for E1 (cost = Card(E1) * CRT); then, using TC threads, it sends the requests for E2 in blocks of size BSZ (cost = the third summand in formula (6)). \\\\
In our experiments CSQ=100, CRT=0.01, CHT=0.0025, BSZ=20, TC=20.


12 changes: 9 additions & 3 deletions fedx/src/main/java/com/fluidops/fedx/provider/
Expand Up @@ -37,10 +37,12 @@ public class ProviderUtil {
* @throws QueryEvaluationException
* @throws MalformedQueryException
public static void checkConnectionIfConfigured(Repository repo) {
public static long checkConnectionIfConfigured(Repository repo) {

if (!Config.getConfig().isValidateRepositoryConnections())
return 0;

long startTime = System.currentTimeMillis();

RepositoryConnection conn = repo.getConnection();
try {
Expand All @@ -51,12 +53,16 @@ public static void checkConnectionIfConfigured(Repository repo) {
if (!qRes.hasNext()) {
log.warn("No data in provided repository (" + repo + ")");
while (qRes.hasNext());

} finally {
if (qRes != null)
if (qRes != null) {
} finally {
return System.currentTimeMillis() - startTime;
Expand Up @@ -50,7 +50,10 @@ public Endpoint loadEndpoint(RepositoryInformation repoInfo) throws FedXExceptio

long rtime = ProviderUtil.checkConnectionIfConfigured(repo);
if (rtime != 0) {
rtime = ProviderUtil.checkConnectionIfConfigured(repo); // measure again

String location = repoInfo.getLocation();
EndpointClassification epc = EndpointClassification.Remote;
Expand All @@ -67,7 +70,7 @@ public Endpoint loadEndpoint(RepositoryInformation repoInfo) throws FedXExceptio
EndpointConfiguration ep = manipulateEndpointConfiguration(location, repoInfo.getEndpointConfiguration());

return res;
} catch (RepositoryException e) {
throw new FedXException("Repository " + repoInfo.getId() + " could not be initialized: " + e.getMessage(), e);
Expand Down
9 changes: 9 additions & 0 deletions fedx/src/main/java/com/fluidops/fedx/structures/
Expand Up @@ -108,6 +108,7 @@ public static boolean isSupportedFormat(String format) {
protected TripleSource tripleSource; // the triple source, initialized when repository is set
protected EndpointConfiguration endpointConfiguration; // additional endpoint type specific configuration

protected long responseTime = 0;
* Construct a new endpoint.
Expand Down Expand Up @@ -153,6 +154,14 @@ public boolean isLocal() {
return endpointClassification==EndpointClassification.Local;

public long getResponseTime() {
return responseTime;

public void setResponseTime(long val) {
responseTime = val;

* Set the underlying initialized repository.
Expand Down

