Skip to content

Commit

Permalink
minor fixes
Browse files Browse the repository at this point in the history
cost formula description
  • Loading branch information
apotocki committed Oct 12, 2016
1 parent 1aa31ac commit 3b101c3
Show file tree
Hide file tree
Showing 9 changed files with 156 additions and 38 deletions.
Expand Up @@ -17,6 +17,7 @@
import com.fluidops.fedx.Config;
import com.fluidops.fedx.FedXFactory;
import com.fluidops.fedx.exception.FedXException;

/**
* Quetzal configuration setup. Must be run once at startup, before any query is executed.
* @author Saleem
Expand All @@ -41,6 +42,10 @@ public static enum Mode {
public static Mode mode; // Index_dominant , ASK_dominant. In first type of mode we make use of sbj, obj authorities to find relevant sources for triple patterns with bound subject or objects e.g ?s owl:sameAs <http://dbpedia.org/resource/Barack_Obama>, we will perform index lookup for predicate owl:sameAs and objAuthority <http://dbpedia.org> and all the qualifying sources will be added to the set of capable sources for that triple pattern.
// In hybrid mode we make use of SPARQL ASK queries for bound subjects or objects of a common predicate such as owl:sameAs. If Predicate is not common then we use index sbj ,obj authorities parts as explained above

/**
* Quetzal Configurations. Must call this method once before starting source selection.
* mode can be either set to Index_dominant or ASK_dominant. See details in FedSum paper.
*/
public static void initialize()
{
try {
Expand All @@ -50,8 +55,9 @@ public static void initialize()

//loadDataSources(); skip, must be initialized explicitly by user

if (mode == Mode.ASK_DOMINANT)
if (mode == Mode.ASK_DOMINANT) {
loadCommonPredList();
}
} catch (IOException e) {
throw new FedXException(e);
}
Expand All @@ -65,29 +71,6 @@ public static void reset()
sumRepository = null;
}

/**
 * Quetzal configuration. Must be called once before starting source selection.
 * The mode can be set to either Index_dominant or ASK_dominant. See details in the FedSum paper.
 *
 * @param inputFedSummaries summaries of all the available data sources
 * @param inputMode source selection mode, i.e. Index_dominant or ASK_dominant
 * @param inputCommonPredThreshold threshold value between common and normal predicates
 * @throws Exception Errors
 * @deprecated use the no-argument {@link #initialize()} instead
 */
@Deprecated
public static void initialize(String inputFedSummaries, Mode inputMode, double inputCommonPredThreshold) throws Exception
{
    Config.initialize();
    mode = inputMode; // {ASK_dominant, Index_dominant}
    // a predicate is treated as common when the share of data sources containing it
    // reaches this threshold (e.g. 0.33 = 33% of the available data sources)
    commonPredThreshold = inputCommonPredThreshold;
    loadFedSummaries(inputFedSummaries);

    loadDataSources();
    // the common-predicate list is only loaded in ASK-dominant mode
    if (mode == Mode.ASK_DOMINANT) {
        loadCommonPredList();
    }
}

/**
* Initialize list of SPARQL endpoints from FedSummaries
* @throws Exception Errors
Expand All @@ -109,23 +92,25 @@ public static void loadDataSources()
*/
public static void loadCommonPredList() {

String queryString = "Prefix ds:<http://aksw.org/fedsum/> "
String queryString = "Prefix ds:<http://aksw.org/quetsal/> "
+ "SELECT DISTINCT ?p "
+ " WHERE {?s ds:predicate ?p. }";

TupleQuery tupleQuery = con.prepareTupleQuery(QueryLanguage.SPARQL, queryString);
TupleQueryResult result = tupleQuery.evaluate();
ArrayList<String> FedSumPredicates = new ArrayList<String>();
ArrayList<String> fedSumPredicates = new ArrayList<String>();
while(result.hasNext())
{
FedSumPredicates.add(result.next().getValue("p").stringValue());
fedSumPredicates.add(result.next().getValue("p").stringValue());
}
//---check each distinct

for (String predicate : FedSumPredicates)
int dscount = summary.lookupSources(null, null, null).size();

for (String predicate : fedSumPredicates)
{
int count = 0;
queryString = "Prefix ds:<http://aksw.org/fedsum/> "
queryString = "Prefix ds:<http://aksw.org/quetsal/> "
+ "SELECT Distinct ?url "
+ " WHERE {?s ds:url ?url. "
+ " ?s ds:capability ?cap. "
Expand All @@ -138,7 +123,7 @@ public static void loadCommonPredList() {
result.next();
count++;
}
double threshold = (double) count/dataSources.size();
double threshold = (double) count/dscount;
if(threshold>=commonPredThreshold)
commonPredicates.add(predicate);
}
Expand Down
Expand Up @@ -181,7 +181,7 @@ public int compare(CardinalityVisitor.CardPair cpl, CardinalityVisitor.CardPair

long threads = Config.getConfig().getWorkerThreads();

double hashCost = rightArg.nd.card * C_TRANSFER_TUPLE + (2 + threads - 1)/threads * C_TRANSFER_QUERY + (rightArg.nd.card + rightArg.nd.card) * C_HANDLE_TUPLE;
double hashCost = rightArg.nd.card * C_TRANSFER_TUPLE + (2 + threads - 1)/threads * C_TRANSFER_QUERY + (leftArg.nd.card + rightArg.nd.card) * C_HANDLE_TUPLE;

long bsize = Config.getConfig().getBoundJoinBlockSize();

Expand Down
Expand Up @@ -44,7 +44,7 @@ public static void main(String[] args) throws Exception {
// String FedSummaries = "C://slices/Linked-SQ-DBpedia-Aidan.ttl";
QuetzalConfig.Mode mode = QuetzalConfig.Mode.ASK_DOMINANT;; //{ASK_DOMINANT, INDEX_DOMINANT}
double commonPredThreshold = 0.33 ; //considered a predicate as common predicate if it is presenet in 33% available data sources
QuetzalConfig.initialize(FedSummaries, mode, commonPredThreshold); // must call this function only one time at the start to load configuration information. Please specify the FedSum mode.
QuetzalConfig.initialize();//FedSummaries, mode, commonPredThreshold); // must call this function only one time at the start to load configuration information. Please specify the FedSum mode.
System.out.println("One time configuration loading time : "+ (System.currentTimeMillis()-strtTime));
FedX fed = FederationManager.getInstance().getFederation();
List<Endpoint> members = fed.getMembers();
Expand Down
Expand Up @@ -51,7 +51,7 @@ public static void main(String[] args) throws Exception {
//String FedSummaries = "C://slices/Linked-SQ-DBpedia-Aidan.ttl";
QuetzalConfig.Mode mode = QuetzalConfig.Mode.ASK_DOMINANT;; //{ASK_DOMINANT, INDEX_DOMINANT}
double commonPredThreshold = 0.33 ; //considered a predicate as common predicate if it is presenet in 33% available data sources
QuetzalConfig.initialize(theFedSummaries, mode, commonPredThreshold); // must call this function only one time at the start to load configuration information. Please specify the FedSum mode.
QuetzalConfig.initialize();//theFedSummaries, mode, commonPredThreshold); // must call this function only one time at the start to load configuration information. Please specify the FedSum mode.
System.out.println("One time configuration loading time : "+ (System.currentTimeMillis()-strtTime));
FedX fed = FederationManager.getInstance().getFederation();
List<Endpoint> members = fed.getMembers();
Expand Down
Binary file added desc.pdf
Binary file not shown.
115 changes: 115 additions & 0 deletions desc.tex
@@ -0,0 +1,115 @@
% !TEX TS-program = pdflatex
% !TEX encoding = UTF-8 Unicode

% This is a simple template for a LaTeX document using the "article" class.
% See "book", "report", "letter" for other types of document.

\documentclass[11pt]{article} % use larger type; default would be 10pt

\usepackage[utf8]{inputenc} % set input encoding (not needed with XeLaTeX)

%%% Examples of Article customizations
% These packages are optional, depending whether you want the features they provide.
% See the LaTeX Companion or other references for full information.

%%% PAGE DIMENSIONS
\usepackage{geometry} % to change the page dimensions
\geometry{a4paper} % or letterpaper (US) or a5paper or....
% \geometry{margin=2in} % for example, change the margins to 2 inches all round
% \geometry{landscape} % set up the page for landscape
% read geometry.pdf for detailed page layout information

\usepackage{graphicx} % support the \includegraphics command and options

% \usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent

%%% PACKAGES
\usepackage{booktabs} % for much better looking tables
\usepackage{array} % for better arrays (eg matrices) in maths
\usepackage{paralist} % very flexible & customisable lists (eg. enumerate/itemize, etc.)
\usepackage{verbatim} % adds environment for commenting out blocks of text & for better verbatim
\usepackage{subfig} % make it possible to include more than one captioned figure/table in a single float
\usepackage{amsmath}
% These packages are all incorporated in the memoir class to one degree or another...

%%% HEADERS & FOOTERS
\usepackage{fancyhdr} % This should be set AFTER setting up the page geometry
\pagestyle{fancy} % options: empty , plain , fancy
\renewcommand{\headrulewidth}{0pt} % customise the layout...
\lhead{}\chead{}\rhead{}
\lfoot{}\cfoot{\thepage}\rfoot{}

%%% SECTION TITLE APPEARANCE
\usepackage{sectsty}
\allsectionsfont{\sffamily\mdseries\upshape} % (See the fntguide.pdf for font help)
% (This matches ConTeXt defaults)

%%% ToC (table of contents) APPEARANCE
\usepackage[nottoc,notlof,notlot]{tocbibind} % Put the bibliography in the ToC
\usepackage[titles,subfigure]{tocloft} % Alter the style of the Table of Contents
\renewcommand{\cftsecfont}{\rmfamily\mdseries\upshape}
\renewcommand{\cftsecpagefont}{\rmfamily\mdseries\upshape} % No bold!

%%% END Article customizations

%%% The "real" document content comes below...

\title{Previous Formulas}
%%% \author{The Author}
\date{}

\begin{document}
\maketitle

\section{Previous version}

\begin{equation}
Card(E1 \bowtie E2) = MIN(Card(E1), Card(E2))
\end{equation}

\begin{equation}
Cost(E1 \bowtie_h E2) = Card(E1) * CRT + Card(E2) * CRT + 2 * CSQ
\end{equation}

\begin{equation}
Cost(E1 \bowtie_b E2) = Card(E1) * CRT + \frac{Card(E1)}{BSZ}* CSQ + Card(E1 \bowtie E2) * CRT
\end{equation}

where the cost for sending a SPARQL query is CSQ, the cost for receiving a single result tuple is CRT and BSZ is the size of a bound join block.

In our experiments CSQ=50, CRT=0.02, BSZ=20

\section{Modified version}
\begin{equation}
Card(E1 \bowtie E2) = MIN(Card(E1), Card(E2)) * MVK(E1) * MVK(E2)
\end{equation}
where MVK(E1) and MVK(E2) are the multi-value multipliers of E1 and E2, respectively.
\[
MVK(E)= \left\{
\begin{array}{@{\thinspace}l@{\thinspace}l}
\frac{1}{\sqrt{2}} &: \text{E is a triple pattern like ?s $<$p$>$ $<$o$>$ . }\\
\frac{\text{triple count for given predicate}}{\text{distinct subject count}} &: \text{E is a triple pattern like ?s $<$p$>$ ?o. The join variable is ?s.} \\
\frac{\text{triple count for given predicate}}{\text{distinct object count}} &: \text{E is a triple pattern like ?s $<$p$>$ ?o. The join variable is ?o.} \\
\text{1} &: \text{other cases}
\end{array}
\right.
\]

\begin{equation}
Cost(E1 \bowtie_h E2) = \frac{1 + TC}{TC} * CSQ + Card(E2) * CRT + (Card(E1) + Card(E2)) * CHT
\end{equation}
where Card(E1) $<$ Card(E2), TC is the number of threads used to query the SPARQL endpoints, CSQ is the cost of sending a SPARQL query, CRT is the cost of receiving a single result tuple, and CHT is the cost of handling a received tuple.\\\\
Description: the hash join algorithm sends 2 requests, for E1 and E2, using TC threads (cost = first summand in (5)), then receives the results for E1 and E2 in parallel, so cost = Max(Card(E1), Card(E2)) * CRT = Card(E2) * CRT. Finally, all the received tuples are
handled: the internal implementation uses a hash map with synchronized access to store the data, so the cost can be estimated as (Card(E1) + Card(E2)) * CHT.

\begin{equation}
Cost(E1 \bowtie_b E2) = CSQ + Card(E1) * CRT + \frac{\frac{Card(E1) + BSZ - 1}{BSZ} + TC - 1}{TC} * CSQ
\end{equation}

The bind join algorithm first sends the request for E1 (cost = CSQ), receives the results for E1 (cost = Card(E1) * CRT), and then, using TC threads, sends requests for E2 in blocks of size BSZ (cost = third summand in formula (6)). \\\\
In our experiments CSQ=100, CRT=0.01, CHT=0.0025, BSZ=20, TC=20

\end{document}



12 changes: 9 additions & 3 deletions fedx/src/main/java/com/fluidops/fedx/provider/ProviderUtil.java
Expand Up @@ -37,10 +37,12 @@ public class ProviderUtil {
* @throws QueryEvaluationException
* @throws MalformedQueryException
*/
public static void checkConnectionIfConfigured(Repository repo) {
public static long checkConnectionIfConfigured(Repository repo) {

if (!Config.getConfig().isValidateRepositoryConnections())
return;
return 0;

long startTime = System.currentTimeMillis();

RepositoryConnection conn = repo.getConnection();
try {
Expand All @@ -51,12 +53,16 @@ public static void checkConnectionIfConfigured(Repository repo) {
if (!qRes.hasNext()) {
log.warn("No data in provided repository (" + repo + ")");
}
while (qRes.hasNext()) qRes.next();

} finally {
if (qRes != null)
if (qRes != null) {
Iterations.closeCloseable(qRes);
}
}
} finally {
conn.close();
}
return System.currentTimeMillis() - startTime;
}
}
Expand Up @@ -50,7 +50,10 @@ public Endpoint loadEndpoint(RepositoryInformation repoInfo) throws FedXExceptio
repo.setHttpClient(FedXFactory.httpClient);
repo.initialize();

ProviderUtil.checkConnectionIfConfigured(repo);
long rtime = ProviderUtil.checkConnectionIfConfigured(repo);
if (rtime != 0) {
rtime = ProviderUtil.checkConnectionIfConfigured(repo); // measure again
}

String location = repoInfo.getLocation();
EndpointClassification epc = EndpointClassification.Remote;
Expand All @@ -67,7 +70,7 @@ public Endpoint loadEndpoint(RepositoryInformation repoInfo) throws FedXExceptio
EndpointConfiguration ep = manipulateEndpointConfiguration(location, repoInfo.getEndpointConfiguration());
res.setEndpointConfiguration(ep);
res.setRepo(repo);

res.setResponseTime(rtime);
return res;
} catch (RepositoryException e) {
throw new FedXException("Repository " + repoInfo.getId() + " could not be initialized: " + e.getMessage(), e);
Expand Down
9 changes: 9 additions & 0 deletions fedx/src/main/java/com/fluidops/fedx/structures/Endpoint.java
Expand Up @@ -108,6 +108,7 @@ public static boolean isSupportedFormat(String format) {
protected TripleSource tripleSource; // the triple source, initialized when repository is set
protected EndpointConfiguration endpointConfiguration; // additional endpoint type specific configuration

protected long responseTime = 0;
/**
* Construct a new endpoint.
*
Expand Down Expand Up @@ -153,6 +154,14 @@ public boolean isLocal() {
return endpointClassification==EndpointClassification.Local;
}

/**
 * Returns the response time currently stored for this endpoint.
 *
 * @return the value last passed to {@link #setResponseTime(long)}, or the field's
 *         initial value if it was never set (units presumably milliseconds; confirm against the caller)
 */
public long getResponseTime() {
    return this.responseTime;
}

/**
 * Stores the response time for this endpoint.
 *
 * @param val the response time to record (units presumably milliseconds; confirm against the caller)
 */
public void setResponseTime(long val) {
    this.responseTime = val;
}

/**
* Set the underlying initialized repository.
*
Expand Down

0 comments on commit 3b101c3

Please sign in to comment.