In [1]:
const namespaces = require('./namespaces')
const sparqljs = require('sparqljs')
const parser = new sparqljs.Parser(namespaces)

const eachFileInDir = require('./eachFileInDir')
const countInQueries = require('./countInQueries')

const EN_SPARQL = 'data/sparql_queries_enwiki'
const DE_SPARQL = 'data/sparql_queries_dewiki'

## Valid Queries

In [2]:
var isValidQuery = (query) => {
    return true // will internally throw an exception when parsed
}

countInQueries(EN_SPARQL, isValidQuery)
countInQueries(DE_SPARQL, isValidQuery)

data/sparql_queries_dewiki 669 669
data/sparql_queries_enwiki 992 992


## OPTIONAL

In [3]:
var hasOptional = (query) => {
    return query.where.some((clause) => {
        return clause.type === 'optional'
    })
}
countInQueries(EN_SPARQL, hasOptional)
countInQueries(DE_SPARQL, hasOptional)

data/sparql_queries_dewiki 669 40
data/sparql_queries_enwiki 992 144


## FILTER

In [4]:
var hasFilter = (query) => {
    return query.where.some((clause) => {
        return clause.type === 'filter'
    })
}
countInQueries(EN_SPARQL, hasFilter)
countInQueries(DE_SPARQL, hasFilter)

data/sparql_queries_dewiki 669 51
data/sparql_queries_enwiki 992 373


## ORDER BY

In [5]:
var hasOrderBy = (query) => {
    return query.order
}
countInQueries(EN_SPARQL, hasOrderBy)
countInQueries(DE_SPARQL, hasOrderBy)

data/sparql_queries_dewiki 669 27
data/sparql_queries_enwiki 992 98


## DISTINCT

In [6]:
var hasDistinct = (query) => {
    return query.distinct
}
countInQueries(EN_SPARQL, hasDistinct)
countInQueries(DE_SPARQL, hasDistinct)

data/sparql_queries_dewiki 669 33
data/sparql_queries_enwiki 992 46


## GROUP BY

In [7]:
var hasGroupBy = (query) => {
    return query.group
}
countInQueries(EN_SPARQL, hasGroupBy)
countInQueries(DE_SPARQL, hasGroupBy)

data/sparql_queries_dewiki 669 14
data/sparql_queries_enwiki 992 76


## VALUES

In [8]:
var hasValues = (query) => {
    return query.where.some((clause) => {
        return clause.type === 'values'
    })
}
countInQueries(EN_SPARQL, hasValues)
countInQueries(DE_SPARQL, hasValues)

data/sparql_queries_dewiki 669 10
data/sparql_queries_enwiki 992 56


## UNION

In [9]:
var hasUnion = (query) => {
    return query.where.some((clause) => {
        return clause.type === 'union'
    })
}
countInQueries(EN_SPARQL, hasUnion)
countInQueries(DE_SPARQL, hasUnion)

data/sparql_queries_dewiki 669 148
data/sparql_queries_enwiki 992 86


## MINUS

In [10]:
var hasMinus = (query) => {
    return query.where.some((clause) => {
        return clause.type === 'minus'
    })
}
countInQueries(EN_SPARQL, hasMinus)
countInQueries(DE_SPARQL, hasMinus)

data/sparql_queries_dewiki 669 15
data/sparql_queries_enwiki 992 32


## Subqueries

In [11]:
var hasSubQuery = (query) => {
    return query.where.some((clause) => {
        return clause.type === 'query'
    })
}
countInQueries(EN_SPARQL, hasSubQuery)
countInQueries(DE_SPARQL, hasSubQuery)

data/sparql_queries_dewiki 669 23
data/sparql_queries_enwiki 992 16


## Multiple Subject Queries

In [12]:
var hasMultipleSubjects = (query) => {
    const triples = query.where.reduce(
        (triples, clause) => clause.type === 'bgp' ? triples.concat(clause.triples) : triples,
        []
    )
    const subjects = triples.map(triple => triple.subject)
    const objects = triples.map(triple => triple.object)
    
    return subjects.some((subject) => {
        return typeof subject === 'string' && subject.startsWith('?') && objects.includes(subject)
    })
}
countInQueries(EN_SPARQL, hasMultipleSubjects)
countInQueries(DE_SPARQL, hasMultipleSubjects)

data/sparql_queries_dewiki 669 56
data/sparql_queries_enwiki 992 297


## Property Path

In [13]:
var hasPropertyPath = (query) => {
    const triples = query.where.reduce(
        (triples, clause) => clause.type === 'bgp' ? triples.concat(clause.triples) : triples,
        []
    )
    return triples.some(triple => triple.predicate.type === 'path')
}
countInQueries(EN_SPARQL, hasPropertyPath)
countInQueries(DE_SPARQL, hasPropertyPath)

data/sparql_queries_dewiki 669 419
data/sparql_queries_enwiki 992 93


## wdt:P31/wdt:P279*

In [14]:
var hasInstanceOfSubclassOf = (query) => {
    const triples = query.where.reduce(
        (triples, clause) => clause.type === 'bgp' ? triples.concat(clause.triples) : triples,
        []
    )
    return triples.some(triple => {
        return ( triple.predicate.type === 'path'
            && triple.predicate.pathType === '/'
            && triple.predicate.items[0] === 'http://www.wikidata.org/prop/direct/P31'
            && triple.predicate.items[1].pathType === '*'
            && triple.predicate.items[1].items[0] === 'http://www.wikidata.org/prop/direct/P279' )
    })
}
countInQueries(EN_SPARQL, hasInstanceOfSubclassOf)
countInQueries(DE_SPARQL, hasInstanceOfSubclassOf)

data/sparql_queries_dewiki 669 3
data/sparql_queries_enwiki 992 8


## wdt:P279*

In [15]:
var hasSubclassOf = function hasSubclassOf(predicate) {
    if (predicate.type !== 'path') return false
    if (predicate.pathType === '*' && predicate.items.includes('http://www.wikidata.org/prop/direct/P279')) {
        return true
    }
    
    return predicate.items.reduce((any, predicate) => { return any || hasSubclassOf(predicate) }, false)
}
var hasInstanceOfSubclassOf = (query) => {
    const triples = query.where.reduce(
        (triples, clause) => clause.type === 'bgp' ? triples.concat(clause.triples) : triples,
        []
    )
    return triples.some(triple => hasSubclassOf(triple.predicate))
}
countInQueries(EN_SPARQL, hasInstanceOfSubclassOf)
countInQueries(DE_SPARQL, hasInstanceOfSubclassOf)

data/sparql_queries_dewiki 669 43
data/sparql_queries_enwiki 992 53


## Use of Qualifiers

In [16]:
var hasQualifiers = (parsedQuery, rawQuery) => {
    return rawQuery.includes('pq:P')
}

countInQueries(EN_SPARQL, hasQualifiers)
countInQueries(DE_SPARQL, hasQualifiers)

data/sparql_queries_dewiki 669 22
data/sparql_queries_enwiki 992 37


## Use of References

In [17]:
var hasReferences = (parsedQuery, rawQuery) => {
    return rawQuery.includes('pr:P')
}

countInQueries(EN_SPARQL, hasReferences)
countInQueries(DE_SPARQL, hasReferences)

data/sparql_queries_dewiki 669 0
data/sparql_queries_enwiki 992 4


In [18]:
var predicates = {}

eachFileInDir(DE_SPARQL, (query, resolve) => {
    var wikibasePredicates = query.match(/wikibase:\w+/g)
    if (wikibasePredicates) wikibasePredicates.forEach((predicate) => {
        if (predicates[predicate]) predicates[predicate]++
        else predicates[predicate] = 1
    })
    resolve()
}).then(() => {
    var tuples = []
    for (var predicate in predicates) tuples.push([predicate, predicates[predicate]])
    console.log(tuples.sort((a, b) => b[1] - a[1]).slice(0, 5))
})

Promise { <pending> }

[ [ 'wikibase:label', 13 ],
  [ 'wikibase:language', 13 ],
  [ 'wikibase:rank', 9 ],
  [ 'wikibase:DeprecatedRank', 9 ],
  [ 'wikibase:around', 6 ] ]


## Features per Query

In [19]:
var allFeatures = [
    hasOptional, hasFilter, hasOrderBy,
    hasValues, hasUnion, hasMinus,
    hasSubQuery, hasMultipleSubjects, hasPropertyPath,
    hasSubclassOf, hasQualifiers, hasReferences,
    hasGroupBy
]
var featuresPerQuery = (new Array(12)).fill(0)

eachFileInDir(DE_SPARQL, (query, resolve) => {
    let features = 0
    allFeatures.forEach((hasFeature) => {
        if (hasFeature(parser.parse(query), query)) features++
    })
    featuresPerQuery[features]++
    resolve()
}).then(() => {
    console.log(featuresPerQuery)
})

// EN: [ 432, 111, 248, 117, 71, 9, 3, 1, 0, 0, 0, 0 ]
// DE: [ 85,  451,  74,  40,  7, 3, 0, 9, 0, 0, 0, 0 ]

Promise { <pending> }

[ 85, 451, 74, 40, 7, 3, 0, 9, 0, 0, 0, 0 ]


## Finding a subset of features that is sufficient for most queries

In [22]:
var allFeatures = [
    hasOptional, hasFilter, hasOrderBy,
    hasValues, hasUnion, hasMinus,
    hasSubQuery, hasMultipleSubjects, hasPropertyPath,
    hasSubclassOf, hasQualifiers, hasReferences,
    hasGroupBy
]
var subset = [
    hasOptional, hasFilter, hasMultipleSubjects,
    hasQualifiers, hasValues, hasOrderBy,
    hasUnion
];
var queriesProduceable = 0

var isProduceableWithSubset = (parsedQuery, rawQuery) => {
    let featuresUsed = 0
    let subsetFeaturesUsed = 0

    allFeatures.forEach((hasFeature) => {
        if (hasFeature(parsedQuery, rawQuery)) featuresUsed++
    })
    subset.forEach((hasFeature) => {
        if (hasFeature(parsedQuery, rawQuery)) subsetFeaturesUsed++
    })
    
    return featuresUsed === subsetFeaturesUsed
}

countInQueries(EN_SPARQL, isProduceableWithSubset)
countInQueries(DE_SPARQL, isProduceableWithSubset)

data/sparql_queries_dewiki 669 239
data/sparql_queries_enwiki 992 807
