/
helpers.jl
109 lines (86 loc) · 3.04 KB
/
helpers.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
augment(term) -> Vector{String}
Given a term, returns a list of terms which should be treated as synonyms.
Currently only supports agumenting (spaces or hyphens) with (spaces, no spaces).
## Example
```jldoctest
julia> KeywordSearch.augment("arctic wolf")
2-element Vector{String}:
"arctic wolf"
"arcticwolf"
```
"""
function augment(term)
terms = String[]
words = split(term, char -> isspace(char) || char == '-')
n_words = length(words)
joiners = (" ", "")
# This could be optimized
for joins in Iterators.product(Iterators.repeated(joiners, n_words - 1)...)
word = ""
for (w, j) in zip(words, joins)
word = string(word, w, j)
end
word = string(word, words[end])
push!(terms, word)
end
return terms
end
"""
    reconstruct(Q::FuzzyQuery)

Return a one-argument function that builds a `FuzzyQuery` from the given text while
reusing the `dist` and `threshold` fields of `Q`.
"""
function reconstruct(Q::FuzzyQuery)
    return text -> FuzzyQuery(text, Q.dist, Q.threshold)
end
"""
    reconstruct(Q::Query)

Return `Query` itself, so that calling the result on a piece of text constructs a new
`Query`.
"""
function reconstruct(Q::Query)
    return Query
end
"""
    augment(Q::AbstractQuery)

Build an `Or` over one query per variant returned by `augment(Q.text)`. Every variant
query is created with the function returned by `reconstruct(Q)`.
"""
function augment(Q::AbstractQuery)
    rebuild = reconstruct(Q)
    variants = map(rebuild, augment(Q.text))
    return Or(Tuple(variants))
end
# The docstring has to be a `raw` string because the doctest contains a literal `\n`.
@doc raw"""
    word_boundary(Q::AbstractQuery) -> AbstractQuery

Return a query like `Q` whose text has been stripped of surrounding whitespace and then
padded with exactly one space on each side. This ensures that a word or phrase is not
hyphenated or conjoined with the surrounding text.

## Example

```jldoctest
julia> using Test

julia> query = Query("word")
Query("word")

julia> @test match(query, Document("This matchesword ")) !== nothing
Test Passed

julia> @test match(word_boundary(query), Document("This matches word.")) !== nothing
Test Passed

julia> @test match(word_boundary(query), Document("This matches word ")) !== nothing
Test Passed

julia> @test match(word_boundary(query), Document("This matches word\nNext line")) !== nothing
Test Passed

julia> @test match(word_boundary(query), Document("This doesn't matchword ")) === nothing
Test Passed
```
"""
function word_boundary(Q::AbstractQuery)
    rebuild = reconstruct(Q)
    # `process_document` has already removed punctuation, so spaces are the only
    # boundary that needs to be enforced.
    padded_text = string(" ", strip(Q.text), " ")
    return rebuild(padded_text)
end
# The length of a query is the number of leaves that `AbstractTrees.Leaves` yields for
# it; a query with no leaves has length 0.
function Base.length(Q::AbstractQuery)
    return count(_ -> true, AbstractTrees.Leaves(Q))
end
"""
    check_keys(::Type{T}) where {T<:NamedTuple}

Throw an `ArgumentError` if the `NamedTuple` type `T` has a field named `match`;
otherwise return `nothing`.
"""
function check_keys(::Type{T}) where {T<:NamedTuple}
    has_match_key = :match in _nt_names(T)
    has_match_key && throw(ArgumentError("Must not include `match` as a metadata key."))
    return nothing
end
"""
    disjoint_keys_check(::NamedQuery{T1}, ::Corpus{T2,TR})

Check the query's type parameter `T1` against each of the corpus's type parameters,
`T2` first and then `TR`, using the type-level `disjoint_keys_check`. Return `nothing`
when neither pair shares a key; the type-level method raises an error otherwise.
"""
function disjoint_keys_check(::NamedQuery{T1}, ::Corpus{T2,TR}) where {T1,T2,TR}
    for corpus_param in (T2, TR)
        disjoint_keys_check(T1, corpus_param)
    end
    return nothing
end
"""
    disjoint_keys_check(::NamedQuery{QueryMeta}, ::Document{DocMeta})

Check the query's type parameter against the document's type parameter using the
type-level `disjoint_keys_check`. Return `nothing` when they share no key; the
type-level method raises an error otherwise.
"""
function disjoint_keys_check(
    ::NamedQuery{QueryMeta}, ::Document{DocMeta}
) where {QueryMeta,DocMeta}
    disjoint_keys_check(QueryMeta, DocMeta)
    return nothing
end
# A similar version of this exists as an internal function in Base:
# https://github.com/JuliaLang/julia/blob/e68dda9785b4523cae49f8a60f99aa9360226eb4/base/namedtuple.jl#L181
#
# Dispatching on `T<:NamedTuple{names}` rather than on the fully parameterised
# `NamedTuple{names,T}` also accepts types whose field types are left unspecified, such
# as `NamedTuple{(:a,)}`. Those previously raised a `MethodError`.
"""
    _nt_names(::Type{<:NamedTuple})

Return the field names of a `NamedTuple` type as a tuple of `Symbol`s.
"""
_nt_names(::Type{T}) where {names,T<:NamedTuple{names}} = names
"""
    disjoint_keys_check(::Type{T1}, ::Type{T2})

Raise an error if the `NamedTuple` types `T1` and `T2` have any field name in common;
otherwise return `nothing`.
"""
function disjoint_keys_check(::Type{T1}, ::Type{T2}) where {T1,T2}
    left_keys, right_keys = _nt_names(T1), _nt_names(T2)
    isdisjoint(left_keys, right_keys) && return nothing
    return error("Metadata keys will clash when merging metadata to construct the `NamedMatch`.")
end