Skip to content

Commit

Permalink
Adds optional spatial search overlapRatio relevancy support. Configur…
Browse files Browse the repository at this point in the history
…able via the presence of Settings.OVERLAP_RATIO_BOOST.

When a bbox param is present, the Settings.OVERLAP_RATIO_BOOST value will boost results based on the overlap ratio of the result bboxes.

Includes a modification to our Solr schema that adds a bbox fieldType and a *_bboxtype dynamic field (populated from solr_geom via copyField). Requires Solr 7.4+

Co-Authored-By: hudajkhan <hudajkhan@users.noreply.github.com>
  • Loading branch information
2 people authored and ewlarson committed Jan 31, 2019
1 parent 0354377 commit 1d9ff42
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 2 deletions.
1 change: 1 addition & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Metrics/LineLength:
- 'Rakefile'
- 'lib/generators/geoblacklight/install_generator.rb'
- 'lib/tasks/geoblacklight.rake'
- 'app/models/concerns/geoblacklight/spatial_search_behavior.rb'

Metrics/MethodLength:
Max: 16
Expand Down
6 changes: 6 additions & 0 deletions app/models/concerns/geoblacklight/spatial_search_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ def add_spatial_params(solr_params)
solr_params[:bq] << "#{Settings.FIELDS.GEOMETRY}:\"IsWithin(#{envelope_bounds})\"#{boost}"
solr_params[:fq] ||= []
solr_params[:fq] << "#{Settings.FIELDS.GEOMETRY}:\"Intersects(#{envelope_bounds})\""

if Settings.OVERLAP_RATIO_BOOST
solr_params[:overlap] =
"{!field uf=* defType=lucene f=solr_bboxtype score=overlapRatio}Intersects(#{envelope_bounds})"
solr_params[:bf] = "$overlap^#{Settings.OVERLAP_RATIO_BOOST}"
end
end
solr_params
rescue Geoblacklight::Exceptions::WrongBoundingBoxFormat
Expand Down
3 changes: 3 additions & 0 deletions lib/generators/geoblacklight/templates/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ GEOMETRY_FIELD: 'solr_geom'
# The bq boost value for spatial search matches within a bounding box
BBOX_WITHIN_BOOST: '10'

# The bf boost value for overlap ratio
OVERLAP_RATIO_BOOST: '2'

# Solr field mappings
FIELDS:
:FILE_FORMAT: 'dc_format_s'
Expand Down
5 changes: 5 additions & 0 deletions solr/conf/core.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#Written by CorePropertiesLocator
#name=geoblacklight #Change the name to the appropriate one you would like to use
config=solrconfig.xml
schema=schema.xml
#dataDir=geodata #Change this directory to the appropriate one you would like to use
12 changes: 11 additions & 1 deletion solr/conf/schema.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<schema name="geoblacklight-schema" version="1.6">
<schema name="geoblacklight-schema" version="1.7">
<uniqueKey>layer_slug_s</uniqueKey>
<fields>
<field name="_version_" type="long" stored="true" indexed="true"/>
Expand Down Expand Up @@ -64,6 +64,8 @@
<dynamicField name="*_pt" type="location" stored="true" indexed="true"/>
<dynamicField name="*_bbox" type="location_rpt" stored="true" indexed="true"/><!-- deprecated -->
<dynamicField name="*_geom" type="location_rpt" stored="true" indexed="true"/>
<dynamicField name="*_bboxtype" type="bbox" stored="true" indexed="true"/>

</fields>

<types>
Expand Down Expand Up @@ -144,6 +146,11 @@

<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers"/>
<!-- Adding field type for bboxField that enables, among other things, overlap ratio calculations -->
<fieldType name="bbox" class="solr.BBoxField"
geo="true" distanceUnits="kilometers" numberType="pdouble" />
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>


</types>

Expand Down Expand Up @@ -187,4 +194,7 @@
<copyField source="dct_provenance_s" dest="suggest"/>
<copyField source="dc_subject_sm" dest="suggest"/>
<copyField source="dct_spatial_sm" dest="suggest"/>

<!-- for bbox value -->
<copyField source="solr_geom" dest="solr_bboxtype"/>
</schema>
2 changes: 1 addition & 1 deletion solr/conf/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
that you fully re-index after changing this setting as it can
affect both how text is indexed and queried.
-->
<luceneMatchVersion>6.1</luceneMatchVersion>
<luceneMatchVersion>7.6</luceneMatchVersion>

<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib" />
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lucene-libs" />
Expand Down
55 changes: 55 additions & 0 deletions spec/features/search_results_overlap_ratio_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
require 'spec_helper'

feature 'spatial search results overlap ratio' do
  # Both scenarios stub the overlap ratio boost setting to 200.
  before do
    allow(Settings).to receive(:OVERLAP_RATIO_BOOST).and_return(200)
  end

  scenario 'result bboxes fully contained, overlap ratio applied to relevancy' do
    # Search box: US upper midwest and Canada, roughly centered on Minnesota.
    visit search_catalog_path(
      bbox: '-103.196521 39.21962 -84.431873 53.63497',
      'f[dct_provenance_s][]': 'Minnesota'
    )

    # Layer ids listed in the order they are expected to rank.
    expected_order = [
      # MN State result, slightly bigger bbox: fits the bbox param best
      'e9c71086-6b25-4950-8e1c-84c2794e3382',
      # MN State result, slightly smaller bbox: fits the bbox param second best
      '2eddde2f-c222-41ca-bd07-2fd74a21f4de',
      # TC Metro result, smaller bbox: fits the bbox param third
      '02236876-9c21-42f6-9870-d2562da8e44f'
    ]

    expected_order.each.with_index(1) do |layer_id, rank|
      expect(position_in_result_page(page, layer_id)).to eq rank
    end
  end

  scenario 'three bboxes overlap, but none are fully contained, overlap ratio should still impact relevancy' do
    # Search box: center to western edge of New York state and Canada,
    # roughly centered on Lake Ontario.
    visit search_catalog_path(
      bbox: '-83.750499 40.41709 -74.368175 47.963663',
      'f[dct_provenance_s][]': 'Cornell'
    )

    # The two NY State results have the bigger bboxes and overlap the bbox
    # param best; their scores tie, so either may come first.
    tied_leaders = %w[cugir-008186 cugir-008186-no-downloadurl]
    tied_leaders.each do |layer_id|
      expect(position_in_result_page(page, layer_id)).to be < 3
    end

    # The NY Adirondack Region result has the smaller bbox and overlaps the
    # bbox param the least.
    expect(position_in_result_page(page, 'cugir-007741')).to eq 3
  end
end

# Finds where a layer appears in the rendered search results.
#
# Collects the +data-layer-id+ attribute of every +div.documentHeader.row+
# element on the page, in document order, and looks up +id+ in that list.
#
# @param page [#all] object answering +all(selector)+ with an enumerable of
#   elements that support +[]+ attribute lookup (the Capybara page in these specs)
# @param id [String] the layer id to locate
# @return [Integer, nil] 1-based position of the matching result, or nil when
#   no result on the page carries that id
def position_in_result_page(page, id)
  layer_ids = page.all('div.documentHeader.row').map { |div| div['data-layer-id'] }

  # Array#index returns nil for a missing id. Safe navigation keeps that nil
  # so the calling expectation fails with a readable message, instead of
  # raising NoMethodError on `nil + 1`.
  layer_ids.index(id)&.succ
end
10 changes: 10 additions & 0 deletions spec/models/concerns/geoblacklight/spatial_search_behavior_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@
expect(subject.add_spatial_params(solr_params)[:bq].to_s).to include('^10')
end

it 'applies overlapRatio when Settings.OVERLAP_RATIO_BOOST is configured' do
  # With a boost of 2 configured, the bf param must reference $overlap^2.
  allow(Settings).to receive(:OVERLAP_RATIO_BOOST).and_return(2)

  boost_function = subject.add_spatial_params(solr_params)[:bf].to_s

  expect(boost_function).to include('$overlap^2')
end

it 'does not apply overlapRatio when Settings.OVERLAP_RATIO_BOOST not configured' do
  # A nil setting means no :overlap query is added to the Solr params.
  allow(Settings).to receive(:OVERLAP_RATIO_BOOST).and_return(nil)

  resulting_params = subject.add_spatial_params(solr_params)

  expect(resulting_params).not_to have_key(:overlap)
end

context 'when the wrong format for the bounding box is used' do
before do
allow(subject).to receive(:bounding_box).and_raise(Geoblacklight::Exceptions::WrongBoundingBoxFormat)
Expand Down

0 comments on commit 1d9ff42

Please sign in to comment.