Skip to content

Commit

Permalink
Adds full-text searching to Lux sites. (#753)
Browse files Browse the repository at this point in the history
* Adds full-text searching to Lux sites.

* rubocop appeasement.

* Reconfigure css.
  • Loading branch information
bwatson78 committed Aug 14, 2023
1 parent 0c08220 commit cf542c9
Show file tree
Hide file tree
Showing 15 changed files with 158 additions and 11 deletions.
2 changes: 1 addition & 1 deletion app/assets/javascripts/application.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
//= require blacklight/blacklight

//= require bootstrap-select

//= require newspaper_works/ocr_search
//= require_tree .


Expand Down
14 changes: 14 additions & 0 deletions app/assets/javascripts/newspaper_works/ocr_search.js.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<%#
# gem 'newspaper_works', v1.0.2
# Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works
# Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE
# This gem is not yet compatible with Hyrax v3, hence why I am only using the portions relevant to our use case
# This gem is used for keyword highlighting in search results
%>

/* toggle the ocr snippets collapse link text */
$(document).ready(function(){
$('.ocr_snippets_expand').click(function() {
$(this).text($(this).text() == '<%= I18n.t('blacklight.search.results.snippets.more') %>' ? '<%= I18n.t('blacklight.search.results.snippets.less') %>' : '<%= I18n.t('blacklight.search.results.snippets.more') %>');
});
});
18 changes: 18 additions & 0 deletions app/assets/stylesheets/_search_results.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// gem 'newspaper_works', v1.0.2
// Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works
// Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE
// This gem is not yet compatible with Hyrax v3, hence why I am only using the portions relevant to our use case
// This gem is used for keyword highlighting in search results

$highlight-background-color: rgba(5,166,86,0.36);

// #search-results .thumbnail_highlight, #documents .thumbnail_highlight {
// background-color: $highlight-background-color;
// z-index: 1000;
// position: absolute;
// }

// Emphasise every <em> inside an element with the ocr_snippet class:
// tinted background (shared $highlight-background-color) plus bold text.
// NOTE(review): presumably the <em> tags are the search highlighter's match markers — confirm.
.ocr_snippet em {
background-color: $highlight-background-color;
font-weight: bold;
}
3 changes: 2 additions & 1 deletion app/assets/stylesheets/lux.scss
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@
@import 'lux/static_headers';
@import 'lux/hero_image';
@import 'bootstrap';
@import 'bootstrap-select';
@import 'bootstrap-select';
@import 'search_results';
14 changes: 9 additions & 5 deletions app/controllers/catalog_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,18 @@ def guest_uid_authentication_key(key)
## Should the raw solr document endpoint (e.g. /catalog/:id/raw) be enabled
# config.raw_endpoint.enabled = false

# Field names searched by default, collapsed into a single space-separated string.
list_of_common_fields = %w[
  system_of_record_ID_tesim primary_repository_ID_tesim emory_ark_tesim local_call_number_tesim
  other_identifiers_tesim title_tesim uniform_title_tesim series_title_tesim parent_title_tesim
  creator_tesim contributors_tesim keywords_tesim subject_topics_tesim subject_names_tesim
  subject_geo_tesim subject_time_periods_tesim id all_text_tsimv
].join(' ')
## Default parameters to send to solr for all search-like requests. See also SearchBuilder#processed_parameters
config.default_solr_params = {
qt: 'search',
mm: '100%',
rows: 10,
qf: 'system_of_record_ID_tesim primary_repository_ID_tesim emory_ark_tesim local_call_number_tesim
other_identifiers_tesim title_tesim uniform_title_tesim series_title_tesim parent_title_tesim
creator_tesim contributors_tesim keywords_tesim subject_topics_tesim subject_names_tesim
subject_geo_tesim subject_time_periods_tesim id',
qf: list_of_common_fields,
fq: '(((has_model_ssim:CurateGenericWork) OR (has_model_ssim:Collection)) AND !(visibility_ssi:restricted))'
## we want to only return works where visibility_ssi != restricted
}
Expand Down Expand Up @@ -166,6 +169,7 @@ def guest_uid_authentication_key(key)
config.add_index_field 'human_readable_date_created_tesim', label: 'Date', if: :display_date?
config.add_index_field 'human_readable_content_type_ssim', label: 'Format'
config.add_index_field 'visibility_group_ssi', label: 'Access', if: :display_access?
config.add_index_field 'all_text_tsimv', highlight: true, helper_method: :render_ocr_snippets

def display_library?(_field_config, document)
document["has_model_ssim"] == ["Collection"]
Expand Down Expand Up @@ -310,7 +314,7 @@ def display_access?(_field_config, document)
'grant_agencies_tesim', 'content_genres_tesim', 'grant_information_tesim', 'author_notes_tesim',
'notes_tesim', 'data_source_notes_tesim', 'geographic_unit_tesim', 'technical_note_tesim',
'issn_tesim', 'isbn_tesim', 'abstract_tesim', 'related_publications_tesim', 'related_datasets_tesim',
'table_of_contents_tesim']
'table_of_contents_tesim', 'all_text_tsimv']

config.add_search_field('all_fields_advanced', label: 'All Fields') do |field|
field.include_in_simple_select = false
Expand Down
2 changes: 2 additions & 0 deletions app/helpers/additional_catalog_helper.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true
module AdditionalCatalogHelper
include NewspaperWorks::NewspaperWorksHelperBehavior

# Build the digital.library.emory.edu PURL address for a document id.
#
# @param doc_id [#to_s] identifier appended after the fixed "/purl/" prefix
# @return [String] the full PURL address
def purl(doc_id)
  purl_prefix = 'https://digital.library.emory.edu/purl/'
  "#{purl_prefix}#{doc_id}"
end
Expand Down
31 changes: 31 additions & 0 deletions app/helpers/newspaper_works/newspaper_works_helper_behavior.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# frozen_string_literal: true
# gem 'newspaper_works', v1.0.2
# Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works
# Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE
# This gem is not yet compatible with Hyrax v3, so only the portions relevant to our use case are used here
# This gem is used for keyword highlighting in search results

module NewspaperWorks
  # View helper that turns the highlighted full-text snippets of a search result into HTML.
  module NewspaperWorksHelperBehavior
    ##
    # Print the OCR snippets. The first snippet is always shown; any further
    # snippets are handed to the 'catalog/snippets_more' partial. The pieces are
    # joined with a newline.
    #
    # The snippets are marked html_safe so that markup inside them is rendered,
    # which means they must already be safe to emit as HTML.
    #
    # @param options [Hash] options hash provided by Blacklight; +:value+ holds the snippets
    # @return [String] snippets HTML to be rendered (an empty string when there are no snippets)
    # rubocop:disable Rails/OutputSafety
    def render_ocr_snippets(options = {})
      # Array() tolerates a missing value (nil) or a single snippet passed as a bare String.
      snippets = Array(options[:value])
      return ''.html_safe if snippets.empty?

      snippets_content = [tag.div("... #{snippets.first} ...".html_safe,
                                  class: 'ocr_snippet first_snippet')]
      if snippets.length > 1
        snippets_content << render(partial: 'catalog/snippets_more',
                                   locals: {
                                     snippets: snippets.drop(1),
                                     options: options
                                   })
      end
      snippets_content.join("\n").html_safe
    end
    # rubocop:enable Rails/OutputSafety
  end
end
9 changes: 7 additions & 2 deletions app/models/search_builder.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# frozen_string_literal: true
require './lib/newspaper_works/highlight_search_params.rb'

class SearchBuilder < Blacklight::SearchBuilder
include Blacklight::Solr::SearchBuilderBehavior
include BlacklightRangeLimit::RangeLimitBuilder

include BlacklightAdvancedSearch::AdvancedSearchBuilder
self.default_processor_chain += [:add_advanced_parse_q_to_solr, :add_advanced_search_to_solr]
include NewspaperWorks::HighlightSearchParams

self.default_processor_chain += [
:add_advanced_parse_q_to_solr, :add_advanced_search_to_solr, :highlight_search_params
]

##
# @example Adding a new step to the processor chain
Expand Down
23 changes: 23 additions & 0 deletions app/views/catalog/_snippets_more.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<%#
# gem 'newspaper_works', v1.0.2
# Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works
# Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE
# This gem is not yet compatible with Hyrax v3, hence why I am only using the portions relevant to our use case
# This gem is used for keyword highlighting in search results
%>
<%# Additional OCR snippets, wrapped in a Bootstrap collapse that the link below toggles.
    Locals: snippets (the snippets to list) and options (options[:document].id is used to
    build an element id that is unique per document). %>
<% document_id = options[:document].id %>
<% collapse_id = "snippet_collapse_#{document_id}" %>
<div class="collapse ocr_snippet" id="<%= collapse_id %>">
  <% snippets.each do |snippet| %>
    <%= content_tag('div',
                    "... #{snippet} ...".html_safe,
                    class: 'ocr_snippet') %>
  <% end %>
</div>
<%# The href is a "#"-prefixed fragment, but aria-controls must be the bare element id. %>
<%= link_to(t('blacklight.search.results.snippets.more'),
            "##{collapse_id}",
            data: {toggle: 'collapse'},
            'aria-expanded' => 'false',
            'aria-controls' => collapse_id,
            class: 'ocr_snippets_expand js-controls')
%>
6 changes: 6 additions & 0 deletions config/locales/blacklight.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,15 @@ en:
search:
facets:
title: 'Browse All Items'
fields:
all_text_tsimv: 'Full-text matches'
form:
search:
placeholder: 'Search Digital Collections'
results:
snippets:
less: '<< less'
more: 'more >>'
search_history:
recent: 'Your recent searches (cleared after your browser session)'
work:
Expand Down
21 changes: 21 additions & 0 deletions lib/newspaper_works/highlight_search_params.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# frozen_string_literal: true
# gem 'newspaper_works', v1.0.2
# Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works
# Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE
# This gem is not yet compatible with Hyrax v3, so only the portions relevant to our use case are used here
# This gem is used for keyword highlighting in search results

module NewspaperWorks
  # add highlighting on _stored_ full text field if this is a keyword search
  # can be added to default_processor_chain in a SearchBuilder class
  module HighlightSearchParams
    # Add highlighting parameters for the full text field when there is a keyword query.
    #
    # Does nothing unless +solr_parameters+ has a truthy +:q+ or +:all_fields+ entry.
    # Otherwise the hash is mutated in place to request up to 5 snippets of
    # roughly 100 characters each from the +all_text_tsimv+ field.
    #
    # @param solr_parameters [Hash] Solr request parameters, modified in place
    # @return [Integer, nil] nil when no highlighting was added
    def highlight_search_params(solr_parameters = {})
      return unless solr_parameters[:q] || solr_parameters[:all_fields]

      solr_parameters[:hl] = true
      solr_parameters[:'hl.fl'] = 'all_text_tsimv'
      # The snippets are emitted as raw HTML by the view layer, so ask Solr to
      # HTML-encode the stored text; the highlight pre/post tags stay untouched.
      solr_parameters[:'hl.encoder'] = 'html'
      solr_parameters[:'hl.fragsize'] = 100
      solr_parameters[:'hl.snippets'] = 5
    end
  end
end
3 changes: 2 additions & 1 deletion spec/controllers/catalog_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
'holding_repository_tesim',
'human_readable_content_type',
'human_readable_date_created_tesim',
'visibility_group_ssi']
'visibility_group_ssi',
'all_text_tsimv']
end
it { expect(index_fields).to contain_exactly(*expected_index_fields) }
end
Expand Down
4 changes: 4 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# frozen_string_literal: true
require 'simplecov'
require 'webdrivers/chromedriver'

Webdrivers::Chromedriver.required_version = "114.0.5735.90" unless ENV['CI']

SimpleCov.start 'rails' do
add_filter '/spec/' # for rspec
add_filter '/app/jobs/'
Expand Down
3 changes: 2 additions & 1 deletion spec/support/solr_documents/curate_generic_work.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,6 @@
thumbnail_path_ss: ['/downloads/825x69p8dh-cor?file=thumbnail'],
hasRelatedImage_ssim: ['825x69p8dh-cor'],
human_readable_visibility_ssi: 'Public',
visibility_group_ssi: 'Public'
visibility_group_ssi: 'Public',
all_text_tsimv: ["This is the story of Teddy Longfellow, who lived to a hundred and three!"]
}.freeze
16 changes: 16 additions & 0 deletions spec/system/view_search_results_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
solr.commit
ENV['THUMBNAIL_URL'] = 'http://obviously_fake_url.com'
end
after { delete_all_documents_from_solr }

let(:collection_id) { COLLECTION[:id] }
let(:parent_work_id) { PARENT_CURATE_GENERIC_WORK[:id] }
Expand Down Expand Up @@ -86,4 +87,19 @@
expect(page).to have_css('dl.document-heading-second-row')
end
end

# Full-text search: a query that only matches the OCR text of one work should
# list that work alone, together with its highlighted snippet.
context 'when searching for a work indexed for full-text searching' do
  it 'returns only the simple work with the expected elements' do
    visit "/"
    fill_in 'q', with: 'teddy longfellow'
    click_on('search')

    # Let the matcher enforce the count instead of comparing the size of a
    # one-time find_all snapshot; it also gives a clearer failure message.
    expect(page).to have_css('#documents article header h3 a', count: 1)
    expect(page).to have_content('The Title of my Work')
    expect(page).to have_content('Full-text matches:')
    expect(page).to have_content(
      '... This is the story of Teddy Longfellow, who lived to a hundred and three! ...'
    )
  end
end
end

0 comments on commit cf542c9

Please sign in to comment.