From cf542c9ba7af924b73f288b708a4a7115504ec9c Mon Sep 17 00:00:00 2001 From: Brad Watson Date: Mon, 14 Aug 2023 09:36:47 -0500 Subject: [PATCH] Adds full-text searching to Lux sites. (#753) * Adds full-text searching to Lux sites. * rubocop appeasement. * Reconfigure css. --- app/assets/javascripts/application.js | 2 +- .../newspaper_works/ocr_search.js.erb | 14 +++++++++ app/assets/stylesheets/_search_results.scss | 18 +++++++++++ app/assets/stylesheets/lux.scss | 3 +- app/controllers/catalog_controller.rb | 14 ++++++--- app/helpers/additional_catalog_helper.rb | 2 ++ .../newspaper_works_helper_behavior.rb | 31 +++++++++++++++++++ app/models/search_builder.rb | 9 ++++-- app/views/catalog/_snippets_more.html.erb | 23 ++++++++++++++ config/locales/blacklight.en.yml | 6 ++++ .../highlight_search_params.rb | 21 +++++++++++++ spec/controllers/catalog_controller_spec.rb | 3 +- spec/spec_helper.rb | 4 +++ .../solr_documents/curate_generic_work.rb | 3 +- spec/system/view_search_results_spec.rb | 16 ++++++++++ 15 files changed, 158 insertions(+), 11 deletions(-) create mode 100644 app/assets/javascripts/newspaper_works/ocr_search.js.erb create mode 100644 app/assets/stylesheets/_search_results.scss create mode 100644 app/helpers/newspaper_works/newspaper_works_helper_behavior.rb create mode 100644 app/views/catalog/_snippets_more.html.erb create mode 100644 lib/newspaper_works/highlight_search_params.rb diff --git a/app/assets/javascripts/application.js b/app/assets/javascripts/application.js index 731b5d9b..8c9bb777 100644 --- a/app/assets/javascripts/application.js +++ b/app/assets/javascripts/application.js @@ -21,7 +21,7 @@ //= require blacklight/blacklight //= require bootstrap-select - +//= require newspaper_works/ocr_search //= require_tree . 
diff --git a/app/assets/javascripts/newspaper_works/ocr_search.js.erb b/app/assets/javascripts/newspaper_works/ocr_search.js.erb new file mode 100644 index 00000000..90002410 --- /dev/null +++ b/app/assets/javascripts/newspaper_works/ocr_search.js.erb @@ -0,0 +1,14 @@ +<%# +# gem 'newspaper_works', v1.0.2 +# Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works +# Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE +# This gem is not yet compatible with Hyrax v3, hence why I am only using the portions relevant to our use case +# This gem is used for keyword highlighting in search results +%> + +/* toggle the ocr snippets collapse link text */ +$(document).ready(function(){ + $('.ocr_snippets_expand').click(function() { + $(this).text($(this).text() == '<%= I18n.t('blacklight.search.results.snippets.more') %>' ? '<%= I18n.t('blacklight.search.results.snippets.less') %>' : '<%= I18n.t('blacklight.search.results.snippets.more') %>'); + }); +}); \ No newline at end of file diff --git a/app/assets/stylesheets/_search_results.scss b/app/assets/stylesheets/_search_results.scss new file mode 100644 index 00000000..97974066 --- /dev/null +++ b/app/assets/stylesheets/_search_results.scss @@ -0,0 +1,18 @@ +// gem 'newspaper_works', v1.0.2 +// Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works +// Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE +// This gem is not yet compatible with Hyrax v3, hence why I am only using the portions relevant to our use case +// This gem is used for keyword highlighting in search results + +$highlight-background-color: rgba(5,166,86,0.36); + +// #search-results .thumbnail_highlight, #documents .thumbnail_highlight { +// background-color: $highlight-background-color; +// z-index: 1000; +// position: absolute; +// } + +.ocr_snippet em { + 
background-color: $highlight-background-color; + font-weight: bold; +} \ No newline at end of file diff --git a/app/assets/stylesheets/lux.scss b/app/assets/stylesheets/lux.scss index aafd3b9d..6e9b1779 100644 --- a/app/assets/stylesheets/lux.scss +++ b/app/assets/stylesheets/lux.scss @@ -34,4 +34,5 @@ @import 'lux/static_headers'; @import 'lux/hero_image'; @import 'bootstrap'; -@import 'bootstrap-select'; \ No newline at end of file +@import 'bootstrap-select'; +@import 'search_results'; \ No newline at end of file diff --git a/app/controllers/catalog_controller.rb b/app/controllers/catalog_controller.rb index 3909ef5e..4ae8ed2d 100644 --- a/app/controllers/catalog_controller.rb +++ b/app/controllers/catalog_controller.rb @@ -50,15 +50,18 @@ def guest_uid_authentication_key(key) ## Should the raw solr document endpoint (e.g. /catalog/:id/raw) be enabled # config.raw_endpoint.enabled = false + list_of_common_fields = <<-EOS.gsub(/^[\s\t]*/, '').gsub(/[\s\t]*\n/, ' ').strip + system_of_record_ID_tesim primary_repository_ID_tesim emory_ark_tesim local_call_number_tesim + other_identifiers_tesim title_tesim uniform_title_tesim series_title_tesim parent_title_tesim + creator_tesim contributors_tesim keywords_tesim subject_topics_tesim subject_names_tesim + subject_geo_tesim subject_time_periods_tesim id all_text_tsimv + EOS ## Default parameters to send to solr for all search-like requests. 
See also SearchBuilder#processed_parameters config.default_solr_params = { qt: 'search', mm: '100%', rows: 10, - qf: 'system_of_record_ID_tesim primary_repository_ID_tesim emory_ark_tesim local_call_number_tesim - other_identifiers_tesim title_tesim uniform_title_tesim series_title_tesim parent_title_tesim - creator_tesim contributors_tesim keywords_tesim subject_topics_tesim subject_names_tesim - subject_geo_tesim subject_time_periods_tesim id', + qf: list_of_common_fields, fq: '(((has_model_ssim:CurateGenericWork) OR (has_model_ssim:Collection)) AND !(visibility_ssi:restricted))' ## we want to only return works where visiblity_ssi != restricted } @@ -166,6 +169,7 @@ def guest_uid_authentication_key(key) config.add_index_field 'human_readable_date_created_tesim', label: 'Date', if: :display_date? config.add_index_field 'human_readable_content_type_ssim', label: 'Format' config.add_index_field 'visibility_group_ssi', label: 'Access', if: :display_access? + config.add_index_field 'all_text_tsimv', highlight: true, helper_method: :render_ocr_snippets def display_library?(_field_config, document) document["has_model_ssim"] == ["Collection"] @@ -310,7 +314,7 @@ def display_access?(_field_config, document) 'grant_agencies_tesim', 'content_genres_tesim', 'grant_information_tesim', 'author_notes_tesim', 'notes_tesim', 'data_source_notes_tesim', 'geographic_unit_tesim', 'technical_note_tesim', 'issn_tesim', 'isbn_tesim', 'abstract_tesim', 'related_publications_tesim', 'related_datasets_tesim', - 'table_of_contents_tesim'] + 'table_of_contents_tesim', 'all_text_tsimv'] config.add_search_field('all_fields_advanced', label: 'All Fields') do |field| field.include_in_simple_select = false diff --git a/app/helpers/additional_catalog_helper.rb b/app/helpers/additional_catalog_helper.rb index 8aa4a9f8..784bed58 100644 --- a/app/helpers/additional_catalog_helper.rb +++ b/app/helpers/additional_catalog_helper.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true module 
AdditionalCatalogHelper + include NewspaperWorks::NewspaperWorksHelperBehavior + def purl(doc_id) "https://digital.library.emory.edu/purl/#{doc_id}" end diff --git a/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb b/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb new file mode 100644 index 00000000..a75da44d --- /dev/null +++ b/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true +# gem 'newspaper_works', v1.0.2 +# Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works +# Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE +# This gem is not yet compatible with Hyrax v3, hence why I am only using the portions relevant to our use case +# This gem is used for keyword highlighting in search results + +module NewspaperWorks + module NewspaperWorksHelperBehavior + ## + # print the ocr snippets. the first is shown in its own div; if more than one, the rest are rendered via the 'catalog/snippets_more' partial
+ # + # @param options [Hash] options hash provided by Blacklight + # @return [String] snippets HTML to be rendered + # rubocop:disable Rails/OutputSafety + def render_ocr_snippets(options = {}) + snippets = options[:value] + snippets_content = [tag.div("... #{snippets.first} ...".html_safe, + class: 'ocr_snippet first_snippet')] + if snippets.length > 1 + snippets_content << render(partial: 'catalog/snippets_more', + locals: { + snippets: snippets.drop(1), + options: options + }) + end + snippets_content.join("\n").html_safe + end + # rubocop:enable Rails/OutputSafety + end +end diff --git a/app/models/search_builder.rb b/app/models/search_builder.rb index f248bb2c..d058ed51 100644 --- a/app/models/search_builder.rb +++ b/app/models/search_builder.rb @@ -1,10 +1,15 @@ # frozen_string_literal: true +require './lib/newspaper_works/highlight_search_params.rb' + class SearchBuilder < Blacklight::SearchBuilder include Blacklight::Solr::SearchBuilderBehavior include BlacklightRangeLimit::RangeLimitBuilder - include BlacklightAdvancedSearch::AdvancedSearchBuilder - self.default_processor_chain += [:add_advanced_parse_q_to_solr, :add_advanced_search_to_solr] + include NewspaperWorks::HighlightSearchParams + + self.default_processor_chain += [ + :add_advanced_parse_q_to_solr, :add_advanced_search_to_solr, :highlight_search_params + ] ## # @example Adding a new step to the processor chain diff --git a/app/views/catalog/_snippets_more.html.erb b/app/views/catalog/_snippets_more.html.erb new file mode 100644 index 00000000..e5e2ea3f --- /dev/null +++ b/app/views/catalog/_snippets_more.html.erb @@ -0,0 +1,23 @@ +<%# +# gem 'newspaper_works', v1.0.2 +# Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works +# Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE +# This gem is not yet compatible with Hyrax v3, hence why I am only using the portions relevant to our use case +# This gem 
is used for keyword highlighting in search results +%> +<%# additional ocr snippets, with a Bootstrap collapse toggle control %> +<% document_id = options[:document].id %> +
<div class="collapse" id="snippet_collapse_<%= document_id %>"> + <% snippets.each do |snippet| %> + <%= content_tag('div', + "... #{snippet} ...".html_safe, + class: 'ocr_snippet') %> + <% end %> +</div>
+<%= link_to(t('blacklight.search.results.snippets.more'), + "#snippet_collapse_#{document_id}", + data: {toggle: 'collapse'}, + 'aria-expanded' => 'false', + 'aria-controls' => "#snippet_collapse_#{document_id}", + class: 'ocr_snippets_expand js-controls') +%> \ No newline at end of file diff --git a/config/locales/blacklight.en.yml b/config/locales/blacklight.en.yml index adb29f1e..7ef0e312 100644 --- a/config/locales/blacklight.en.yml +++ b/config/locales/blacklight.en.yml @@ -36,9 +36,15 @@ en: search: facets: title: 'Browse All Items' + fields: + all_text_tsimv: 'Full-text matches' form: search: placeholder: 'Search Digital Collections' + results: + snippets: + less: '<< less' + more: 'more >>' search_history: recent: 'Your recent searches (cleared after your browser session)' work: diff --git a/lib/newspaper_works/highlight_search_params.rb b/lib/newspaper_works/highlight_search_params.rb new file mode 100644 index 00000000..c5ea77aa --- /dev/null +++ b/lib/newspaper_works/highlight_search_params.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true +# gem 'newspaper_works', v1.0.2 +# Refer to the gem repository for more details: https://github.com/samvera-labs/newspaper_works +# Released under license Apache License 2.0: https://github.com/samvera-labs/newspaper_works/blob/main/LICENSE +# This gem is not yet compatible with Hyrax v3, hence why I am only using the portions relevant to our use case +# This gem is used for keyword highlighting in search results + +module NewspaperWorks + # add highlighting on _stored_ full text field if this is a keyword search + # can be added to default_processor_chain in a SearchBuilder class + module HighlightSearchParams + # add highlights on full text field, if there is a keyword query + def highlight_search_params(solr_parameters = {}) + return unless solr_parameters[:q] || solr_parameters[:all_fields] + solr_parameters[:hl] = true + solr_parameters[:'hl.fl'] = 'all_text_tsimv' + solr_parameters[:'hl.fragsize'] = 100 + 
solr_parameters[:'hl.snippets'] = 5 + end + end +end diff --git a/spec/controllers/catalog_controller_spec.rb b/spec/controllers/catalog_controller_spec.rb index 0255cbc2..5c2e7f03 100644 --- a/spec/controllers/catalog_controller_spec.rb +++ b/spec/controllers/catalog_controller_spec.rb @@ -45,7 +45,8 @@ 'holding_repository_tesim', 'human_readable_content_type', 'human_readable_date_created_tesim', - 'visibility_group_ssi'] + 'visibility_group_ssi', + 'all_text_tsimv'] end it { expect(index_fields).to contain_exactly(*expected_index_fields) } end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index f48b926e..d0483de1 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,5 +1,9 @@ # frozen_string_literal: true require 'simplecov' +require 'webdrivers/chromedriver' + +Webdrivers::Chromedriver.required_version = "114.0.5735.90" unless ENV['CI'] + SimpleCov.start 'rails' do add_filter '/spec/' # for rspec add_filter '/app/jobs/' diff --git a/spec/support/solr_documents/curate_generic_work.rb b/spec/support/solr_documents/curate_generic_work.rb index 11a238a8..9609b7d2 100644 --- a/spec/support/solr_documents/curate_generic_work.rb +++ b/spec/support/solr_documents/curate_generic_work.rb @@ -81,5 +81,6 @@ thumbnail_path_ss: ['/downloads/825x69p8dh-cor?file=thumbnail'], hasRelatedImage_ssim: ['825x69p8dh-cor'], human_readable_visibility_ssi: 'Public', - visibility_group_ssi: 'Public' + visibility_group_ssi: 'Public', + all_text_tsimv: ["This is the story of Teddy Longfellow, who lived to a hundred and three!"] }.freeze diff --git a/spec/system/view_search_results_spec.rb b/spec/system/view_search_results_spec.rb index 12b0f882..be0bed2e 100644 --- a/spec/system/view_search_results_spec.rb +++ b/spec/system/view_search_results_spec.rb @@ -8,6 +8,7 @@ solr.commit ENV['THUMBNAIL_URL'] = 'http://obviously_fake_url.com' end + after { delete_all_documents_from_solr } let(:collection_id) { COLLECTION[:id] } let(:parent_work_id) { 
PARENT_CURATE_GENERIC_WORK[:id] } @@ -86,4 +87,19 @@ expect(page).to have_css('dl.document-heading-second-row') end end + + context 'when searching for a work indexed for full-text searching' do + it 'returns only the simple work with the expected elements' do + visit "/" + fill_in 'q', with: 'teddy longfellow' + click_on('search') + + expect(find_all('#documents article header h3 a').size).to eq(1) + expect(page).to have_content('The Title of my Work') + expect(page).to have_content('Full-text matches:') + expect(page).to have_content( + '... This is the story of Teddy Longfellow, who lived to a hundred and three! ...' + ) + end + end end