Skip to content

Commit

Permalink
Merge branch 'development' into 1476-admin-collection-list
Browse files Browse the repository at this point in the history
  • Loading branch information
benwbrum committed Jun 14, 2021
2 parents 22578d2 + e9306cf commit bcc5ab0
Show file tree
Hide file tree
Showing 12 changed files with 8,718 additions and 88,548 deletions.
5 changes: 5 additions & 0 deletions app/assets/stylesheets/sections/page.scss
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@
display: flex;
box-sizing: border-box;
flex-direction: column;

[data-layout-mode='ttb'] &,
[data-layout-mode='btt'] & {
min-height: calc((100vh - 74px - 3rem) / 2);
}
}

.page-imagescan {
Expand Down
2 changes: 2 additions & 0 deletions app/helpers/transcribe_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def osd_source(page)
["#{@page.sc_canvas.sc_service_id}/info.json"]
elsif page.ia_leaf
[@page.ia_leaf.iiif_image_info_url]
elsif browser.platform.ios? && browser.webkit?
["#{url_for(:root)}image-service/#{page.id}/info.json"]
else
{type: 'image', url: file_to_url(page.canonical_facsimile_url)}.to_json
end
Expand Down
2 changes: 1 addition & 1 deletion config/locales/dashboard/dashboard-en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ en:
click_to_browse_files: "Click to browse files"
browse: "Browse"
use_image_filenames: " Use image filenames as page titles."
use_ocr_from_pdf: " Use OCR from PDF text layer."
use_ocr_from_pdf: " Import text from PDF text layers, text files or XML files."
zip_files_may_contain: "ZIP files may contain folders containing images, PDFs, or folders containing pdfs."
each_folder_will_be_treated: "Each folder will be treated as a different document, so do not mix pages from different documents in the same folder."
each_pdf_will_be_treated: "Each PDF will be treated as its own document, so do not split pages from the same document among more than one PDF."
Expand Down
11 changes: 8 additions & 3 deletions lib/tasks/ingestor.rake
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,10 @@ namespace :fromthepage do

if document_upload.ocr
clean_dir=path.gsub('[','\[').gsub(']','\]')
if Dir.glob(File.join(clean_dir, "*.txt")).count > 0
if (Dir.glob(File.join(clean_dir, "*.txt")).count + Dir.glob(File.join(clean_dir, "*.xml")).count) > 0
work.ocr_correction = true
else
print "\tOCR correction specified but no files found in #{File.join(path, "page*.txt")}\n"
print "\tOCR correction specified but no files found in #{File.join(path, "page*.txt")} or #{File.join(path, "page*.xml")}\n"
end
end

Expand Down Expand Up @@ -293,7 +293,12 @@ namespace :fromthepage do
page.base_width = image.columns
if work.ocr_correction
ocr_fn = File.join(path, File.basename(image_fn.gsub(IMAGE_FILE_EXTENSIONS_PATTERN, "txt")))
if File.exist? ocr_fn
xml_fn = File.join(path, File.basename(image_fn.gsub(IMAGE_FILE_EXTENSIONS_PATTERN, "xml")))
if File.exist? xml_fn
print "\t\tconvert_to_work reading raw XML text from #{xml_fn}\n"
page.source_text = File.read(xml_fn).gsub(/\[+/, '[').gsub(/\]+/, ']')
# Unlike the .txt branch below, XML is stored without .encode(:xml => :text); if this causes errors, consider escaping it here
elsif File.exist? ocr_fn
print "\t\tconvert_to_work reading raw OCR text from #{ocr_fn}\n"
page.source_text = File.read(ocr_fn).encode(:xml => :text).gsub(/\[+/, '[').gsub(/\]+/, ']')
end
Expand Down
2 changes: 1 addition & 1 deletion spec/features/add_data_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
page.execute_script(script)

attach_file('document_upload_file', './test_data/uploads/ocr.pdf')
page.check('Use OCR from PDF text layer.')
page.check('Import text from PDF text layers, text files or XML files.')
click_button('Upload File')
title = find('h1').text
expect(title).to eq @collection.title
Expand Down
30 changes: 30 additions & 0 deletions spec/features/export_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,36 @@
expect(page).to have_content(@work.title)
page.find('#btnExportAll').click
expect(page.response_headers['Content-Type']).to eq 'text/html; charset=utf-8'

page.check('bulk_export_html_page')
page.check('bulk_export_html_work')
page.check('bulk_export_plaintext_verbatim_page')
page.check('bulk_export_plaintext_verbatim_work')
page.check('bulk_export_plaintext_emended_work')
page.check('bulk_export_plaintext_emended_page')
page.check('bulk_export_plaintext_searchable_work')
page.check('bulk_export_plaintext_searchable_page')
page.check('bulk_export_tei_work')
page.check('bulk_export_table_csv_work')
page.check('bulk_export_table_csv_collection')
page.check('bulk_export_subject_csv_collection')
page.check('bulk_export_work_metadata_csv')

page.find('button', text: 'Start Export').click
expect(page).to have_content("Queued")

login_as(User.where(admin: true).first, :scope => :user)

# poll up to 10 times, 5 seconds apart (~50s max), for the background export to finish
1.upto(10) do
sleep 5
if BulkExport.last.status == 'finished'
break
end
end

visit bulk_export_index_path
expect(page).to have_content("Finished")
end

it "exports the subject index" do
Expand Down
Loading

0 comments on commit bcc5ab0

Please sign in to comment.