diff --git a/app/uploaders/zizia/csv_manifest_validator.rb b/app/uploaders/zizia/csv_manifest_validator.rb
index 908775d..2c32f63 100644
--- a/app/uploaders/zizia/csv_manifest_validator.rb
+++ b/app/uploaders/zizia/csv_manifest_validator.rb
@@ -53,20 +53,29 @@ def default_delimiter
     end
 
     def valid_headers
-      Zizia::HyraxBasicMetadataMapper.new.headers.map(&:to_s)
+      Zizia::HyraxBasicMetadataMapper.new.headers.map do |header|
+        if header.class == Symbol
+          header
+        else
+          header.parameterize.underscore.to_sym
+        end
+      end
     end
 
     def parse_csv
+      @parsed_csv = CSV.table(csv_file.path)
       @rows = CSV.read(csv_file.path).reject { |x| x.empty? || x.all?(nil) }
-      @headers = @rows.first || []
-      @transformed_headers = @headers.map { |header| header.downcase.strip }
     rescue
       @errors << 'We are unable to read this CSV file.'
     end
 
+    def headers
+      @headers ||= @parsed_csv.headers
+    end
+
     def object_types
-      return ["work"] unless @transformed_headers.include?("object type")
-      original_object_types = @rows.map { |row| row[@transformed_headers.find_index("object type")] }
+      return ["work"] unless headers.include?(:object_type)
+      original_object_types = @rows.map { |row| row[headers.find_index(:object_type)] }
       original_object_types.map { |object_type| map_object_type(object_type) }.compact.uniq
     end
 
@@ -88,8 +97,9 @@ def map_object_type(orig_value)
 
     def missing_headers
       required_headers_for_sheet.each do |header|
-        next if @transformed_headers.include?(header)
-        @errors << "Missing required column: \"#{header.titleize}\". Your spreadsheet must have this column."
+        header = header.parameterize.underscore.to_sym
+        next if headers.include?(header)
+        @errors << "Missing required column: \"#{header.to_s.titleize}\". Your spreadsheet must have this column."
       end
     end
 
@@ -117,18 +127,18 @@ def work_headers
 
     def duplicate_headers
       duplicates = []
-      sorted_headers = @transformed_headers.sort
+      sorted_headers = headers.sort
       sorted_headers.each_with_index do |x, i|
         duplicates << x if x == sorted_headers[i + 1]
       end
       duplicates.uniq.each do |header|
-        @errors << "Duplicate column names: You can have only one \"#{header.titleize}\" column."
+        @errors << "Duplicate column names: You can have only one \"#{header.to_s.titleize}\" column."
       end
     end
 
     # Warn the user if we find any unexpected headers.
     def unrecognized_headers
-      extra_headers = @transformed_headers - valid_headers
+      extra_headers = headers - valid_headers
       extra_headers.each do |header|
         @warnings << "The field name \"#{header}\" is not supported. This field will be ignored, and the metadata for this field will not be imported."
       end
@@ -145,17 +155,17 @@ def missing_values
     end
 
     def required_column_numbers(row)
-      if @transformed_headers.include?("object type")
-        required_headers(object_type(row)).map { |header| @transformed_headers.find_index(header) }.compact
+      if headers.include?(:object_type)
+        required_headers(object_type(row)).map { |header| headers.find_index(header.parameterize.underscore.to_sym) }.compact
       else
-        required_headers.map { |header| @transformed_headers.find_index(header) }.compact
+        required_headers.map { |header| headers.find_index(header.parameterize.underscore.to_sym) }.compact
      end
    end
 
    private
 
    def object_type(row)
-      row[@transformed_headers.find_index("object type")]&.downcase
+      row[headers.find_index(:object_type)]&.downcase
    end
 
    # Only allow valid license values expected by Hyrax.
@@ -194,7 +204,7 @@ def valid_object_types
 
     # Make sure this column contains only valid values
     def validate_values(header_name, valid_values_method, case_insensitive = false)
-      column_number = @transformed_headers.find_index(header_name)
+      column_number = headers.find_index(header_name.parameterize.underscore.to_sym)
       return unless column_number
 
       @rows.each_with_index do |row, i|
diff --git a/spec/dummy/spec/fixtures/csv_import/csv_files_with_problems/duplicate_headers.csv b/spec/dummy/spec/fixtures/csv_import/csv_files_with_problems/duplicate_headers.csv
new file mode 100644
index 0000000..2de2aec
--- /dev/null
+++ b/spec/dummy/spec/fixtures/csv_import/csv_files_with_problems/duplicate_headers.csv
@@ -0,0 +1,3 @@
+parent,parent,object type,identifier,license,deduplication_key,visibility,location,keyword,rights statement,creator,title,files
+,def/123,w,abc/123,https://creativecommons.org/licenses/by/4.0/,abc/123,PUBlic,http://www.geonames.org/5667009/montana.html|~|http://www.geonames.org/6252001/united-states.html,Clothing stores $z California $z Los Angeles|~|Interior design $z California $z Los Angeles,http://rightsstatements.org/vocab/InC/1.0/,"Connell, Will, $d 1898-1961","Interior view of The Bachelors haberdashery designed by Julius Ralph Davidson, Los Angeles, circa 1929",dog.jpg
+,,C,,,7,Public,,,,,Test collection,
diff --git a/spec/dummy/spec/fixtures/csv_import/good/many_files.csv b/spec/dummy/spec/fixtures/csv_import/good/many_files.csv
new file mode 100644
index 0000000..15538e6
--- /dev/null
+++ b/spec/dummy/spec/fixtures/csv_import/good/many_files.csv
@@ -0,0 +1,7 @@
+object type,title,creator,keyword,rights statement,visibility,files,identifier,deduplication_key,parent
+Collection,Collection of fruits,,,,public,,def/234,def/234,
+w,Work on tomatoes,"Tomato, Tommy",fruits|~|tomatoes,http://rightsstatements.org/vocab/InC/1.0/,public,,abc/123,abc/123,def/234
+f,,,,,,dog.jpg,,,abc/123
+f,,,,,,birds.jpg,,,abc/123
+f,,,,,,cat.jpg,,,abc/123
+f,,,,,,zizia.png,,,abc/123
diff --git a/spec/dummy/spec/system/import_from_csv_separate_files_spec.rb b/spec/dummy/spec/system/import_from_csv_separate_files_spec.rb
new file mode 100644
index 0000000..25c1c5a
--- /dev/null
+++ b/spec/dummy/spec/system/import_from_csv_separate_files_spec.rb
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+require 'rails_helper'
+include Warden::Test::Helpers
+
+RSpec.describe 'Importing records from a CSV file', :perform_jobs, clean: true, type: :system, js: true do
+  before do
+    allow(CharacterizeJob).to receive(:perform_later)
+  end
+
+  around do |example|
+    orig_import_path = ENV['IMPORT_PATH']
+    ENV['IMPORT_PATH'] = File.join(fixture_path, 'images')
+    example.run
+    ENV['IMPORT_PATH'] = orig_import_path
+  end
+
+  let(:csv_file) { File.join(fixture_path, 'csv_import', 'good', 'many_files.csv') }
+  let(:test_strategy) { Flipflop::FeatureSet.current.test! }
+
+  context 'logged in as an admin user' do
+    let(:admin_user) { FactoryBot.create(:admin) }
+
+    let(:admin_set_id) { AdminSet.find_or_create_default_admin_set_id }
+    let(:permission_template) { Hyrax::PermissionTemplate.find_or_create_by!(source_id: admin_set_id) }
+    let(:default_collection_type) { Hyrax::CollectionType.find_or_create_default_collection_type }
+    let(:workflow) { Sipity::Workflow.create!(active: true, name: 'test-workflow', permission_template: permission_template) }
+
+    before do
+      # Create the default collection type in order to create a new collection
+      default_collection_type
+      # Create a single action that can be taken
+      Sipity::WorkflowAction.create!(name: 'submit', workflow: workflow)
+
+      # Grant the user access to deposit into the admin set.
+      Hyrax::PermissionTemplateAccess.create!(
+        permission_template_id: permission_template.id,
+        agent_type: 'user',
+        agent_id: admin_user.user_key,
+        access: 'deposit'
+      )
+
+      login_as admin_user
+    end
+
+    context 'using the new UI' do
+      before do
+        test_strategy.switch!(:new_zizia_ui, true)
+      end
+
+      it 'creates a collection and a work via the UI' do
+        pending('Not requiring a work to have info in the files row if it has files elsewhere on the sheet')
+        visit '/csv_imports/new'
+        # Fill in and submit the form
+        expect do
+          select 'Update Existing Metadata, create new works', from: "csv_import[update_actor_stack]"
+          attach_file('csv_import[manifest]', csv_file, make_visible: true)
+          expect(page).to have_content('You sucessfully uploaded this CSV: many_files.csv')
+
+          click_on 'Preview Import'
+
+          expect(page).to have_content 'This import will process 6 row(s).'
+
+          click_on 'Start Import'
+        end.to change { Work.count }.by(1)
+          .and change { Collection.count }.by(1)
+          .and change { FileSet.count }.by(4)
+
+        # The show page for the CsvImport
+        expect(page).to have_content 'many_files.csv'
+        expect(page).to have_content 'Start time'
+
+        # Ensure that all the fields got assigned as expected
+        work_one = Work.where(title: "*tomatoes*").first
+        expect(work_one.title.first).to match(/tomatoes/)
+      end
+    end
+  end
+end
diff --git a/spec/uploaders/zizia/csv_manfest_validator_spec.rb b/spec/uploaders/zizia/csv_manfest_validator_spec.rb
index 74d78e9..dc78a02 100644
--- a/spec/uploaders/zizia/csv_manfest_validator_spec.rb
+++ b/spec/uploaders/zizia/csv_manfest_validator_spec.rb
@@ -6,7 +6,14 @@
   subject(:validator) { described_class.new(uploader) }
 
   let(:uploader) { Zizia::CsvManifestUploader.new }
-
+  let(:required_file_plus_work_headers) { ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key', 'parent'] }
+  let(:required_work_headers) { ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key'] }
+  let(:required_collection_headers) { ['title', 'visibility'] }
+  let(:required_file_headers) { ["files", "parent"] }
+  # let(:required_file_plus_work_headers) {[:title, :creator, :keyword, :rights_statement, :visibility, :files, :deduplication_key, :parent]}
+  # let(:required_work_headers) {[:title, :creator, :keyword, :rights_statement, :visibility, :files, :deduplication_key]}
+  # let(:required_collection_headers) { [:title, :visibility] }
+  # let(:required_file_headers) { [:files, :parent] }
   before do
     Zizia::CsvManifestUploader.enable_processing = true
     File.open(path_to_file) { |f| uploader.store!(f) }
@@ -48,7 +55,6 @@
     end
 
     it "collects all the object_types in the file" do
-      required_file_plus_work_headers = ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key', 'parent']
       expect(validator.object_types).to match_array(["collection", "work", "file"])
       expect(validator.required_headers_for_sheet).to match_array(required_file_plus_work_headers)
     end
@@ -83,19 +89,16 @@
     let(:collection_row) { 'C,,,,7,Public,,,,,Test collection,' }
 
     it "thinks valid fields is the same as hyraxbasicmetadata.fields" do
-      expect(validator.send(:valid_headers).sort).to eq(Zizia::HyraxBasicMetadataMapper.new.headers.map(&:to_s).sort)
+      valid_headers = [:abstract_or_summary, :bibliographic_citation, :contributor, :creator, :date_created, :deduplication_key, :files, :identifier, :keyword, :language, :license, :location, :object_type, :parent, :publisher, :related_url, :resource_type, :rights_statement, :source, :subject, :title, :visibility]
+      expect(validator.send(:valid_headers).sort).to match_array(valid_headers)
     end
 
     it "collects all the object_types in the file" do
-      required_work_headers = ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key']
       expect(validator.object_types).to eq(["work", "collection"])
       expect(validator.required_headers_for_sheet).to match_array(required_work_headers)
     end
 
     it "returns required headers based on the object type" do
-      required_work_headers = ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key']
-      required_collection_headers = ['title', 'visibility']
-      required_file_headers = ["files", "parent"]
       expect(validator.required_headers).to eq(required_work_headers)
       expect(validator.required_headers("w")).to eq(required_work_headers)
       expect(validator.required_headers("c")).to eq(required_collection_headers)
@@ -113,15 +116,50 @@
     end
   end
 
-  context "without an object type column" do
+  context "without an object type column and empty rows" do
     let(:path_to_file) { Rails.root.join('spec', 'fixtures', 'csv_import', 'good', 'all_fields_only_new.csv') }
     let(:work_row) do
       'abc/123,https://creativecommons.org/licenses/by/4.0/,abc/123,PUBlic,http://www.geonames.org/5667009/montana.html|~|http://www.geonames.org/6252001/united-states.html,Clothing stores $z California $z Los Angeles|~|Interior design $z California $z Los Angeles,http://rightsstatements.org/vocab/InC/1.0/,"Connell, Will, $d 1898-1961","Interior view of The Bachelors haberdashery designed by Julius Ralph Davidson, Los Angeles, circa 1929",dog.jpg '
     end
 
     it "still gives required headers and their associated column numbers" do
-      expect(validator.required_headers).to eq(['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key'])
+      expect(validator.required_headers).to match_array(required_work_headers)
       expect(validator.required_column_numbers(work_row)).to eq([8, 7, 5, 6, 3, 9, 2])
     end
+
+    it "still validates the file" do
+      validator.validate
+      expect(validator.errors).to eq([])
+      expect(validator.warnings).to eq([])
+    end
+
+    it "still gives the expected headers" do
+      sheet_headers = [:identifier, :license, :deduplication_key, :visibility, :location, :keyword, :rights_statement, :creator, :title, :files]
+      expect(validator.headers).to match_array(sheet_headers)
+    end
+  end
+
+  context "with files for a work on a separate row" do
+    let(:path_to_file) { Rails.root.join('spec', 'fixtures', 'csv_import', 'good', 'many_files.csv') }
+
+    it "still gives required headers for entire sheet" do
+      expect(validator.required_headers_for_sheet).to match_array(required_file_plus_work_headers)
+    end
+
+    it "does not require a file row value for a work if the files are on a separate row" do
+      pending("Validating the CSV file as a whole")
+      validator.validate
+      expect(validator.errors).to eq([])
+      expect(validator.warnings).to eq([])
+    end
+  end
+
+  context "with duplicate headers" do
+    let(:path_to_file) { Rails.root.join('spec', 'fixtures', 'csv_import', 'csv_files_with_problems', 'duplicate_headers.csv') }
+    it "gives a warning for duplicate headers" do
+      validator.validate
+      expect(validator.errors).to match_array(['Duplicate column names: You can have only one "Parent" column.'])
+      expect(validator.warnings).to eq([])
+    end
   end
 end