Skip to content

Commit

Permalink
Green - refactor headers to be parsed along with the rest of the CSV
Browse files Browse the repository at this point in the history
  • Loading branch information
maxkadel committed Sep 30, 2021
1 parent 4197066 commit 85c7879
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 24 deletions.
40 changes: 25 additions & 15 deletions app/uploaders/zizia/csv_manifest_validator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,29 @@ def default_delimiter
end

def valid_headers
Zizia::HyraxBasicMetadataMapper.new.headers.map(&:to_s)
Zizia::HyraxBasicMetadataMapper.new.headers.map do |header|
if header.class == Symbol
header
else
header.parameterize.underscore.to_sym
end
end
end

def parse_csv
@parsed_csv = CSV.table(csv_file.path)
@rows = CSV.read(csv_file.path).reject { |x| x.empty? || x.all?(nil) }
@headers = @rows.first || []
@transformed_headers = @headers.map { |header| header.downcase.strip }
rescue
@errors << 'We are unable to read this CSV file.'
end

def headers
@headers ||= @parsed_csv.headers
end

def object_types
return ["work"] unless @transformed_headers.include?("object type")
original_object_types = @rows.map { |row| row[@transformed_headers.find_index("object type")] }
return ["work"] unless headers.include?(:object_type)
original_object_types = @rows.map { |row| row[headers.find_index(:object_type)] }
original_object_types.map { |object_type| map_object_type(object_type) }.compact.uniq
end

Expand All @@ -88,8 +97,9 @@ def map_object_type(orig_value)

def missing_headers
required_headers_for_sheet.each do |header|
next if @transformed_headers.include?(header)
@errors << "Missing required column: \"#{header.titleize}\". Your spreadsheet must have this column."
header = header.parameterize.underscore.to_sym
next if headers.include?(header)
@errors << "Missing required column: \"#{header.to_s.titleize}\". Your spreadsheet must have this column."
end
end

Expand Down Expand Up @@ -117,18 +127,18 @@ def work_headers

def duplicate_headers
duplicates = []
sorted_headers = @transformed_headers.sort
sorted_headers = headers.sort
sorted_headers.each_with_index do |x, i|
duplicates << x if x == sorted_headers[i + 1]
end
duplicates.uniq.each do |header|
@errors << "Duplicate column names: You can have only one \"#{header.titleize}\" column."
@errors << "Duplicate column names: You can have only one \"#{header.to_s.titleize}\" column."
end
end

# Warn the user if we find any unexpected headers.
def unrecognized_headers
extra_headers = @transformed_headers - valid_headers
extra_headers = headers - valid_headers
extra_headers.each do |header|
@warnings << "The field name \"#{header}\" is not supported. This field will be ignored, and the metadata for this field will not be imported."
end
Expand All @@ -145,17 +155,17 @@ def missing_values
end

def required_column_numbers(row)
if @transformed_headers.include?("object type")
required_headers(object_type(row)).map { |header| @transformed_headers.find_index(header) }.compact
if headers.include?(:object_type)
required_headers(object_type(row)).map { |header| headers.find_index(header.parameterize.underscore.to_sym) }.compact
else
required_headers.map { |header| @transformed_headers.find_index(header) }.compact
required_headers.map { |header| headers.find_index(header.parameterize.underscore.to_sym) }.compact
end
end

private

def object_type(row)
row[@transformed_headers.find_index("object type")]&.downcase
row[headers.find_index(:object_type)]&.downcase
end

# Only allow valid license values expected by Hyrax.
Expand Down Expand Up @@ -194,7 +204,7 @@ def valid_object_types

# Make sure this column contains only valid values
def validate_values(header_name, valid_values_method, case_insensitive = false)
column_number = @transformed_headers.find_index(header_name)
column_number = headers.find_index(header_name.parameterize.underscore.to_sym)
return unless column_number

@rows.each_with_index do |row, i|
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
parent,parent,object type,identifier,license,deduplication_key,visibility,location,keyword,rights statement,creator,title,files
,def/123,w,abc/123,https://creativecommons.org/licenses/by/4.0/,abc/123,PUBlic,http://www.geonames.org/5667009/montana.html|~|http://www.geonames.org/6252001/united-states.html,Clothing stores $z California $z Los Angeles|~|Interior design $z California $z Los Angeles,http://rightsstatements.org/vocab/InC/1.0/,"Connell, Will, $d 1898-1961","Interior view of The Bachelors haberdashery designed by Julius Ralph Davidson, Los Angeles, circa 1929",dog.jpg
,,C,,,7,Public,,,,,Test collection,
7 changes: 7 additions & 0 deletions spec/dummy/spec/fixtures/csv_import/good/many_files.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
object type,title,creator,keyword,rights statement,visibility,files,identifier,deduplication_key,parent
Collection,Collection of fruits,,,,public,,def/234,def/234,
w,Work on tomatoes,"Tomato, Tommy",fruits|~|tomatoes,http://rightsstatements.org/vocab/InC/1.0/,public,,abc/123,abc/123,def/234
f,,,,,,dog.jpg,,,abc/123
f,,,,,,birds.jpg,,,abc/123
f,,,,,,cat.jpg,,,abc/123
f,,,,,,zizia.png,,,abc/123
78 changes: 78 additions & 0 deletions spec/dummy/spec/system/import_from_csv_separate_files_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# frozen_string_literal: true
require 'rails_helper'
include Warden::Test::Helpers

RSpec.describe 'Importing records from a CSV file', :perform_jobs, clean: true, type: :system, js: true do
before do
allow(CharacterizeJob).to receive(:perform_later)
end

around do |example|
orig_import_path = ENV['IMPORT_PATH']
ENV['IMPORT_PATH'] = File.join(fixture_path, 'images')
example.run
ENV['IMPORT_PATH'] = orig_import_path
end

let(:csv_file) { File.join(fixture_path, 'csv_import', 'good', 'many_files.csv') }
let(:test_strategy) { Flipflop::FeatureSet.current.test! }

context 'logged in as an admin user' do
let(:admin_user) { FactoryBot.create(:admin) }

let(:admin_set_id) { AdminSet.find_or_create_default_admin_set_id }
let(:permission_template) { Hyrax::PermissionTemplate.find_or_create_by!(source_id: admin_set_id) }
let(:default_collection_type) { Hyrax::CollectionType.find_or_create_default_collection_type }
let(:workflow) { Sipity::Workflow.create!(active: true, name: 'test-workflow', permission_template: permission_template) }

before do
# Create the default collection type in order to create a new collection
default_collection_type
# Create a single action that can be taken
Sipity::WorkflowAction.create!(name: 'submit', workflow: workflow)

# Grant the user access to deposit into the admin set.
Hyrax::PermissionTemplateAccess.create!(
permission_template_id: permission_template.id,
agent_type: 'user',
agent_id: admin_user.user_key,
access: 'deposit'
)

login_as admin_user
end

context 'using the new UI' do
before do
test_strategy.switch!(:new_zizia_ui, true)
end

it 'creates a collection and a work via the UI' do
pending('Not requiring a work to have info in the files row if it has files elsewhere on the sheet')
visit '/csv_imports/new'
# Fill in and submit the form
expect do
select 'Update Existing Metadata, create new works', from: "csv_import[update_actor_stack]"
attach_file('csv_import[manifest]', csv_file, make_visible: true)
expect(page).to have_content('You sucessfully uploaded this CSV: many_files.csv')

click_on 'Preview Import'

expect(page).to have_content 'This import will process 6 row(s).'

click_on 'Start Import'
end.to change { Work.count }.by(1)
.and change { Collection.count }.by(1)
.and change { FileSet.count }.by(4)

# The show page for the CsvImport
expect(page).to have_content 'many_files.csv'
expect(page).to have_content 'Start time'

# Ensure that all the fields got assigned as expected
work_one = Work.where(title: "*tomatoes*").first
expect(work_one.title.first).to match(/tomatoes/)
end
end
end
end
56 changes: 47 additions & 9 deletions spec/uploaders/zizia/csv_manfest_validator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
subject(:validator) { described_class.new(uploader) }

let(:uploader) { Zizia::CsvManifestUploader.new }

let(:required_file_plus_work_headers) { ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key', 'parent'] }
let(:required_work_headers) { ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key'] }
let(:required_collection_headers) { ['title', 'visibility'] }
let(:required_file_headers) { ["files", "parent"] }
# let(:required_file_plus_work_headers) {[:title, :creator, :keyword, :rights_statement, :visibility, :files, :deduplication_key, :parent]}
# let(:required_work_headers) {[:title, :creator, :keyword, :rights_statement, :visibility, :files, :deduplication_key]}
# let(:required_collection_headers) { [:title, :visibility] }
# let(:required_file_headers) { [:files, :parent] }
before do
Zizia::CsvManifestUploader.enable_processing = true
File.open(path_to_file) { |f| uploader.store!(f) }
Expand Down Expand Up @@ -48,7 +55,6 @@
end

it "collects all the object_types in the file" do
required_file_plus_work_headers = ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key', 'parent']
expect(validator.object_types).to match_array(["collection", "work", "file"])
expect(validator.required_headers_for_sheet).to match_array(required_file_plus_work_headers)
end
Expand Down Expand Up @@ -83,19 +89,16 @@
let(:collection_row) { 'C,,,,7,Public,,,,,Test collection,' }

it "thinks valid fields is the same as hyraxbasicmetadata.fields" do
expect(validator.send(:valid_headers).sort).to eq(Zizia::HyraxBasicMetadataMapper.new.headers.map(&:to_s).sort)
valid_headers = [:abstract_or_summary, :bibliographic_citation, :contributor, :creator, :date_created, :deduplication_key, :files, :identifier, :keyword, :language, :license, :location, :object_type, :parent, :publisher, :related_url, :resource_type, :rights_statement, :source, :subject, :title, :visibility]
expect(validator.send(:valid_headers).sort).to match_array(valid_headers)
end

it "collects all the object_types in the file" do
required_work_headers = ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key']
expect(validator.object_types).to eq(["work", "collection"])
expect(validator.required_headers_for_sheet).to match_array(required_work_headers)
end

it "returns required headers based on the object type" do
required_work_headers = ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key']
required_collection_headers = ['title', 'visibility']
required_file_headers = ["files", "parent"]
expect(validator.required_headers).to eq(required_work_headers)
expect(validator.required_headers("w")).to eq(required_work_headers)
expect(validator.required_headers("c")).to eq(required_collection_headers)
Expand All @@ -113,15 +116,50 @@
end
end

context "without an object type column" do
context "without an object type column and empty rows" do
let(:path_to_file) { Rails.root.join('spec', 'fixtures', 'csv_import', 'good', 'all_fields_only_new.csv') }
let(:work_row) do
'abc/123,https://creativecommons.org/licenses/by/4.0/,abc/123,PUBlic,http://www.geonames.org/5667009/montana.html|~|http://www.geonames.org/6252001/united-states.html,Clothing stores $z California $z Los Angeles|~|Interior design $z California $z Los Angeles,http://rightsstatements.org/vocab/InC/1.0/,"Connell, Will, $d 1898-1961","Interior view of The Bachelors haberdashery designed by Julius Ralph Davidson, Los Angeles, circa 1929",dog.jpg
'
end
it "still gives required headers and their associated column numbers" do
expect(validator.required_headers).to eq(['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key'])
expect(validator.required_headers).to match_array(required_work_headers)
expect(validator.required_column_numbers(work_row)).to eq([8, 7, 5, 6, 3, 9, 2])
end

it "still validates the file" do
validator.validate
expect(validator.errors).to eq([])
expect(validator.warnings).to eq([])
end

it "still gives the expected headers" do
sheet_headers = [:identifier, :license, :deduplication_key, :visibility, :location, :keyword, :rights_statement, :creator, :title, :files]
expect(validator.headers). to match_array(sheet_headers)
end
end

context "with files for a work on a separate row" do
let(:path_to_file) { Rails.root.join('spec', 'fixtures', 'csv_import', 'good', 'many_files.csv') }

it "still gives required headers for entire sheet" do
expect(validator.required_headers_for_sheet).to match_array(required_file_plus_work_headers)
end

it "does not require a file row value for a work if the files are on a separate row" do
pending("Validating the CSV file as a whole")
validator.validate
expect(validator.errors).to eq([])
expect(validator.warnings).to eq([])
end
end

context "with duplicate headers" do
let(:path_to_file) { Rails.root.join('spec', 'fixtures', 'csv_import', 'csv_files_with_problems', 'duplicate_headers.csv') }
it "gives a warning for duplicate headers" do
validator.validate
expect(validator.errors).to match_array(['Duplicate column names: You can have only one "Parent" column.'])
expect(validator.warnings).to eq([])
end
end
end

0 comments on commit 85c7879

Please sign in to comment.