Skip to content

Commit

Permalink
Updates to allow files in separate rows
Browse files Browse the repository at this point in the history
- Use required headers for whole sheet for preflight (combines requirements for object types)
- Find parent work based on deduplication_field
- Raise an error for now if parent work is not found. Otherwise the stack trace is misleading.
  • Loading branch information
maxkadel committed Sep 29, 2021
1 parent 155ddbf commit 60ef366
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 11 deletions.
5 changes: 4 additions & 1 deletion app/uploaders/zizia/csv_manifest_validator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def map_object_type(orig_value)
end

def missing_headers
required_headers.each do |header|
required_headers_for_sheet.each do |header|
next if @transformed_headers.include?(header)
@errors << "Missing required column: \"#{header.titleize}\". Your spreadsheet must have this column."
end
Expand All @@ -108,6 +108,9 @@ def required_headers(object_type = "w")
end
end

# Column headers that a spreadsheet must provide for work rows.
#
# TODO: Map these headers appropriately all the way through the ingest.
# Right now the transformed headers are only downcased and stripped; they are
# not translated based on the associated mapper.
#
# @return [Array<String>] a new array of header names on every call
def work_headers
  [
    'title',
    'creator',
    'keyword',
    'rights statement',
    'visibility',
    'files',
    'deduplication_key'
  ]
end
Expand Down
7 changes: 5 additions & 2 deletions lib/zizia/hyrax/hyrax_record_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,12 @@ def create_curation_concern(record, import_type)
end
end

# Finds the work that a file row should be attached to.
#
# The parent is located by matching the configured deduplication field against
# the value the record's mapper returns for that same field. The earlier lookup
# through `record.parent` / `identifier` has been dropped: its result was
# discarded, so it only cost an extra query and forced every record to respond
# to `parent`.
#
# TODO: Right now we assume that the parent is a Work, but it would be more
# generally applicable to let this be any sort of CurationConcern.
#
# @param record [#mapper] import record whose mapper responds to the
#   deduplication field
# @return [Work] the first work whose deduplication field matches
# @raise [RuntimeError] if no matching work exists; raised here because
#   continuing with a nil parent would fail later with a misleading stack trace
def find_parent_work(record)
  dedup_value = record.mapper.send(deduplication_field).to_s
  parent_work = Work.where("#{deduplication_field}": dedup_value).first
  return parent_work if parent_work.present?

  raise "[zizia] Parent work for file not found, cannot attach file to work"
end

def create_file_set(record)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
object type,title,creator,keyword,rights statement,visibility,files,deduplication_key
c,Collection of vegetables,,,,public,,def/123
Collection,Collection of fruits,,,,public,,def/234
w,Work on tomatoes,"Tomato, Tommy",fruits|~|tomatoes,http://rightsstatements.org/vocab/InC/1.0/,public,birds.jpg,abc/123
wOrk,Work on cabbages,"Cabbage, Carl",vegetables|~|cabbages,http://rightsstatements.org/vocab/InC/1.0/,public,dog.jpg,abs/234
coLLEction,Collection of legumes,,,,public,,def/345
,Work without object type,"Cabbage, Carl",vegetables|~|cabbages,http://rightsstatements.org/vocab/InC/1.0/,public,dog.jpg,abc/345
f,,,,,,dog.jpg,,abc/123
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
object type,title,creator,keyword,rights statement,visibility,files,deduplication_key,parent
c,Collection of vegetables,,,,public,,def/123,
Collection,Collection of fruits,,,,public,,def/234,
w,Work on tomatoes,"Tomato, Tommy",fruits|~|tomatoes,http://rightsstatements.org/vocab/InC/1.0/,public,birds.jpg,abc/123,
wOrk,Work on cabbages,"Cabbage, Carl",vegetables|~|cabbages,http://rightsstatements.org/vocab/InC/1.0/,public,dog.jpg,abs/234,
coLLEction,Collection of legumes,,,,public,,def/345,
,Work without object type,"Cabbage, Carl",vegetables|~|cabbages,http://rightsstatements.org/vocab/InC/1.0/,public,dog.jpg,abc/345,
f,,,,,,dog.jpg,,
16 changes: 8 additions & 8 deletions spec/dummy/spec/fixtures/csv_import/good/mix_of_object_types.csv
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
object type,title,creator,keyword,rights statement,visibility,files,deduplication_key
c,Collection of vegetables,,,,public,,def/123
Collection,Collection of fruits,,,,public,,def/234
w,Work on tomatoes,"Tomato, Tommy",fruits|~|tomatoes,http://rightsstatements.org/vocab/InC/1.0/,public,birds.jpg,abc/123
wOrk,Work on cabbages,"Cabbage, Carl",vegetables|~|cabbages,http://rightsstatements.org/vocab/InC/1.0/,public,dog.jpg,abs/234
coLLEction,Collection of legumes,,,,public,,def/345
,Work without object type,"Cabbage, Carl",vegetables|~|cabbages,http://rightsstatements.org/vocab/InC/1.0/,public,dog.jpg,abc/345
f,,,,,,dog.jpg,,abc/123
object type,title,creator,keyword,rights statement,visibility,files,identifier,deduplication_key,parent
c,Collection of vegetables,,,,public,,def/123,def/123,
Collection,Collection of fruits,,,,public,,def/234,def/234,
w,Work on tomatoes,"Tomato, Tommy",fruits|~|tomatoes,http://rightsstatements.org/vocab/InC/1.0/,public,birds.jpg,abc/123,abc/123,
wOrk,Work on cabbages,"Cabbage, Carl",vegetables|~|cabbages,http://rightsstatements.org/vocab/InC/1.0/,public,dog.jpg,abs/234,abs/234,
coLLEction,Collection of legumes,,,,public,,def/345,def/345,
,Work without object type,"Cabbage, Carl",vegetables|~|cabbages,http://rightsstatements.org/vocab/InC/1.0/,public,dog.jpg,abc/345,abc/345,
f,,,,,,dog.jpg,,,abc/123
22 changes: 22 additions & 0 deletions spec/uploaders/zizia/csv_manfest_validator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,26 @@
end
end

# Validating a sheet that mixes object types while lacking a column that only
# one of those object types requires.
context "with a mix of object types missing a field required for only one object type" do
  let(:path_to_file) do
    Rails.root.join('spec', 'fixtures', 'csv_import', 'csv_files_with_problems', 'mix_of_object_types_missing_header.csv')
  end

  it "gives an error for the missing header based on having a file row" do
    missing_column_error = 'Missing required column: "Parent". Your spreadsheet must have this column.'

    validator.validate

    # Exactly one error is expected, and no warnings at all.
    expect(validator.errors).to eq([missing_column_error])
    expect(validator.warnings).to eq([])
  end
end

# Validating a sheet that mixes object types where a row leaves blank a value
# that only its own object type requires.
context "with a mix of object types missing a value required for only one object type" do
  let(:path_to_file) do
    Rails.root.join('spec', 'fixtures', 'csv_import', 'csv_files_with_problems', 'mix_of_object_types_missing_parent_for_file.csv')
  end

  it "gives an error for the missing value based on having a file row" do
    blank_parent_error = 'Missing required metadata in row 8: "Parent" field cannot be blank'

    validator.validate

    # Exactly one error is expected, and no warnings at all.
    expect(validator.errors).to eq([blank_parent_error])
    expect(validator.warnings).to eq([])
  end
end

context "with a object type column" do
let(:path_to_file) { Rails.root.join('spec', 'fixtures', 'csv_import', 'good', 'Postcards_Minneapolis_w_collection.csv') }
let(:work_row) do
Expand All @@ -75,6 +95,7 @@
it "returns required headers based on the object type" do
required_work_headers = ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files', 'deduplication_key']
required_collection_headers = ['title', 'visibility']
required_file_headers = ["files", "parent"]
expect(validator.required_headers).to eq(required_work_headers)
expect(validator.required_headers("w")).to eq(required_work_headers)
expect(validator.required_headers("c")).to eq(required_collection_headers)
Expand All @@ -83,6 +104,7 @@
expect(validator.required_headers("wOrk")).to eq(required_work_headers)
expect(validator.required_headers('garbage')).to eq(required_work_headers)
expect(validator.required_headers('')).to eq(required_work_headers)
expect(validator.required_headers('file')).to eq(required_file_headers)
end

it "returns different required column numbers based on the row" do
Expand Down

0 comments on commit 60ef366

Please sign in to comment.