Skip to content

Commit

Permalink
Merge pull request #8 from anusharanganathan/feature_rake_for_indexing
Browse files Browse the repository at this point in the history
Feature rake for indexing
  • Loading branch information
Anusha Ranganathan committed Dec 15, 2015
2 parents ce43e62 + ba280a2 commit 8ced1d2
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@
#config/redis.yml
#config/secrets.yml

# Ignore local settings
config/settings.local.yml
config/settings/*.local.yml
config/environments/*.local.yml

#Ignore all data files
data/*
!data/.keep
Empty file added data/.keep
Empty file.
58 changes: 58 additions & 0 deletions lib/tasks/colligo.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
require 'csv'
namespace :colligo do
  # Usage: rake "colligo:index_iiif_manifests[<collection>,<csv_file>]"
  #
  # Treats the first column of every row in csv_file as the URL of a IIIF
  # manifest. For each manifest that has both a title and a druid, the MODS
  # record and the annotations are added to the index through the Blacklight
  # connection; a single commit is issued once all rows have been processed.
  #
  # Raises RuntimeError when csv_file is not given or does not exist.
  desc "index mods.xml and Annotations.json for each iiif manifest part of the collection"
  task :index_iiif_manifests, [:collection, :csv_file] => [:environment] do |_t, args|
    # Check for a missing argument first: File.exist?(nil) raises a TypeError,
    # which would hide the intended error message.
    unless args.csv_file && File.exist?(args.csv_file)
      raise "File #{args.csv_file} does not exist"
    end

    # State is kept in instance variables because the helper methods of this
    # file (read_manifest, index_mods, index_annotations) read them.
    @conn = Blacklight.default_index.connection
    CSV.foreach(args.csv_file) do |row|
      # get url
      @url = row[0]
      # an empty line or empty first column has no manifest to fetch
      next if @url.blank?

      # get iiif manifest
      @manifest = read_manifest
      # manifests without a title or druid are skipped
      next if @manifest.title.blank? || @manifest.druid.blank?

      # initialize solr document model
      @modsxml = @manifest.get_modsxml
      @doc = SolrDocument.new(modsxml: @modsxml, druid: @manifest.druid,
                              collection: args.collection, iiif_manifest: @url,
                              mods_url: @manifest.mods_url)
      # index mods xml
      index_mods
      # index annotations
      @annotation_lists = @manifest.annotation_lists
      index_annotations
    end
    @conn.commit
  end
end

# Builds an IiifManifest for the URL held in @url, asks it to read itself,
# stores it in @manifest and returns that same object.
def read_manifest
  @manifest = IiifManifest.new(@url)
  # tap runs read_manifest on the manifest and hands the manifest itself back
  @manifest.tap(&:read_manifest)
end

# Converts the MODS data of the current document (@doc) into its solr form and
# adds it to the index connection (@conn). Returns whatever @conn.add returns.
def index_mods
  @conn.add(@doc.mods_to_solr)
end

# Adds every annotation of the current manifest to the index connection.
#
# Reads @annotation_lists, an array of hashes with the keys '@id', '@type' and
# 'label'; does nothing when it is nil. Each list is fetched through
# @doc.read_annotation using its '@id'. For every entry under the "resources"
# key of the fetched list, a solr document is built with
# @doc.annotation_to_solr and added to @conn. Lists without a "resources" key
# are skipped.
def index_annotations
  return unless @annotation_lists

  @annotation_lists.each do |al|
    annotation_list = @doc.read_annotation(al['@id'])
    # Hash has no `has_key` method (only `has_key?` / `key?`); the original
    # call raised NoMethodError on the first annotation list.
    next unless annotation_list.key?("resources")

    annotation_list["resources"].each do |a|
      data = { "annotation" => a, "manuscript" => @doc.title, "folio" => al['label'], "url" => al['@id'] }
      @conn.add(@doc.annotation_to_solr(data))
    end
  end
end

# Sends a commit to the index connection held in @conn and returns its result.
def commit
  @conn.commit
end

0 comments on commit 8ced1d2

Please sign in to comment.