-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Script: Split "Subspecies of" history items #1120
Labels
Comments
Batch 1
# Batch number; interpolated into the edit summaries used by this script.
BATCH = 1
# Script-level summary, handed to Activity.execute_script_activity at the end of the script.
EDIT_SUMMARY = "Split 'Subspecies of' history items [batch #{BATCH}], see %github1120"
# Bot account: passed as the user to each per-item create_activity call and
# used as PaperTrail's whodunnit below. find_by! raises if the user is missing.
ANTCATBOT = User.find_by!(name: 'AntCatBot')
# For activities and PaperTrail.
# A fresh UUID is stored under :current_request_uuid — presumably so everything
# written by this run shares one request id; TODO confirm how the app reads it.
RequestStore.store[:current_request_uuid] = SecureRandom.uuid
PaperTrail.request.whodunnit = ANTCATBOT.id
# Splits a combined "Subspecies of A: refs; of B: refs" history item into one
# history item per "; of " segment.
#
# The first segment stays on +old_item+; every following segment becomes a new
# TaxonHistoryItem whose taxt is prefixed with "Subspecies of ". Each resulting
# taxt gets a trailing period unless it already ends with one.
#
# old_item     - history item to split (must respond to id, taxt, taxon,
#                position and update!).
# expected_tax - :balanced to require as many "{tax" tags as segments, or an
#                Integer that both the tag count and segment count must equal.
# dry_run      - when true, only print the plan and return nil without writing.
#
# Raises RuntimeError when the counts do not match +expected_tax+.
def split_subspecies_of_item!(old_item, expected_tax:, dry_run: false)
  old_taxt = old_item.taxt.dup
  split = old_taxt.split('; of ')
  num_tax_tags = old_taxt.scan('{tax').size

  if expected_tax == :balanced
    raise "tax and '; of' unbalanced" unless num_tax_tags == split.size
  else
    raise "should be #{expected_tax} tax tag" unless num_tax_tags == expected_tax
    raise "should be #{expected_tax} '; of'" unless split.size == expected_tax
  end

  # Non-mutating so it also works on frozen strings.
  with_period = ->(taxt) { taxt.end_with?('.') ? taxt : "#{taxt}." }

  taxt_to_update = with_period.call(split.first)
  taxts_to_create = split.drop(1).map { |part| with_period.call("Subspecies of #{part}") }

  puts "ID: #{old_item.id}"
  puts "num_tax_tags: #{num_tax_tags}"
  puts "split.size: #{split.size}"
  puts "original: #{old_taxt}".yellow
  puts "update: #{taxt_to_update}".green
  taxts_to_create.each do |taxt_to_create|
    puts "create: #{taxt_to_create}".blue
  end
  puts

  return if dry_run

  old_item.update!(taxt: taxt_to_update)

  # Every new item is created at the slot right after the old item, in reverse
  # order — presumably the position handling shifts earlier inserts down so the
  # final order matches the original text; TODO confirm.
  new_items = taxts_to_create.reverse.map do |taxt_to_create|
    new_item = TaxonHistoryItem.create!(taxon: old_item.taxon, taxt: taxt_to_create, position: old_item.position + 1)
    create_activity new_item, :create, "from ##{old_item.id}"
    # Yield the item itself: the block previously returned create_activity's
    # result, so the summary below did not list history item ids.
    new_item
  end
  create_activity old_item, :update, "to ##{new_items.map(&:id)}"
end
# Delegates to +history_item.create_activity+ with ANTCATBOT as the user and an
# edit summary that embeds +item_edit_summary+ and the current BATCH number.
#
# history_item      - record the activity is created on.
# action            - activity action, e.g. :create or :update.
# item_edit_summary - short per-item note inserted after "split ".
#
# Returns whatever +history_item.create_activity+ returns.
def create_activity(history_item, action, item_edit_summary)
  summary = "Split 'Subspecies of' history items (split #{item_edit_summary}) [batch #{BATCH}], see %github1120"
  history_item.create_activity(action, ANTCATBOT, edit_summary: summary)
end
# Finds every history item whose +taxt+ matches +mysql_regex+ and runs
# +split_subspecies_of_item!+ on each, printing counts before and after.
#
# mysql_regex - pattern String bound into a MySQL `taxt REGEXP ?` condition.
# dry_run     - forwarded to +split_subspecies_of_item!+. Defaults to false,
#               the value that used to be hard-coded here.
#
# Returns nil.
def regex_split!(mysql_regex, dry_run: false)
  items = TaxonHistoryItem.where("taxt REGEXP ?", mysql_regex)
  # Iterate over a snapshot of ids rather than +items+ itself (the original
  # marks this as a hack for `dry_run`).
  item_ids = items.pluck(:id)

  print_counts = lambda do
    puts "items: #{items.count}".yellow
    puts "item_ids: #{item_ids.size}".yellow
    puts "total history items: #{TaxonHistoryItem.count}".yellow
  end

  print_counts.call
  TaxonHistoryItem.where(id: item_ids).find_each do |history_item|
    split_subspecies_of_item! history_item, expected_tax: :balanced, dry_run: dry_run
  end
  print_counts.call
end
# Building blocks for the MySQL REGEXP patterns handed to regex_split!.
tax = "{tax [0-9]+}"
# NOTE: `tax_ish` and `one_or_many_tax_ish` are not referenced by this batch's
# patterns; they are kept so the definitions stay available in the session.
# `[A-Z]` replaces `[A-z]`, which also matched the punctuation between Z and a.
tax_ish = "({tax(ac)? [0-9]+}|{missing[0-9]? .+}|[A-Z][a-z]+ [a-z]+( [a-z]+)?)"
one_or_many_tax_ish = "(#{tax_ish}|(#{tax_ish}, )+#{tax_ish}?)"
ref = '({ref [0-9]+}: [0-9a-z]+)'
one_or_many_refs = "(#{ref}|(#{ref}; )+#{ref}?)"
ref_w_extras = '({ref [0-9]+}: [0-9a-z]+( \((in key|in text|footnote)\))?)'
one_or_many_refs_w_extras = "(#{ref_w_extras}|(#{ref_w_extras}; )+#{ref_w_extras}?)"

# ---
# For each reference flavour (plain first, then with extras), split items with
# exactly two segments and then items with three or more. Each shape is tried
# without and then with a trailing period, in the same order as before.
[one_or_many_refs, one_or_many_refs_w_extras].each do |refs|
  segment = "#{tax}: #{refs}"

  [
    "^Subspecies of #{segment}; of #{segment}",
    "^Subspecies of #{segment};( of #{segment};)+ of #{segment}"
  ].each do |pattern|
    regex_split! "#{pattern}$"
    # `[.]` is a literal period; the previous bare `.` matched any character.
    regex_split! "#{pattern}[.]$"
  end
end
# ---
Activity.execute_script_activity User.find_by!(name: 'Fredrik Palmkron'), EDIT_SUMMARY
|
Batch 2
# Batch number; interpolated into the edit summaries used by this script.
BATCH = 2
# Script-level summary, handed to Activity.execute_script_activity at the end of the script.
EDIT_SUMMARY = "Split 'Subspecies of' history items [batch #{BATCH}], see %github1120"
# Bot account: passed as the user to each per-item create_activity call and
# used as PaperTrail's whodunnit below. find_by! raises if the user is missing.
ANTCATBOT = User.find_by!(name: 'AntCatBot')
# For activities and PaperTrail.
# A fresh UUID is stored under :current_request_uuid — presumably so everything
# written by this run shares one request id; TODO confirm how the app reads it.
RequestStore.store[:current_request_uuid] = SecureRandom.uuid
PaperTrail.request.whodunnit = ANTCATBOT.id
# Splits a combined "Subspecies of A: refs; of B: refs" history item into one
# history item per "; of " segment.
#
# The first segment stays on +old_item+; every following segment becomes a new
# TaxonHistoryItem whose taxt is prefixed with "Subspecies of ". Each resulting
# taxt gets a trailing period unless it already ends with one.
#
# old_item     - history item to split (must respond to id, taxt, taxon,
#                position and update!).
# expected_tax - accepted but not checked in this batch; kept so existing
#                callers that pass it keep working.
# dry_run      - when true, only print the plan and return nil without writing.
def split_subspecies_of_item!(old_item, expected_tax: :unknown, dry_run: false)
  old_taxt = old_item.taxt.dup
  split = old_taxt.split('; of ')

  # Non-mutating so it also works on frozen strings.
  with_period = ->(taxt) { taxt.end_with?('.') ? taxt : "#{taxt}." }

  taxt_to_update = with_period.call(split.first)
  taxts_to_create = split.drop(1).map { |part| with_period.call("Subspecies of #{part}") }

  puts "ID: #{old_item.id}"
  puts "split.size: #{split.size}"
  puts "original: #{old_taxt}".yellow
  puts "update: #{taxt_to_update}".green
  taxts_to_create.each do |taxt_to_create|
    puts "create: #{taxt_to_create}".blue
  end
  puts

  return if dry_run

  old_item.update!(taxt: taxt_to_update)

  # Every new item is created at the slot right after the old item, in reverse
  # order — presumably the position handling shifts earlier inserts down so the
  # final order matches the original text; TODO confirm.
  new_items = taxts_to_create.reverse.map do |taxt_to_create|
    new_item = TaxonHistoryItem.create!(taxon: old_item.taxon, taxt: taxt_to_create, position: old_item.position + 1)
    create_activity new_item, :create, "from ##{old_item.id}"
    # Yield the item itself: the block previously returned create_activity's
    # result, so the summary below did not list history item ids.
    new_item
  end
  create_activity old_item, :update, "to ##{new_items.map(&:id)}"
end
# Delegates to +history_item.create_activity+ with ANTCATBOT as the user and an
# edit summary that embeds +item_edit_summary+ and the current BATCH number.
#
# history_item      - record the activity is created on.
# action            - activity action, e.g. :create or :update.
# item_edit_summary - short per-item note inserted after "split ".
#
# Returns whatever +history_item.create_activity+ returns.
def create_activity(history_item, action, item_edit_summary)
  summary = "Split 'Subspecies of' history items (split #{item_edit_summary}) [batch #{BATCH}], see %github1120"
  history_item.create_activity(action, ANTCATBOT, edit_summary: summary)
end
# Finds every history item whose +taxt+ matches +mysql_regex+ and runs
# +split_subspecies_of_item!+ on each, printing counts before and after.
#
# mysql_regex - pattern String bound into a MySQL `taxt REGEXP ?` condition.
# dry_run     - forwarded to +split_subspecies_of_item!+. Defaults to false,
#               the value that used to be hard-coded here.
#
# Returns nil.
def regex_split!(mysql_regex, dry_run: false)
  items = TaxonHistoryItem.where("taxt REGEXP ?", mysql_regex)
  # Iterate over a snapshot of ids rather than +items+ itself (the original
  # marks this as a hack for `dry_run`).
  item_ids = items.pluck(:id)

  print_counts = lambda do
    puts "items: #{items.count}".yellow
    puts "item_ids: #{item_ids.size}".yellow
    puts "total history items: #{TaxonHistoryItem.count}".yellow
  end

  print_counts.call
  TaxonHistoryItem.where(id: item_ids).find_each do |history_item|
    split_subspecies_of_item! history_item, expected_tax: :balanced, dry_run: dry_run
  end
  print_counts.call
end
# Building blocks for the MySQL REGEXP patterns handed to regex_split!.
# NOTE: `tax` and `one_or_many_tax_ish` are not referenced by this batch's
# patterns; they are kept so the definitions stay available in the session.
tax = "{tax [0-9]+}"
# A taxon-like token: a tax/taxac tag, a missing tag, or a plain two- or
# three-word name. `[A-Z]` replaces `[A-z]`, which also matched the
# punctuation characters that sit between Z and a.
tax_ish = "({tax(ac)? [0-9]+}|{missing[0-9]? .+}|[A-Z][a-z]+ [a-z]+( [a-z]+)?)"
one_or_many_tax_ish = "(#{tax_ish}|(#{tax_ish}, )+#{tax_ish}?)"
ref = '({ref [0-9]+}: [0-9a-z]+)'
one_or_many_refs = "(#{ref}|(#{ref}; )+#{ref}?)"
ref_w_extras = '({ref [0-9]+}: [0-9a-z]+( \((in key|in text|footnote)\))?)'
one_or_many_refs_w_extras = "(#{ref_w_extras}|(#{ref_w_extras}; )+#{ref_w_extras}?)"

# ---
# For each reference flavour (plain first, then with extras), split items with
# exactly two segments and then items with three or more. Each shape is tried
# without and then with a trailing period, in the same order as before.
[one_or_many_refs, one_or_many_refs_w_extras].each do |refs|
  segment = "#{tax_ish}: #{refs}"

  [
    "^Subspecies of #{segment}; of #{segment}",
    "^Subspecies of #{segment};( of #{segment};)+ of #{segment}"
  ].each do |pattern|
    regex_split! "#{pattern}$"
    # `[.]` is a literal period; the previous bare `.` matched any character.
    regex_split! "#{pattern}[.]$"
  end
end
# ---
Activity.execute_script_activity User.find_by!(name: 'Fredrik Palmkron'), EDIT_SUMMARY
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The text was updated successfully, but these errors were encountered: