Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script: Split "Subspecies of" history items #1120

Closed
1 task done
jonkerz opened this issue Jul 26, 2020 · 2 comments
Closed
1 task done

Script: Split "Subspecies of" history items #1120

jonkerz opened this issue Jul 26, 2020 · 2 comments
Labels

Comments

@jonkerz
Copy link
Member

jonkerz commented Jul 26, 2020

@jonkerz jonkerz added the script label Jul 26, 2020
@jonkerz
Copy link
Member Author

jonkerz commented Jul 26, 2020

Batch 1

# Batch number of this run; interpolated into the edit summaries written by this script.
BATCH = 1
# Summary passed to `Activity.execute_script_activity` at the end of the script.
# NOTE(review): `%github1120` presumably renders as a link to GitHub issue #1120 — confirm.
EDIT_SUMMARY = "Split 'Subspecies of' history items [batch #{BATCH}], see %github1120"
# User passed to `create_activity` for each item and used as the PaperTrail whodunnit below.
# `find_by!` raises if no user with this name exists.
ANTCATBOT = User.find_by!(name: 'AntCatBot')

# For activities and PaperTrail.
# A fresh UUID is stored as the current request UUID — presumably so everything
# written by this run can be grouped together; confirm against the consumers.
RequestStore.store[:current_request_uuid] = SecureRandom.uuid
PaperTrail.request.whodunnit = ANTCATBOT.id

# Splits one combined "Subspecies of ...; of ...; of ..." history item into
# separate items, one per '; of ' segment.
#
# The first segment stays in +old_item+; every following segment becomes a new
# TaxonHistoryItem whose taxt is prefixed with "Subspecies of ". Each resulting
# taxt gets a trailing period if it does not already end with one.
#
# old_item     - history item to split; must respond to id, taxt, taxon,
#                position and update!.
# expected_tax - :balanced to require as many '{tax' tags as segments, or an
#                Integer that both the tag count and the segment count must equal.
# dry_run      - when true, only prints the planned changes and returns nil.
#
# Raises RuntimeError when the counts do not satisfy +expected_tax+.
# Returns nil on a dry run, otherwise the result of the final `create_activity` call.
def split_subspecies_of_item! old_item, expected_tax:, dry_run: false
  old_taxt = old_item.taxt.dup
  split = old_taxt.split('; of ')
  num_tax_tags = old_taxt.scan('{tax').size

  if expected_tax == :balanced
    raise "tax and '; of' unbalanced" unless num_tax_tags == split.size
  else
    raise "should be #{expected_tax} tax tag" unless num_tax_tags == expected_tax
    raise "should be #{expected_tax} '; of'" unless split.size == expected_tax
  end

  with_period = ->(text) { text.end_with?('.') ? text : "#{text}." }
  taxt_to_update = with_period.call(split.first)
  taxts_to_create = split.drop(1).map { |part| with_period.call("Subspecies of #{part}") }

  puts "ID: #{old_item.id}"
  puts "num_tax_tags: #{num_tax_tags}"
  puts "split.size: #{split.size}"
  puts "original: #{old_taxt}".yellow
  puts "update:   #{taxt_to_update}".green
  taxts_to_create.each do |taxt_to_create|
    puts "create:   #{taxt_to_create}".blue
  end
  puts

  return if dry_run

  old_item.update!(taxt: taxt_to_update)

  # Every new item is created at the same position (right after the old item),
  # last segment first.
  # NOTE(review): this presumably relies on position-based list insertion
  # pushing earlier-created items down so the final order matches the text — confirm.
  new_items = taxts_to_create.reverse.map do |taxt_to_create|
    new_item = TaxonHistoryItem.create!(taxon: old_item.taxon, taxt: taxt_to_create, position: old_item.position + 1)
    create_activity new_item, :create, "from ##{old_item.id}"
    new_item # Collect the item itself, not whatever `create_activity` returns.
  end

  # List the new item IDs in reading order (creation order was reversed above).
  new_item_ids = new_items.reverse.map { |new_item| "##{new_item.id}" }.join(', ')
  create_activity old_item, :update, "to #{new_item_ids}"
end

# Records an activity on +history_item+ for this script run.
#
# history_item      - object responding to `create_activity(action, user, edit_summary:)`.
# action            - activity action to record (the script passes :create or :update).
# item_edit_summary - short per-item detail inserted into the edit summary.
#
# Returns whatever `history_item.create_activity` returns.
def create_activity history_item, action, item_edit_summary
  summary = "Split 'Subspecies of' history items (split #{item_edit_summary}) [batch #{BATCH}], see %github1120"
  history_item.create_activity(action, ANTCATBOT, edit_summary: summary)
end

# Splits every history item whose taxt matches +mysql_regex+.
#
# Prints counts before and after so the effect of the run can be checked.
#
# mysql_regex - pattern handed to MySQL's REGEXP operator as a bound parameter.
# dry_run     - forwarded to `split_subspecies_of_item!`; when true nothing is
#               modified. Defaults to false, which is what the original hard-coded.
#
# Returns nil.
def regex_split! mysql_regex, dry_run: false
  items = TaxonHistoryItem.where("taxt REGEXP ?", mysql_regex)
  # Snapshot the matching IDs before anything is changed, so the set being
  # iterated does not depend on the taxts that get rewritten along the way.
  item_ids = items.pluck(:id)

  print_counts = lambda do
    puts "items: #{items.count}".yellow
    puts "item_ids: #{item_ids.size}".yellow
    puts "total history items: #{TaxonHistoryItem.count}".yellow
  end

  print_counts.call

  TaxonHistoryItem.where(id: item_ids).find_each do |history_item|
    split_subspecies_of_item! history_item, expected_tax: :balanced, dry_run: dry_run
  end

  print_counts.call
end

# MySQL REGEXP fragments used to select the history items to split.
# A taxon tag such as "{tax 123}".
tax = "{tax [0-9]+}"

# A reference tag followed by ": " and a page token, e.g. "{ref 123}: 45".
ref = '({ref [0-9]+}: [0-9a-z]+)'
# One reference, or several separated by "; ".
# NOTE(review): the trailing `?` also lets a list end in "; " — confirm that is intended.
one_or_many_refs = "(#{ref}|(#{ref}; )+#{ref}?)"

# Same as `ref`, optionally followed by " (in key)", " (in text)" or " (footnote)".
ref_w_extras = '({ref [0-9]+}: [0-9a-z]+( \((in key|in text|footnote)\))?)'
one_or_many_refs_w_extras = "(#{ref_w_extras}|(#{ref_w_extras}; )+#{ref_w_extras}?)"

# Each shape is run twice: without and with a trailing period. The period is
# written as `[.]` because a bare `.` is a regex wildcard and would accept any
# final character.

# --- Plain references: exactly two parents, then three or more.

regex_split! "^Subspecies of #{tax}: #{one_or_many_refs}; of #{tax}: #{one_or_many_refs}$"
regex_split! "^Subspecies of #{tax}: #{one_or_many_refs}; of #{tax}: #{one_or_many_refs}[.]$"

regex_split! "^Subspecies of #{tax}: #{one_or_many_refs};( of #{tax}: #{one_or_many_refs};)+ of #{tax}: #{one_or_many_refs}$"
regex_split! "^Subspecies of #{tax}: #{one_or_many_refs};( of #{tax}: #{one_or_many_refs};)+ of #{tax}: #{one_or_many_refs}[.]$"

# --- References with "(in key)"-style extras: exactly two parents, then three or more.

regex_split! "^Subspecies of #{tax}: #{one_or_many_refs_w_extras}; of #{tax}: #{one_or_many_refs_w_extras}$"
regex_split! "^Subspecies of #{tax}: #{one_or_many_refs_w_extras}; of #{tax}: #{one_or_many_refs_w_extras}[.]$"

regex_split! "^Subspecies of #{tax}: #{one_or_many_refs_w_extras};( of #{tax}: #{one_or_many_refs_w_extras};)+ of #{tax}: #{one_or_many_refs_w_extras}$"
regex_split! "^Subspecies of #{tax}: #{one_or_many_refs_w_extras};( of #{tax}: #{one_or_many_refs_w_extras};)+ of #{tax}: #{one_or_many_refs_w_extras}[.]$"

# ---

# Record one script-level activity for the whole run.
Activity.execute_script_activity User.find_by!(name: 'Fredrik Palmkron'), EDIT_SUMMARY

@jonkerz
Copy link
Member Author

jonkerz commented Jul 26, 2020

Batch 2

# Batch number of this run; interpolated into the edit summaries written by this script.
BATCH = 2
# Summary passed to `Activity.execute_script_activity` at the end of the script.
# NOTE(review): `%github1120` presumably renders as a link to GitHub issue #1120 — confirm.
EDIT_SUMMARY = "Split 'Subspecies of' history items [batch #{BATCH}], see %github1120"
# User passed to `create_activity` for each item and used as the PaperTrail whodunnit below.
# `find_by!` raises if no user with this name exists.
ANTCATBOT = User.find_by!(name: 'AntCatBot')

# For activities and PaperTrail.
# A fresh UUID is stored as the current request UUID — presumably so everything
# written by this run can be grouped together; confirm against the consumers.
RequestStore.store[:current_request_uuid] = SecureRandom.uuid
PaperTrail.request.whodunnit = ANTCATBOT.id

# Splits one combined "Subspecies of ...; of ...; of ..." history item into
# separate items, one per '; of ' segment.
#
# The first segment stays in +old_item+; every following segment becomes a new
# TaxonHistoryItem whose taxt is prefixed with "Subspecies of ". Each resulting
# taxt gets a trailing period if it does not already end with one.
#
# old_item     - history item to split; must respond to id, taxt, taxon,
#                position and update!.
# expected_tax - accepted so existing callers keep working; this batch does not
#                check it (segments need not contain '{tax' tags here).
# dry_run      - when true, only prints the planned changes and returns nil.
#
# Returns nil on a dry run, otherwise the result of the final `create_activity` call.
def split_subspecies_of_item! old_item, expected_tax: :unknown, dry_run: false
  old_taxt = old_item.taxt.dup
  split = old_taxt.split('; of ')

  with_period = ->(text) { text.end_with?('.') ? text : "#{text}." }
  taxt_to_update = with_period.call(split.first)
  taxts_to_create = split.drop(1).map { |part| with_period.call("Subspecies of #{part}") }

  puts "ID: #{old_item.id}"
  puts "split.size: #{split.size}"
  puts "original: #{old_taxt}".yellow
  puts "update:   #{taxt_to_update}".green
  taxts_to_create.each do |taxt_to_create|
    puts "create:   #{taxt_to_create}".blue
  end
  puts

  return if dry_run

  old_item.update!(taxt: taxt_to_update)

  # Every new item is created at the same position (right after the old item),
  # last segment first.
  # NOTE(review): this presumably relies on position-based list insertion
  # pushing earlier-created items down so the final order matches the text — confirm.
  new_items = taxts_to_create.reverse.map do |taxt_to_create|
    new_item = TaxonHistoryItem.create!(taxon: old_item.taxon, taxt: taxt_to_create, position: old_item.position + 1)
    create_activity new_item, :create, "from ##{old_item.id}"
    new_item # Collect the item itself, not whatever `create_activity` returns.
  end

  # List the new item IDs in reading order (creation order was reversed above).
  new_item_ids = new_items.reverse.map { |new_item| "##{new_item.id}" }.join(', ')
  create_activity old_item, :update, "to #{new_item_ids}"
end

# Records an activity on +history_item+ for this script run.
#
# history_item      - object responding to `create_activity(action, user, edit_summary:)`.
# action            - activity action to record (the script passes :create or :update).
# item_edit_summary - short per-item detail inserted into the edit summary.
#
# Returns whatever `history_item.create_activity` returns.
def create_activity history_item, action, item_edit_summary
  summary = "Split 'Subspecies of' history items (split #{item_edit_summary}) [batch #{BATCH}], see %github1120"
  history_item.create_activity(action, ANTCATBOT, edit_summary: summary)
end

# Splits every history item whose taxt matches +mysql_regex+.
#
# Prints counts before and after so the effect of the run can be checked.
#
# mysql_regex - pattern handed to MySQL's REGEXP operator as a bound parameter.
# dry_run     - forwarded to `split_subspecies_of_item!`; when true nothing is
#               modified. Defaults to false, which is what the original hard-coded.
#
# Returns nil.
def regex_split! mysql_regex, dry_run: false
  items = TaxonHistoryItem.where("taxt REGEXP ?", mysql_regex)
  # Snapshot the matching IDs before anything is changed, so the set being
  # iterated does not depend on the taxts that get rewritten along the way.
  item_ids = items.pluck(:id)

  print_counts = lambda do
    puts "items: #{items.count}".yellow
    puts "item_ids: #{item_ids.size}".yellow
    puts "total history items: #{TaxonHistoryItem.count}".yellow
  end

  print_counts.call

  TaxonHistoryItem.where(id: item_ids).find_each do |history_item|
    split_subspecies_of_item! history_item, expected_tax: :balanced, dry_run: dry_run
  end

  print_counts.call
end

tax = "{tax [0-9]+}"
tax_ish = "({tax(ac)? [0-9]+}|{missing[0-9]? .+}|[A-z][a-z]+ [a-z]+( [a-z]+)?)"
one_or_many_tax_ish = "(#{tax_ish}|(#{tax_ish}, )+#{tax_ish}?)"

ref = '({ref [0-9]+}: [0-9a-z]+)'
one_or_many_refs = "(#{ref}|(#{ref}; )+#{ref}?)"

ref_w_extras = '({ref [0-9]+}: [0-9a-z]+( \((in key|in text|footnote)\))?)'
one_or_many_refs_w_extras = "(#{ref_w_extras}|(#{ref_w_extras}; )+#{ref_w_extras}?)"

# ---

regex_split! "^Subspecies of #{tax_ish}: #{one_or_many_refs}; of #{tax_ish}: #{one_or_many_refs}$"
regex_split! "^Subspecies of #{tax_ish}: #{one_or_many_refs}; of #{tax_ish}: #{one_or_many_refs}.$"

regex_split! "^Subspecies of #{tax_ish}: #{one_or_many_refs};( of #{tax_ish}: #{one_or_many_refs};)+ of #{tax_ish}: #{one_or_many_refs}$"
regex_split! "^Subspecies of #{tax_ish}: #{one_or_many_refs};( of #{tax_ish}: #{one_or_many_refs};)+ of #{tax_ish}: #{one_or_many_refs}.$"

# ---

regex_split! "^Subspecies of #{tax_ish}: #{one_or_many_refs_w_extras}; of #{tax_ish}: #{one_or_many_refs_w_extras}$"
regex_split! "^Subspecies of #{tax_ish}: #{one_or_many_refs_w_extras}; of #{tax_ish}: #{one_or_many_refs_w_extras}.$"

regex_split! "^Subspecies of #{tax_ish}: #{one_or_many_refs_w_extras};( of #{tax_ish}: #{one_or_many_refs_w_extras};)+ of #{tax_ish}: #{one_or_many_refs_w_extras}$"
regex_split! "^Subspecies of #{tax_ish}: #{one_or_many_refs_w_extras};( of #{tax_ish}: #{one_or_many_refs_w_extras};)+ of #{tax_ish}: #{one_or_many_refs_w_extras}.$"

# ---

Activity.execute_script_activity User.find_by!(name: 'Fredrik Palmkron'), EDIT_SUMMARY

@jonkerz jonkerz closed this as completed Jul 27, 2020
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

1 participant