-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Convert quadrinomials to infrasubspecies script #819
Comments
Batch 1: Quadrinomials where the target subspecies exists
Script
jonkerz = User.find 60
# Calls Activity.execute_script_activity with the running user and the summary for this batch.
Activity.execute_script_activity(jonkerz, "Convert quadrinomials to infrasubspecies [batch 1], see %github819")

# For PaperTrail: versions written below get the bot's id as whodunnit.
antcat_bot = User.find(62)
PaperTrail.request.whodunnit = antcat_bot.id
# Subspecies whose name contains at least three spaces, i.e. four or more words.
# The space count is computed in SQL as the length difference after removing spaces.
def quadrinomials
  at_least_three_spaces = "(LENGTH(names.name) - LENGTH(REPLACE(names.name, ' ', '')) >= 3) "
  with_names = Subspecies.joins(:name)
  with_names.where(at_least_three_spaces)
end
# Prints how many quadrinomial subspecies remain.
#
# The original only built the string and relied on the caller (e.g. a console
# echoing return values) to display it; it now actually writes to stdout.
#
# @return [String] the printed message, kept as the return value for
#   backward compatibility with the previous behavior
def puts_stats
  message = "Quadrinomials count: #{quadrinomials.count}"
  puts message
  message
end
# Converts one quadrinomial Subspecies into an Infrasubspecies that belongs to
# the subspecies named by the first three words of its name.
#
# Does nothing (apart from logging) when no such subspecies exists.
#
# @param soon_infrasubspecies the quadrinomial subspecies record to convert
# @param antcat_bot the user passed to create_activity for the update
# @return [nil] when no target subspecies was found; otherwise the value of the transaction block
# @raise [RuntimeError] if the taxon has soft-validation warnings, if its name
#   contains anything other than letters, blanks and hyphens, or if more than
#   one target subspecies matches
def fix!(soon_infrasubspecies, antcat_bot)
  taxon_id = soon_infrasubspecies.id
  name_string = soon_infrasubspecies.name_cache

  raise "#{taxon_id}: has soft-validation issues" if soon_infrasubspecies.soft_validation_warnings.size.positive?

  # \A and \z anchor the whole string. The previous ^ and $ anchor per line, so
  # a name with an embedded newline passed as long as one line looked clean.
  unless /\A[[:alpha:][:blank:]-]+\z/.match?(name_string)
    raise "#{taxon_id}: #{name_string} contains weird characters"
  end

  # The first three words are the name of the subspecies this taxon should belong to.
  target_subspecies_name_string = name_string.split.first(3).join(' ')
  target_subspecies = Subspecies.where(name_cache: target_subspecies_name_string)
  target_subspecies_count = target_subspecies.count

  if target_subspecies_count.zero?
    puts "#{taxon_id}: found no subspecies".blue
    return
  end
  raise "#{taxon_id}: found too many subspecies" if target_subspecies_count > 1

  the_target_subspecies = target_subspecies.first

  Taxon.transaction do
    soon_infrasubspecies.name.update!(type: 'InfrasubspeciesName')
    # update_columns writes the new type directly, without validations or callbacks.
    soon_infrasubspecies.update_columns(type: 'Infrasubspecies')

    # Reload the row through the Infrasubspecies class so the record has the new type.
    infrasubspecies = Infrasubspecies.find(taxon_id)
    infrasubspecies.update!(subspecies: the_target_subspecies)
    puts "#{taxon_id}: fixed!".green

    if infrasubspecies.soft_validation_warnings.size.positive?
      puts "#{taxon_id}: but it has soft-validation issues...".red
    end

    infrasubspecies.create_activity :update, antcat_bot, edit_summary: "Convert subspecies quadrinomial to infrasubspecies quadrinomial, see %github819"
  end
end
# Report the count, try to convert every quadrinomial, then report the count again.
puts_stats
quadrinomials.each { |taxon| fix!(taxon, antcat_bot) }
nil # presumably here so a console does not echo the whole result set -- confirm
puts_stats
puts "Done"
Results
Quadrinomials count: 763
430274: found no subspecies
Quadrinomials count: 474
Step 2: Re-create missing subspecies
Script
jonkerz = User.find(60)
# Calls Activity.execute_script_activity with the running user and the summary for this step.
Activity.execute_script_activity(jonkerz, "Reified missing subspecies for quadrinomial, see %github819")
# For PaperTrail
# Looks up user 62 on the first call and reuses the cached record afterwards.
def antcat_bot
  return @antcat_bot if @antcat_bot

  @antcat_bot = User.find(62)
end
# Versions written below get the bot's id as whodunnit.
PaperTrail.request.whodunnit = antcat_bot.id

# Ids of reified taxa, collected by reify_subspecies and printed at the end of the script.
$reified_with_issues, $reified_without_issues = [], []
# Subspecies whose name contains at least three spaces, i.e. four or more words.
def quadrinomials
  space_count_filter = "(LENGTH(names.name) - LENGTH(REPLACE(names.name, ' ', '')) >= 3) "
  Subspecies
    .joins(:name)
    .where(space_count_filter)
end
# Re-creates a deleted subspecies (and its name) from PaperTrail versions.
#
# @param subspecies_version a PaperTrail version of the missing subspecies taxon
# @return the already-existing or newly reified taxon, or nil when the reified
#   record was skipped (no species, or unavailable-uncategorized without a
#   current valid taxon)
# @raise [RuntimeError] if the version does not reify to a Subspecies, if the
#   name row already exists, if no name version can be reified, or if the
#   record is not persisted after saving
def reify_subspecies(subspecies_version)
  puts "reify_subspecies.... version #{subspecies_version.id}".blue

  # Fetch the record itself: the original assigned the boolean result of
  # `exists?` and so returned `true` rather than the taxon.
  existing_taxon = Taxon.find_by(id: subspecies_version.item_id)
  if existing_taxon
    puts "taxon exists now".green
    return existing_taxon
  end

  reified = subspecies_version.reify
  raise "reified subspecies is not a subspecies".red unless reified.is_a?(Subspecies)

  unless reified.species
    puts "reified subspecies has no species".red
    return
  end

  # Message corrected: this branch is taken when current_valid_taxon is missing.
  if reified.status == Status::UNAVAILABLE_UNCATEGORIZED && reified.current_valid_taxon.nil?
    puts "#{subspecies_version.id}: reified subspecies has no current_valid_taxon".red
    return
  end

  raise "Name #{reified.name_id} already exists" if Name.where(id: reified.name_id).exists?

  # Use the latest recorded version of the name row to rebuild the name.
  reified_name = PaperTrail::Version.where(item_type: "Name", item_id: reified.name_id).last&.reify
  raise "found no name to reify" unless reified_name

  Taxon.transaction do
    # save! raises on failure, so no extra truthiness check is needed.
    reified_name.save!
    # Give nil flags an explicit false; presumably older versions lack these
    # columns -- TODO confirm.
    reified.ichnotaxon ||= false
    reified.nomen_nudum ||= false
    reified.collective_group_name ||= false
    reified.update!(name: reified_name)
    puts "reified #{reified.id}".green
  end

  raise "could not persist subspecies version #{subspecies_version.id}" unless reified.persisted?

  if reified.soft_validation_warnings.present?
    puts "#{reified.id}: reified has soft-validation issues...".red
    $reified_with_issues << reified.id
    reified.create_activity :create, antcat_bot, edit_summary: "Reified missing subspecies for quadrinomial (taxon has soft-validation issues), see %github819"
  else
    $reified_without_issues << reified.id
    reified.create_activity :create, antcat_bot, edit_summary: "Reified missing subspecies for quadrinomial, see %github819"
  end

  reified
end
# For every quadrinomial, find the newest PaperTrail taxon version whose
# serialized object mentions the expected parent subspecies name (the first
# three words of the quadrinomial) and try to reify it.
quadrinomials.each do |taxon|
  name_string = taxon.name_cache

  raise "#{taxon.id}: has soft-validation issues" if taxon.soft_validation_warnings.size.positive?

  # \A and \z anchor the whole string. The previous ^ and $ anchor per line, so
  # a name with an embedded newline could slip through into the LIKE pattern.
  unless /\A[[:alpha:][:blank:]-]+\z/.match?(name_string)
    raise "#{taxon.id}: #{name_string} contains weird characters"
  end

  target_subspecies_name_string = name_string.split.first(3).join(' ')
  # The trailing "\n" stops the pattern from matching longer names that share this prefix.
  subspecies_version = PaperTrail::Version.
    where(item_type: "Taxon").
    where("object LIKE ?", "%name_cache: #{target_subspecies_name_string}\n%").
    last

  if subspecies_version
    reify_subspecies subspecies_version
  else
    puts "no subspecies version".red
  end
end
nil
# Print the collected ids so they can be pasted back into the issue.
all_reified = $reified_with_issues + $reified_without_issues
puts(
  "reified_with_issues = #{$reified_with_issues}",
  "reified_without_issues = #{$reified_without_issues}",
  "all_reified = #{all_reified}"
)
puts "Done"
Results
reified_with_issues = [497287, 505104, 504885, 491154, 505448, 491526, 490467, 503153, 464741, 490044, 490574, 465957, 491372, 491120, 490618, 490074, 468634, 505895, 503332, 506856, 493406, 474883, 489783, 479474, 504415]
reified_without_issues = [495560, 462941, 462965, 463175, 463496, 463950, 464622, 465638, 466281, 464310, 464240, 484921, 465222, 466309, 465243, 490480, 466590, 466785, 502707, 490110, 484327, 495209, 466360, 484067, 484544, 466679, 464484, 485279, 466965, 467194, 485524, 467899, 467839, 467768, 468269, 492242, 468586, 468180, 467863, 485569, 485547, 468912, 469146, 469078, 503579, 469088, 469835, 470938, 470915, 470764, 470233, 486559, 471473, 471524, 471512, 471780, 472253, 506348, 472871, 473260, 473004, 474014, 473690, 474020, 473799, 473844, 473985, 474100, 487479, 474205, 475486, 475633, 475542, 477616, 476479, 476610, 476616, 477908, 477856, 487997, 488218, 488179, 477886, 488255, 488112, 478282, 478521, 488143, 488261, 488100, 478098, 477876, 488161, 488205, 479026, 488823, 488707, 479605, 479793, 488826, 488811, 488936, 480053, 488954, 480405, 480867, 481533, 481701, 481039, 481857, 489463, 482134, 482352, 466921, 471750, 472177, 480387, 483108, 466001, 465681]
all_reified = [497287, 505104, 504885, 491154, 505448, 491526, 490467, 503153, 464741, 490044, 490574, 465957, 491372, 491120, 490618, 490074, 468634, 505895, 503332, 506856, 493406, 474883, 489783, 479474, 504415, 495560, 462941, 462965, 463175, 463496, 463950, 464622, 465638, 466281, 464310, 464240, 484921, 465222, 466309, 465243, 490480, 466590, 466785, 502707, 490110, 484327, 495209, 466360, 484067, 484544, 466679, 464484, 485279, 466965, 467194, 485524, 467899, 467839, 467768, 468269, 492242, 468586, 468180, 467863, 485569, 485547, 468912, 469146, 469078, 503579, 469088, 469835, 470938, 470915, 470764, 470233, 486559, 471473, 471524, 471512, 471780, 472253, 506348, 472871, 473260, 473004, 474014, 473690, 474020, 473799, 473844, 473985, 474100, 487479, 474205, 475486, 475633, 475542, 477616, 476479, 476610, 476616, 477908, 477856, 487997, 488218, 488179, 477886, 488255, 488112, 478282, 478521, 488143, 488261, 488100, 478098, 477876, 488161, 488205, 479026, 488823, 488707, 479605, 479793, 488826, 488811, 488936, 480053, 488954, 480405, 480867, 481533, 481701, 481039, 481857, 489463, 482134, 482352, 466921, 471750, 472177, 480387, 483108, 466001, 465681] |
Batch 2: Quadrinomials where a
|
Batch 3/4/5
Script
# Summary used for both the per-taxon activities and the script activity.
EDIT_SUMMARY = "Convert quadrinomials to infrasubspecies [batch 5], see %github819"
# Bot user passed to create_activity and used as PaperTrail's whodunnit.
ANTCATBOT = User.find_by!(name: "AntCatBot")

# For activities and PaperTrail.
RequestStore.store[:current_request_uuid] = SecureRandom.uuid
PaperTrail.request.whodunnit = ANTCATBOT.id
# Subspecies whose name contains at least three spaces, i.e. four or more words.
def quadrinomials
  name_has_three_spaces = "(LENGTH(names.name) - LENGTH(REPLACE(names.name, ' ', '')) >= 3) "
  joined = Subspecies.joins(:name)
  joined.where(name_has_three_spaces)
end
# Prints the number of remaining quadrinomial subspecies and the number of
# infrasubspecies, one per line.
def puts_stats
  lines = [
    "Quadrinomials count: #{quadrinomials.count}",
    "Infrasubspecies count: #{Infrasubspecies.count}"
  ]
  puts lines
end
# Converts one quadrinomial Subspecies into an Infrasubspecies that belongs to
# the subspecies named by the first three words of its name.
#
# Does nothing (apart from logging) when the target subspecies is missing or has
# soft-validation issues other than NonValidTaxaWithACurrentTaxonThatIsNotValid.
#
# @param soon_infrasubspecies the quadrinomial subspecies record to convert
# @param antcat_bot the user passed to create_activity for the update
# @param edit_summary [String] edit summary stored with the activity
# @return [nil] when the taxon was skipped; otherwise the value of the transaction block
# @raise [RuntimeError] if the taxon has failed soft validations, if its name
#   contains anything other than letters, blanks and hyphens, or if more than
#   one target subspecies matches
def fix!(soon_infrasubspecies, antcat_bot, edit_summary)
  taxon_id = soon_infrasubspecies.id
  name_string = soon_infrasubspecies.name_cache

  raise "#{taxon_id}: has soft-validation issues" if soon_infrasubspecies.soft_validations.failed?

  # \A and \z anchor the whole string. The previous ^ and $ anchor per line, so
  # a name with an embedded newline passed as long as one line looked clean.
  unless /\A[[:alpha:][:blank:]-]+\z/.match?(name_string)
    raise "#{taxon_id}: #{name_string} contains weird characters"
  end

  # The first three words are the name of the subspecies this taxon should belong to.
  target_subspecies_name_string = name_string.split.first(3).join(' ')
  target_subspecies = Subspecies.where(name_cache: target_subspecies_name_string)
  target_subspecies_count = target_subspecies.count

  if target_subspecies_count.zero?
    puts "#{taxon_id}: found no subspecies".blue
    return
  end
  raise "#{taxon_id}: found too many subspecies" if target_subspecies_count > 1

  the_target_subspecies = target_subspecies.first

  # Failures from this one database script do not block the conversion.
  the_target_subspecies_validation_issues = the_target_subspecies.soft_validations.failed.reject do |validation|
    validation.database_script.is_a?(DatabaseScripts::NonValidTaxaWithACurrentTaxonThatIsNotValid)
  end
  if the_target_subspecies_validation_issues.present?
    puts "target subspecies #{the_target_subspecies.id} has soft-validation issues"
    return
  end

  Taxon.transaction do
    soon_infrasubspecies.name.update!(type: 'InfrasubspeciesName')
    # update_columns writes the new type directly, without validations or callbacks.
    soon_infrasubspecies.update_columns(type: 'Infrasubspecies')

    # Reload the row through the Infrasubspecies class so the record has the new type.
    infrasubspecies = Infrasubspecies.find(taxon_id)
    infrasubspecies.update!(subspecies: the_target_subspecies)
    puts "#{taxon_id}: fixed!".green

    if infrasubspecies.soft_validations.failed.present?
      puts "#{taxon_id}: but it has soft-validation issues...".red
    end

    infrasubspecies.create_activity :update, antcat_bot, edit_summary: edit_summary
  end
end
# Report counts, try to convert every quadrinomial, record the script
# activity, then report counts again.
puts_stats
quadrinomials.each { |taxon| fix!(taxon, ANTCATBOT, EDIT_SUMMARY) }
nil # presumably here so a console does not echo the whole result set -- confirm
script_runner = User.find_by!(name: 'Fredrik Palmkron')
Activity.execute_script_activity(script_runner, EDIT_SUMMARY)
puts_stats
puts("Done")
Output 3
Quadrinomials count: 248
Infrasubspecies count: 491
430288: fixed!
430355: fixed!
430770: fixed!
430771: fixed!
431244: fixed!
431284: fixed!
431285: fixed!
431426: found no subspecies
431824: fixed!
431825: fixed!
431902: found no subspecies
431925: fixed!
431949: found no subspecies
431955: found no subspecies
432115: found no subspecies
432243: fixed!
432298: fixed!
432303: fixed!
432304: fixed!
432306: fixed!
432326: fixed!
432337: found no subspecies
432435: fixed!
432495: fixed!
432650: fixed!
432663: found no subspecies
432683: fixed!
432723: fixed!
432747: fixed!
432758: found no subspecies
432770: fixed!
432837: fixed!
432926: found no subspecies
433012: fixed!
433082: fixed!
433083: fixed!
433085: fixed!
433149: fixed!
433167: found no subspecies
433209: fixed!
433261: fixed!
433293: found no subspecies
433336: fixed!
433338: fixed!
433385: fixed!
433396: fixed!
433522: fixed!
433622: fixed!
433644: fixed!
433776: fixed!
433841: fixed!
433858: fixed!
433930: found no subspecies
433946: fixed!
433968: fixed!
433992: fixed!
434372: fixed!
434373: fixed!
434378: fixed!
434379: fixed!
434384: fixed!
434394: fixed!
434398: fixed!
434413: found no subspecies
434415: fixed!
434442: fixed!
434492: fixed!
434504: fixed!
434511: fixed!
434621: fixed!
434740: fixed!
434857: found no subspecies
435181: fixed!
435246: fixed!
435373: fixed!
435378: fixed!
435396: found no subspecies
435431: fixed!
435450: found no subspecies
435470: fixed!
435481: fixed!
435512: fixed!
435530: fixed!
435555: found no subspecies
435753: fixed!
435773: fixed!
435801: fixed!
435856: fixed!
435875: fixed!
435962: fixed!
436060: fixed!
436066: fixed!
436092: fixed!
436104: fixed!
436324: found no subspecies
436329: found no subspecies
436341: found no subspecies
436358: fixed!
436359: found no subspecies
436768: fixed!
436916: fixed!
436923: fixed!
436925: fixed!
436942: fixed!
437006: fixed!
437108: fixed!
437266: fixed!
437293: fixed!
437354: fixed!
437382: fixed!
437384: fixed!
437455: fixed!
437575: fixed!
437579: fixed!
437627: fixed!
437753: fixed!
437827: fixed!
437830: fixed!
437841: found no subspecies
437846: fixed!
438026: found no subspecies
438160: fixed!
438169: fixed!
438172: found no subspecies
438492: fixed!
438500: fixed!
438524: fixed!
438542: fixed!
438571: fixed!
438647: fixed!
438936: fixed!
438974: fixed!
439015: found no subspecies
439365: fixed!
439546: fixed!
439558: fixed!
439569: found no subspecies
439583: fixed!
439602: fixed!
439603: found no subspecies
439604: found no subspecies
439649: found no subspecies
439654: found no subspecies
439843: fixed!
439891: fixed!
440011: fixed!
440051: fixed!
440066: fixed!
440079: fixed!
440108: fixed!
440134: fixed!
440151: fixed!
440176: found no subspecies
440177: found no subspecies
440185: fixed!
440196: fixed!
440230: fixed!
440237: fixed!
440251: fixed!
440342: found no subspecies
440354: fixed!
440356: fixed!
440486: fixed!
440523: fixed!
440551: fixed!
440570: fixed!
440716: found no subspecies
440720: fixed!
440728: fixed!
440778: fixed!
440795: fixed!
440830: found no subspecies
440863: fixed!
440888: found no subspecies
441392: fixed!
441425: found no subspecies
441484: fixed!
441666: fixed!
442478: fixed!
442494: fixed!
442516: fixed!
442539: fixed!
442586: fixed!
442589: fixed!
442600: fixed!
443125: fixed!
443340: fixed!
443659: found no subspecies
443752: fixed!
443828: fixed!
443954: fixed!
444639: found no subspecies
444871: found no subspecies
444876: found no subspecies
444886: found no subspecies
444888: found no subspecies
444941: found no subspecies
445123: fixed!
445189: fixed!
445249: fixed!
445627: fixed!
446125: fixed!
446357: found no subspecies
446714: fixed!
446734: fixed!
446742: found no subspecies
446748: fixed!
447218: fixed!
447267: fixed!
447429: fixed!
447675: fixed!
449304: found no subspecies
450089: fixed!
450090: found no subspecies
450175: found no subspecies
450184: found no subspecies
450227: fixed!
450252: fixed!
450288: fixed!
450307: found no subspecies
450351: fixed!
450356: fixed!
450370: found no subspecies
450480: fixed!
450618: fixed!
450639: found no subspecies
450681: found no subspecies
450748: found no subspecies
450760: found no subspecies
450796: fixed!
450906: fixed!
451087: found no subspecies
451357: found no subspecies
451376: fixed!
451399: fixed!
451424: fixed!
456717: found no subspecies
457066: found no subspecies
457083: found no subspecies
457255: found no subspecies
457703: found no subspecies
457780: found no subspecies
458031: found no subspecies
458333: found no subspecies
458338: found no subspecies
459909: found no subspecies
461383: found no subspecies
508189: found no subspecies
Quadrinomials count: 70
Infrasubspecies count: 669
Output 4
Quadrinomials count: 70
Infrasubspecies count: 670
431426: found no subspecies
431902: found no subspecies
431949: found no subspecies
431955: fixed!
432115: found no subspecies
432337: found no subspecies
432663: found no subspecies
432758: found no subspecies
432926: found no subspecies
433167: found no subspecies
433293: found no subspecies
433930: found no subspecies
434413: found no subspecies
434857: found no subspecies
435396: found no subspecies
435450: found no subspecies
435555: found no subspecies
436324: found no subspecies
436329: found no subspecies
436341: found no subspecies
436359: found no subspecies
437841: fixed!
438026: fixed!
438172: found no subspecies
439015: fixed!
439569: found no subspecies
439603: found no subspecies
439604: found no subspecies
439649: fixed!
439654: fixed!
440176: fixed!
440177: found no subspecies
440342: found no subspecies
440716: fixed!
440830: fixed!
440888: found no subspecies
441425: fixed!
443659: found no subspecies
444639: found no subspecies
444871: found no subspecies
444876: found no subspecies
444886: found no subspecies
444888: found no subspecies
444941: fixed!
446357: fixed!
446742: fixed!
449304: found no subspecies
450090: fixed!
450175: fixed!
450184: fixed!
450307: found no subspecies
450370: fixed!
450639: found no subspecies
450681: fixed!
450748: fixed!
450760: fixed!
451087: found no subspecies
451357: found no subspecies
456717: fixed!
457066: found no subspecies
457083: fixed!
457255: found no subspecies
457703: fixed!
457780: fixed!
458031: fixed!
458333: found no subspecies
458338: found no subspecies
459909: fixed!
461383: found no subspecies
508189: fixed!
Quadrinomials count: 43
Infrasubspecies count: 697
Output 5
Quadrinomials count: 43
Infrasubspecies count: 700
431426: fixed!
431902: fixed!
431949: fixed!
432115: fixed!
432337: fixed!
432663: fixed!
432758: fixed!
432926: fixed!
433167: fixed!
433293: fixed!
433930: fixed!
434413: fixed!
434857: fixed!
435396: fixed!
435450: fixed!
435555: fixed!
436324: fixed!
436329: fixed!
436341: fixed!
436359: fixed!
438172: fixed!
439569: fixed!
439603: fixed!
439604: fixed!
440177: fixed!
440342: fixed!
440888: fixed!
443659: fixed!
444639: fixed!
444871: fixed!
444876: fixed!
444886: fixed!
444888: fixed!
449304: fixed!
450307: fixed!
450639: fixed!
451087: fixed!
451357: fixed!
457066: fixed!
457255: fixed!
458333: fixed!
458338: fixed!
461383: fixed!
Quadrinomials count: 0
Infrasubspecies count: 743 |
See #714, AC issue: https://antcat.org/issues/41
TODO:
The text was updated successfully, but these errors were encountered: