From 66dd3edea3a912503f8b5e5d3cc872589d2ad626 Mon Sep 17 00:00:00 2001
From: evazion
Date: Thu, 23 May 2024 13:14:33 -0500
Subject: [PATCH] Fix #5721: Add bookmarklet support for Toyhou.se.

This only supports uploading individual images:

https://toyhou.se/427Deer#55232380
https://toyhou.se/~images/58037599
https://toyhou.se/19108771.june-human-#58037599
https://toyhou.se/2712983.cudlil/19136842.reference-sheet#73741617

Not entire galleries at once:

https://toyhou.se/2712983.cudlil/19136838.art-by-others
https://toyhou.se/2712983.cudlil/gallery
https://toyhou.se/2712983.cudlil

This is because each image in a gallery can have a different artist and
different commentary, so they're treated as separate posts.
---
 app/logical/source/extractor/toyhouse.rb |  76 +++++++++
 app/logical/source/url.rb                |   1 +
 app/logical/source/url/toyhouse.rb       | 105 ++++++++++++
 test/unit/sources/toyhouse_test.rb       | 194 +++++++++++++++++++++++
 4 files changed, 376 insertions(+)
 create mode 100644 app/logical/source/extractor/toyhouse.rb
 create mode 100644 app/logical/source/url/toyhouse.rb
 create mode 100644 test/unit/sources/toyhouse_test.rb

diff --git a/app/logical/source/extractor/toyhouse.rb b/app/logical/source/extractor/toyhouse.rb
new file mode 100644
index 000000000..6fe565f6e
--- /dev/null
+++ b/app/logical/source/extractor/toyhouse.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+# Extracts a single Toyhou.se image, plus its artist credit, character tags, and commentary, from the image's page
+# at https://toyhou.se/~images/:id.
+#
+# @see Source::URL::Toyhouse
+class Source::Extractor::Toyhouse < Source::Extractor
+  # @return [Array<String>] the image found on the image page; a direct image URL is returned unchanged when the
+  #   page has no image
+  def image_urls
+    # Only use the image URL as-is if we can't get the full size image from the page.
+    if parsed_url.image_url? && image_url_from_page.nil?
+      [parsed_url.to_s]
+    else
+      [image_url_from_page].compact
+    end
+  end
+
+  # @return [String, nil] the `src` of the first image inside `#content` on the image page
+  def image_url_from_page
+    page&.at("#content img")&.attr(:src)
+  end
+
+  # The artist credit link on the image page (nil if absent), used by both #profile_url and #display_name.
+  def artist_credit_link
+    page&.at(".image-credits .artist-credit a")
+  end
+
+  # Note that this can be a different artist on a different website.
+  # Ex: https://toyhou.se/2712983.cudlil/19136838.art-by-others#58116270
+  def profile_url
+    artist_credit_link&.attr(:href)&.then { |url| Source::URL.profile_url(url) || url }
+  end
+
+  # The artist credit link's text, or nil when that text is an http(s) URL rather than a name.
+  def display_name
+    name = artist_credit_link&.text
+    name unless name&.match?(%r{^https?://})
+  end
+
+  # @return [Array<Array(String, String)>] a [name, url] pair for each character badge on the image page
+  def tags
+    page&.css(".image-characters .character-name-badge").to_a.filter_map do |badge|
+      # A badge without a link can't be turned into a tag; skip it instead of letting URI.join raise.
+      next if badge[:href].blank?
+
+      url = URI.join("https://toyhou.se", badge[:href]).to_s
+
+      # Take the text of the badge's first child node and remove a trailing " (". Example badge texts:
+      # cudlil (art by others)
+      # June (Human)
+      character_name = badge.children.first&.text.to_s.delete_suffix(" (")
+
+      [character_name, url]
+    end
+  end
+
+  # @return [String, nil] the image description as an HTML string
+  def artist_commentary_desc
+    page&.at(".image-description.user-content")&.to_html
+  end
+
+  def dtext_artist_commentary_desc
+    DText.from_html(artist_commentary_desc, base_url: "https://toyhou.se")
+  end
+
+  # @return [String, nil] the image ID taken from the URL, or failing that from the referer URL
+  def image_id
+    parsed_url.image_id || parsed_referer&.image_id
+  end
+
+  # The image's page, requested via `http.cache(1.minute)`; nil when no image ID is known.
+  memoize def page
+    http.cache(1.minute).parsed_get("https://toyhou.se/~images/#{image_id}") if image_id.present?
+  end
+end
diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb
index 6db22634e..76cfc073a 100644
--- a/app/logical/source/url.rb
+++ b/app/logical/source/url.rb
@@ -106,6 +106,7 @@ class URL < Danbooru::URL
       Source::URL::URLShortener,
       Source::URL::Redgifs,
       Source::URL::Carrd,
+      Source::URL::Toyhouse,
     ]
 
     # Parse a URL into a subclass of Source::URL, or raise an exception if the URL is not a valid HTTP or HTTPS URL.
diff --git a/app/logical/source/url/toyhouse.rb b/app/logical/source/url/toyhouse.rb
new file mode 100644
index 000000000..34d372dda
--- /dev/null
+++ b/app/logical/source/url/toyhouse.rb
@@ -0,0 +1,105 @@
+# frozen_string_literal: true
+
+# Parses Toyhou.se URLs: direct image URLs on f2.toyhou.se / file.toyhou.se, and page URLs for individual images,
+# characters, galleries, and users.
+#
+# @see Source::Extractor::Toyhouse
+class Source::URL::Toyhouse < Source::URL
+  attr_reader :image_id, :image_hash, :character_id, :character_name, :gallery_id, :gallery_name, :username
+
+  def self.match?(url)
+    url.domain == "toyhou.se"
+  end
+
+  # Sets whichever of the attributes above can be read from the URL. The regexes use \A and \z rather than ^ and $
+  # so that a segment or fragment is only accepted when the whole string matches, not just one line of it.
+  def parse
+    case [subdomain, domain, *path_segments]
+
+    # https://f2.toyhou.se/file/f2-toyhou-se/characters/19108771?1670101610
+    # https://file.toyhou.se/characters/654769?1480733146
+    in *, "characters", character_id if image_url?
+      @character_id = character_id
+
+    # https://f2.toyhou.se/file/f2-toyhou-se/thumbnails/58037599_Ov5.png
+    # https://f2.toyhou.se/file/f2-toyhou-se/images/58037599_Ov5j4w66lQRw9G4.png
+    # https://f2.toyhou.se/file/f2-toyhou-se/watermarks/73741617_fFIUcJscE.png
+    # https://file.toyhou.se/images/2362055_rxkHiEqZOFFaOtX.png
+    # https://file.toyhou.se/thumbnails/2362055_rxk.png
+    in *, ("thumbnails" | "watermarks" | "images"), /\A\d+_[a-zA-Z0-9]+\./ if image_url?
+      @image_id, _ = filename.split("_")
+
+    # https://toyhou.se/~images/58037599
+    in _, "toyhou.se", "~images", image_id
+      @image_id = image_id
+
+    # https://toyhou.se/2712983.cudlil/19136842.reference-sheet/73741617
+    in _, "toyhou.se", /\A\d+\./ => character, /\A\d+\./ => gallery, /\A\d+\z/ => image_id
+      @character_id, _, @character_name = character.partition(".")
+      @gallery_id, _, @gallery_name = gallery.partition(".")
+      @image_id = image_id
+
+    # https://toyhou.se/2712983.cudlil/19136842.reference-sheet#73741617
+    in _, "toyhou.se", /\A\d+\./ => character, /\A\d+\./ => gallery
+      @character_id, _, @character_name = character.partition(".")
+      @gallery_id, _, @gallery_name = gallery.partition(".")
+      @image_id = fragment_image_id
+
+    # https://toyhou.se/19108771.june-human-/58037599
+    in _, "toyhou.se", /\A\d+\./ => character, /\A\d+\z/ => image_id
+      @character_id, _, @character_name = character.partition(".")
+      @image_id = image_id
+
+    # https://toyhou.se/19108771.june-human-#58037599
+    # https://toyhou.se/19108771.june-human-/gallery
+    in _, "toyhou.se", /\A\d+\./ => character, *rest
+      @character_id, _, @character_name = character.partition(".")
+      @image_id = fragment_image_id
+
+    # https://toyhou.se/427Deer
+    # https://toyhou.se/427Deer#55232380
+    # https://toyhou.se/427Deer/characters
+    # https://toyhou.se/lilcudds/characters/folder:539748
+    in _, "toyhou.se", username, *rest unless username.starts_with?("~")
+      @username = username
+      @image_id = fragment_image_id
+
+    # https://f2.toyhou.se/file/f2-toyhou-se/users/Missing_teeth?965 (profile picture)
+    # https://toyhou.se/~forums/71.art-marketplace/36671.-c-o-m-m-i-s-s-i-o-n-open- (forum post)
+    else
+      nil
+    end
+  end
+
+  # The image ID carried in the URL fragment (as in https://toyhou.se/427Deer#55232380), or nil when the fragment is
+  # missing or isn't entirely numeric.
+  def fragment_image_id
+    fragment if fragment&.match?(/\A\d+\z/)
+  end
+
+  # True for URLs on the f2.toyhou.se and file.toyhou.se hosts.
+  def image_url?
+    host.in?(%w[f2.toyhou.se file.toyhou.se])
+  end
+
+  # A page URL without an image ID (a gallery, character, or profile) doesn't identify a single image.
+  def bad_source?
+    !image_url? && image_id.blank?
+  end
+
+  # Builds the most specific page URL the parsed parts allow: character[/gallery][/image] when the character's ID and
+  # name are both known, otherwise the bare image page. Nil when neither is available.
+  def page_url
+    if character_id.present? && character_name.present?
+      gallery = "#{gallery_id}.#{gallery_name}" if gallery_id.present? && gallery_name.present?
+      ["https://toyhou.se", "#{character_id}.#{character_name}", gallery, image_id.presence].compact.join("/")
+    elsif image_id.present?
+      "https://toyhou.se/~images/#{image_id}"
+    end
+  end
+
+  # @return [String, nil] the user's profile page, when the URL named a user
+  def profile_url
+    "https://toyhou.se/#{username}" if username.present?
+  end
+end
diff --git a/test/unit/sources/toyhouse_test.rb b/test/unit/sources/toyhouse_test.rb
new file mode 100644
index 000000000..5a54dc65e
--- /dev/null
+++ b/test/unit/sources/toyhouse_test.rb
@@ -0,0 +1,194 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+module Sources
+  class ToyhouseTest < ActiveSupport::TestCase
+    context "Toyhou.se:" do
+      context "A thumbnail image URL" do
+        strategy_should_work(
+          "https://f2.toyhou.se/file/f2-toyhou-se/thumbnails/73744030_WfK.png",
+          image_urls: %w[https://f2.toyhou.se/file/f2-toyhou-se/watermarks/73744030_WfKyU9fkJ.png],
+          media_files: [{ file_size: 2_294_417 }],
+          page_url: "https://toyhou.se/~images/73744030",
+          profile_url: "https://toyhou.se/lilcudds",
+          profile_urls: %w[https://toyhou.se/lilcudds],
+          display_name: "lilcudds",
+          username: nil,
+          tags: [
+            ["cudlil", "https://toyhou.se/2712983.cudlil/19136829.art-by-me"],
+          ],
+          dtext_artist_commentary_title: "",
+          dtext_artist_commentary_desc: "her current design was a wip, these colors are off! !"
+        )
+      end
+
+      context "A full image URL" do
+        strategy_should_work(
+          "https://f2.toyhou.se/file/f2-toyhou-se/images/58037599_Ov5j4w66lQRw9G4.png",
+          image_urls: %w[https://f2.toyhou.se/file/f2-toyhou-se/images/58037599_Ov5j4w66lQRw9G4.png],
+          media_files: [{ file_size: 735_586 }],
+          page_url: "https://toyhou.se/~images/58037599",
+          profile_url: "https://toyhou.se/427Deer",
+          profile_urls: %w[https://toyhou.se/427Deer],
+          display_name: "427Deer",
+          username: nil,
+          tags: [
+            ["June (Human)", "https://toyhou.se/19108771.june-human-"],
+          ],
+          dtext_artist_commentary_title: "",
+          dtext_artist_commentary_desc: ""
+        )
+      end
+
+      context "A character image URL" do
+        strategy_should_work(
+          "https://f2.toyhou.se/file/f2-toyhou-se/characters/19108771?1670101610",
+          image_urls: %w[https://f2.toyhou.se/file/f2-toyhou-se/characters/19108771?1670101610],
+          media_files: [{ file_size: 23_176 }],
+          page_url: nil,
+          profile_urls: [],
+          display_name: nil,
+          username: nil,
+          tags: [],
+          dtext_artist_commentary_title: "",
+          dtext_artist_commentary_desc: ""
+        )
+      end
+
+      context "A post by the artist credited to themselves" do
+        strategy_should_work(
+          "https://toyhou.se/2712983.cudlil/19136829.art-by-me#73744030",
+          image_urls: %w[https://f2.toyhou.se/file/f2-toyhou-se/watermarks/73744030_WfKyU9fkJ.png],
+          media_files: [{ file_size: 2_294_417 }],
+          page_url: "https://toyhou.se/2712983.cudlil/19136829.art-by-me/73744030",
+          profile_url: "https://toyhou.se/lilcudds",
+          profile_urls: %w[https://toyhou.se/lilcudds],
+          display_name: "lilcudds",
+          username: nil,
+          tags: [
+            ["cudlil", "https://toyhou.se/2712983.cudlil/19136829.art-by-me"],
+          ],
+          dtext_artist_commentary_title: "",
+          dtext_artist_commentary_desc: "her current design was a wip, these colors are off! !"
+        )
+      end
+
+      context "A post with multiple characters and credited to a different Toyhou.se user" do
+        strategy_should_work(
+          "https://toyhou.se/2712983.cudlil/19136838.art-by-others/58116407",
+          image_urls: %w[https://f2.toyhou.se/file/f2-toyhou-se/watermarks/58116407_mAmE0QkZN.png?1700502692],
+          media_files: [{ file_size: 1_127_805 }],
+          page_url: "https://toyhou.se/2712983.cudlil/19136838.art-by-others/58116407",
+          profile_url: "https://toyhou.se/mrstinky_org",
+          profile_urls: %w[https://toyhou.se/mrstinky_org],
+          display_name: "mrstinky_org",
+          username: nil,
+          tags: [
+            ["cudlil", "https://toyhou.se/2712983.cudlil/19136838.art-by-others"],
+            ["bear bro", "https://toyhou.se/19228770.bear-bro"],
+            ["cuddles", "https://toyhou.se/19228781.cuddles"],
+          ],
+          dtext_artist_commentary_title: "",
+          dtext_artist_commentary_desc: ""
+        )
+      end
+
+      context "A post credited to a different artist on a different site" do
+        strategy_should_work(
+          "https://toyhou.se/~images/58116270",
+          image_urls: %w[https://f2.toyhou.se/file/f2-toyhou-se/watermarks/58116270_BOOKN8MO8.jpg?1700502658],
+          media_files: [{ file_size: 895_860 }],
+          page_url: "https://toyhou.se/~images/58116270",
+          profile_url: "https://twitter.com/Gaziter",
+          profile_urls: %w[https://twitter.com/Gaziter],
+          display_name: "Gaziter",
+          username: nil,
+          tags: [
+            ["cudlil", "https://toyhou.se/2712983.cudlil/19136838.art-by-others"],
+          ],
+          dtext_artist_commentary_title: "",
+          dtext_artist_commentary_desc: <<~EOS.chomp
+            i don't own the other 3 characters featured!! my persona is the bottom left char
+          EOS
+        )
+      end
+
+      context "A post credited to an artist without a display name" do
+        strategy_should_work(
+          "https://toyhou.se/1.firestarter/gallery#153",
+          image_urls: %w[https://f2.toyhou.se/file/f2-toyhou-se/images/153_bHwg5E1xvWGjmeU.png?1499247051],
+          media_files: [{ file_size: 889_161 }],
+          page_url: "https://toyhou.se/1.firestarter/153",
+          profile_url: "https://www.furaffinity.net/user/keii",
+          profile_urls: %w[https://www.furaffinity.net/user/keii],
+          display_name: nil,
+          username: nil,
+          tags: [
+            ["Firestarter", "https://toyhou.se/1.firestarter"],
+          ],
+          dtext_artist_commentary_title: "",
+          dtext_artist_commentary_desc: ""
+        )
+      end
+
+      # XXX Galleries aren't supported.
+      context "A character gallery" do
+        strategy_should_work(
+          "https://toyhou.se/2712983.cudlil/19136838.art-by-others",
+          image_urls: [],
+          page_url: "https://toyhou.se/2712983.cudlil/19136838.art-by-others",
+          profile_urls: [],
+          display_name: nil,
+          username: nil,
+          tags: [],
+          dtext_artist_commentary_title: "",
+          dtext_artist_commentary_desc: ""
+        )
+      end
+
+      context "A deleted or nonexistent post" do
+        strategy_should_work(
+          "https://toyhou.se/~images/999999999",
+          image_urls: [],
+          media_files: [],
+          page_url: "https://toyhou.se/~images/999999999",
+          profile_urls: [],
+          display_name: nil,
+          username: nil,
+          tags: [],
+          dtext_artist_commentary_title: "",
+          dtext_artist_commentary_desc: ""
+        )
+      end
+
+      should "Parse URLs correctly" do
+        assert(Source::URL.image_url?("https://f2.toyhou.se/file/f2-toyhou-se/thumbnails/58037599_Ov5.png"))
+        assert(Source::URL.image_url?("https://f2.toyhou.se/file/f2-toyhou-se/images/58037599_Ov5j4w66lQRw9G4.png"))
+        assert(Source::URL.image_url?("https://f2.toyhou.se/file/f2-toyhou-se/watermarks/73741617_fFIUcJscE.png"))
+        assert(Source::URL.image_url?("https://f2.toyhou.se/file/f2-toyhou-se/characters/19108771?1670101610"))
+        assert(Source::URL.image_url?("https://file.toyhou.se/images/2362055_rxkHiEqZOFFaOtX.png"))
+        assert(Source::URL.image_url?("https://file.toyhou.se/characters/654769?1480733146"))
+
+        assert(Source::URL.page_url?("https://toyhou.se/~images/58037599"))
+        assert(Source::URL.page_url?("https://toyhou.se/2712983.cudlil/19136842.reference-sheet/73741617"))
+        assert(Source::URL.page_url?("https://toyhou.se/2712983.cudlil/19136842.reference-sheet#73741617"))
+        assert(Source::URL.page_url?("https://toyhou.se/2712983.cudlil/19136842.reference-sheet"))
+        assert(Source::URL.page_url?("https://toyhou.se/19108771.june-human-/58037599"))
+        assert(Source::URL.page_url?("https://toyhou.se/19108771.june-human-#58037599"))
+        assert(Source::URL.page_url?("https://toyhou.se/19108771.june-human-/gallery"))
+        assert(Source::URL.page_url?("https://toyhou.se/19108771.june-human-"))
+        assert(Source::URL.page_url?("https://toyhou.se/427Deer#55232380"))
+
+        assert(Source::URL.profile_url?("https://toyhou.se/427Deer"))
+        assert(Source::URL.profile_url?("https://toyhou.se/427Deer/characters"))
+        assert(Source::URL.profile_url?("https://toyhou.se/lilcudds/characters/folder:539748"))
+
+        assert(Source::URL.parse("https://toyhou.se/2712983.cudlil/19136842.reference-sheet").bad_source?)
+        assert(Source::URL.parse("https://toyhou.se/19108771.june-human-/gallery").bad_source?)
+        assert(Source::URL.parse("https://toyhou.se/19108771.june-human-").bad_source?)
+        assert(Source::URL.parse("https://toyhou.se/427Deer").bad_source?)
+      end
+    end
+  end
+end