From 0fc1c70b483c44e4463ca2e41bc99692b0dc2717 Mon Sep 17 00:00:00 2001 From: ChunkyProgrammer <78101139+ChunkyProgrammer@users.noreply.github.com> Date: Sat, 15 Jul 2023 07:24:56 -0700 Subject: [PATCH] Move structs and parsing logic to hashtag.cr --- src/invidious/hashtag.cr | 141 +++++++++++++++----- src/invidious/helpers/serialized_yt_data.cr | 59 -------- src/invidious/yt_backend/extractors.cr | 34 ----- 3 files changed, 105 insertions(+), 129 deletions(-) diff --git a/src/invidious/hashtag.cr b/src/invidious/hashtag.cr index fd468ed60..15faba7c2 100644 --- a/src/invidious/hashtag.cr +++ b/src/invidious/hashtag.cr @@ -1,37 +1,118 @@ module Invidious::Hashtag extend self + struct HashtagPage + include DB::Serializable + + property videos : Array(SearchItem) | Array(Video) + property header : HashtagHeader? + property has_next_continuation : Bool + + def to_json(locale : String?, json : JSON::Builder) + json.object do + json.field "type", "hashtag" + if self.header != nil + json.field "header" do + self.header.to_json(json) + end + end + json.field "results" do + json.array do + self.videos.each do |item| + item.to_json(locale, json) + end + end + end + json.field "hasNextPage", self.has_next_continuation + end + end + + # TODO: remove the locale and follow the crystal convention + def to_json(locale : String?, _json : Nil) + JSON.build do |json| + to_json(locale, json) + end + end + + def to_json(json : JSON::Builder) + to_json(nil, json) + end + end + + struct HashtagHeader + include DB::Serializable + + property tag : String + property channel_count : Int64 + property video_count : Int64 + + def to_json(json : JSON::Builder) + json.object do + json.field "hashtag", self.tag + json.field "channelCount", self.channel_count + json.field "videoCount", self.video_count + end + end + + def to_json(_json : Nil) + JSON.build do |json| + to_json(json) + end + end + end + def fetch(hashtag : String, page : Int, region : String? 
= nil) : HashtagPage cursor = (page - 1) * 60 header = nil client_config = YoutubeAPI::ClientConfig.new(region: region) - # for any page besides the first page, get the list of videos + item = generate_continuation(hashtag, cursor) + # item is a ctoken if cursor > 0 - ctoken = generate_continuation(hashtag, cursor) - response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config) + response = YoutubeAPI.browse(continuation: item, client_config: client_config) else - # get first page + header info - response = YoutubeAPI.browse("FEhashtag", params: get_hashtag_first_page(hashtag), client_config: client_config) - if header = response.dig?("header") - header = parse_item(header).as(HashtagHeader) + # item browses the first page (including metadata) + response = YoutubeAPI.browse("FEhashtag", params: item, client_config: client_config) + if item_contents = response.dig?("header", "hashtagHeaderRenderer") + header = parse_hashtag_renderer(item_contents) end end - items, _ = extract_items(response) + items, next_continuation = extract_items(response) return HashtagPage.new({ - videos: items, - header: header, + videos: items, + header: header, + has_next_continuation: next_continuation != nil, }) end - def get_hashtag_first_page(hashtag : String) + def generate_continuation(hashtag : String, cursor : Int) object = { - "93:embedded" => { + "93:2:embedded" => { "1:string" => hashtag, "2:varint" => 0_i64, "3:varint" => 1_i64, }, } + if cursor > 0 + object = { + "80226972:embedded" => { + "2:string" => "FEhashtag", + "3:base64" => { + "1:varint" => 60_i64, # result count + "15:base64" => { + "1:varint" => cursor.to_i64, + "2:varint" => 0_i64, + }, + "93:2:embedded" => { + "1:string" => hashtag, + "2:varint" => 0_i64, + "3:varint" => 1_i64, + }, + }, + "35:string" => "browse-feedFEhashtag", + }, + } + end return object.try { |i| Protodec::Any.cast_json(i) } .try { |i| Protodec::Any.from_json(i) } @@ -39,31 +120,19 @@ module Invidious::Hashtag .try { |i| 
URI.encode_www_form(i) } end - def generate_continuation(hashtag : String, cursor : Int) - object = { - "80226972:embedded" => { - "2:string" => "FEhashtag", - "3:base64" => { - "1:varint" => 60_i64, # result count - "15:base64" => { - "1:varint" => cursor.to_i64, - "2:varint" => 0_i64, - }, - "93:2:embedded" => { - "1:string" => hashtag, - "2:varint" => 0_i64, - "3:varint" => 1_i64, - }, - }, - "35:string" => "browse-feedFEhashtag", - }, - } + def parse_hashtag_renderer(item_contents) + info = extract_text(item_contents.dig?("hashtagInfoText")) || "" - continuation = object.try { |i| Protodec::Any.cast_json(i) } - .try { |i| Protodec::Any.from_json(i) } - .try { |i| Base64.urlsafe_encode(i) } - .try { |i| URI.encode_www_form(i) } + regex_match = /(?<videos>\d+\S)\D+(?<channels>\d+\S)/.match(info) - return continuation + hashtag = extract_text(item_contents.dig?("hashtag")) || "" + videos = short_text_to_number(regex_match.try &.["videos"]?.try &.to_s || "0") + channels = short_text_to_number(regex_match.try &.["channels"]?.try &.to_s || "0") + + return HashtagHeader.new({ + tag: hashtag, + channel_count: channels, + video_count: videos, + }) end end diff --git a/src/invidious/helpers/serialized_yt_data.cr b/src/invidious/helpers/serialized_yt_data.cr index 632f045d3..7c12ad0e3 100644 --- a/src/invidious/helpers/serialized_yt_data.cr +++ b/src/invidious/helpers/serialized_yt_data.cr @@ -274,63 +274,4 @@ struct Continuation end end -struct HashtagPage - include DB::Serializable - - property videos : Array(SearchItem) | Array(Video) - property header : HashtagHeader? 
- - def to_json(locale : String?, json : JSON::Builder) - json.object do - json.field "type", "hashtag" - if self.header != nil - json.field "header" do - self.header.to_json(json) - end - end - json.field "results" do - json.array do - self.videos.each do |item| - item.to_json(locale, json) - end - end - end - end - end - - # TODO: remove the locale and follow the crystal convention - def to_json(locale : String?, _json : Nil) - JSON.build do |json| - to_json(locale, json) - end - end - - def to_json(json : JSON::Builder) - to_json(nil, json) - end -end - -struct HashtagHeader - include DB::Serializable - - property tag : String - property channel_count : Int64 - property video_count : Int64 - - def to_json(json : JSON::Builder) - json.object do - json.field "type", "hashtagHeader" - json.field "hashtag", self.tag - json.field "channelCount", self.channel_count - json.field "videoCount", self.video_count - end - end - - def to_json(_json : Nil) - JSON.build do |json| - to_json(json) - end - end -end - alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index 32537de4b..6686e6e70 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -20,7 +20,6 @@ private ITEM_PARSERS = { Parsers::ReelItemRendererParser, Parsers::ItemSectionRendererParser, Parsers::ContinuationItemRendererParser, - Parsers::HashtagHeaderRenderer, } private alias InitialData = Hash(String, JSON::Any) @@ -551,39 +550,6 @@ private module Parsers return {{@type.name}} end end - - # Parses an InnerTube hashtagHeaderRender into a HashtagHeaderRender. - # Returns nil when the given object isn't a hashtagHeaderRender. 
- # - # hashtagHeaderRender contains metadate of the hashtag page such as video count and channel count - # - module HashtagHeaderRenderer - def self.process(item : JSON::Any, author_fallback : AuthorFallback) - if item_contents = item["hashtagHeaderRenderer"]? - return self.parse(item_contents) - end - end - - private def self.parse(item_contents) - info = extract_text(item_contents.dig?("hashtagInfoText")) || "" - - regex_match = /(?<videos>\d+\S)\D+(?<channels>\d+\S)/.match(info) - - hashtag = extract_text(item_contents.dig?("hashtag")) || "" - videos = short_text_to_number(regex_match.try &.["videos"]?.try &.to_s || "0") - channels = short_text_to_number(regex_match.try &.["channels"]?.try &.to_s || "0") - - return HashtagHeader.new({ - tag: hashtag, - channel_count: channels, - video_count: videos, - }) - end - - def self.parser_name - return {{@type.name}} - end - end end # The following are the extractors for extracting an array of items from