diff --git a/config/config.exs b/config/config.exs index f3d93ff86..0f38df2fb 100644 --- a/config/config.exs +++ b/config/config.exs @@ -31,6 +31,7 @@ config :pre_commit, commands: ["format"], verbose: false # of this file so it overrides the configuration defined above. config :groupher_server, :general, + site_host: "https://coderplanets.com", page_size: 30, inner_page_size: 5, # today is not include diff --git a/lib/groupher_server/cms/delegates/article_curd.ex b/lib/groupher_server/cms/delegates/article_curd.ex index 21e5fce72..e5994d163 100644 --- a/lib/groupher_server/cms/delegates/article_curd.ex +++ b/lib/groupher_server/cms/delegates/article_curd.ex @@ -6,7 +6,8 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do import GroupherServer.CMS.Helper.Matcher - import Helper.Utils, only: [done: 1, pick_by: 2, module_to_atom: 1, get_config: 2, ensure: 2] + import Helper.Utils, + only: [done: 1, pick_by: 2, module_to_atom: 1, get_config: 2, ensure: 2, module_to_upcase: 1] import GroupherServer.CMS.Delegate.Helper, only: [mark_viewer_emotion_states: 2] import Helper.ErrorCode @@ -17,7 +18,7 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do alias Accounts.Model.User alias CMS.Model.{Author, Community, PinnedArticle, Embeds} - alias CMS.Delegate.{ArticleCommunity, ArticleComment, ArticleTag, CommunityCURD} + alias CMS.Delegate.{ArticleCommunity, ArticleComment, ArticleTag, CommunityCURD, CiteTasks} alias Ecto.Multi @@ -163,6 +164,9 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do |> Multi.run(:update_user_published_meta, fn _, _ -> Accounts.update_published_states(uid, thread) end) + |> Multi.run(:block_tasks, fn _, %{create_article: article} -> + Later.run({CiteTasks, :handle, [article]}) + end) # TODO: run mini tasks |> Multi.run(:mention_users, fn _, %{create_article: article} -> # article.body |> Jason.decode!() |> 各种小 task @@ -384,6 +388,7 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do defp do_create_article(model, attrs, %Author{id: author_id}, %Community{id: community_id}) do # special article like Repo do not have :body, assign it with default-empty rich text body = Map.get(attrs, :body, Converter.Article.default_rich_text()) + meta = @default_article_meta |> Map.merge(%{thread: module_to_upcase(model)}) attrs = attrs |> Map.merge(%{body: body}) with {:ok, attrs} <- add_rich_text_attrs(attrs) do @@ -392,7 +397,7 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do |> Ecto.Changeset.put_change(:emotions, @default_emotions) |> Ecto.Changeset.put_change(:author_id, author_id) |> Ecto.Changeset.put_change(:original_community_id, community_id) - |> Ecto.Changeset.put_embed(:meta, @default_article_meta) + |> Ecto.Changeset.put_embed(:meta, meta) |> Repo.insert() end end @@ -444,7 +449,7 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do # create done defp result({:ok, %{set_active_at_timestamp: result}}) do - Later.exec({__MODULE__, :notify_admin_new_article, [result]}) + Later.run({__MODULE__, :notify_admin_new_article, [result]}) {:ok, result} end diff --git a/lib/groupher_server/cms/delegates/cite_tasks.ex b/lib/groupher_server/cms/delegates/cite_tasks.ex new file mode 100644 index 000000000..f816a4570 --- /dev/null +++ b/lib/groupher_server/cms/delegates/cite_tasks.ex @@ -0,0 +1,225 @@ +defmodule GroupherServer.CMS.Delegate.CiteTasks do + @moduledoc """ + run tasks in every article blocks if need + + current task: "cite link" and "mention" + + ## cite link + + 我被站内哪些文章或评论引用了,是值得关注的事 + 我引用了谁不重要,帖子里链接已经表明了, 这和 github issue 的双向链接不一样,因为一般不需要关注这个 + 帖子是否解决,是否被 merge 等状态。 + + 基本结构: + + cited_thread, cited_article_id, [xxx_article]_id, [block_id, block2_id], + + POST post_333 -> cited_article_333, [block_id, block2_id]] + + cited_type, cited_content_id, [contents]_id, [block_id, cited_block_id], + + cited_type: thread or comment + content: article or comment + # cited_article_comment_id, [xxx_article]_id, [block_id, block2_id, ...], + """ + + import Ecto.Query, warn: false + import Helper.Utils, only: [get_config: 2, thread_of_article: 1, done: 1] + import GroupherServer.CMS.Helper.Matcher + import Helper.ErrorCode + + alias GroupherServer.{CMS, Repo} + alias CMS.Model.CitedContent + alias Helper.ORM + + alias Ecto.Multi + + @site_host get_config(:general, :site_host) + @article_threads get_config(:article, :threads) + @valid_article_prefix Enum.map(@article_threads, &"#{@site_host}/#{&1}/") + + def handle(%{body: body} = article) do + with {:ok, %{"blocks" => blocks}} <- Jason.decode(body), + article <- Repo.preload(article, author: :user) do + Multi.new() + |> Multi.run(:delete_all_cited_contents, fn _, _ -> + delete_all_cited_contents(article) + end) + |> Multi.run(:update_cited_info, fn _, _ -> + blocks + |> Enum.reduce([], &(&2 ++ parse_cited_info_per_block(article, &1))) + |> merge_same_cited_article_block + |> update_cited_info + end) + |> Repo.transaction() + |> result() + end + end + + # delete all records before insert_all, this will dynamiclly update + # those cited info when update article + # 插入引用记录之前先全部清除,这样可以在更新文章的时候自动计算引用信息 + defp delete_all_cited_contents(article) do + with {:ok, thread} <- thread_of_article(article), + {:ok, info} <- match(thread) do + query = from(c in CitedContent, where: field(c, ^info.foreign_key) == ^article.id) + + ORM.delete_all(query, :if_exist) + end + end + + # defp batch_done + + defp update_cited_info(cited_contents) do + clean_cited_contents = Enum.map(cited_contents, &Map.delete(&1, :cited_article)) + # IO.inspect(clean_cited_contents, label: "clean_cited_contents") + with true <- {0, nil} !== Repo.insert_all(CitedContent, clean_cited_contents) do + update_citing_count(cited_contents) + else + _ -> {:error, "insert cited content error"} + end + end + + defp update_citing_count(cited_contents) do + Enum.all?(cited_contents, fn content -> + count_query = from(c in CitedContent, where: c.cited_by_id == ^content.cited_by_id) + count = Repo.aggregate(count_query, :count) + + cited_article = content.cited_article + meta = Map.merge(cited_article.meta, %{citing_count: count}) + + case cited_article |> ORM.update_meta(meta) do + {:ok, _} -> true + {:error, _} -> false + end + end) + |> done + end + + @doc """ + merge same cited article in different blocks + e.g: + [ + %{ + block_linker: ["block-zByQI"], + cited_by_id: 190058, + cited_by_type: "POST", + post_id: 190059, + user_id: 1413053 + }, + %{ + block_linker: ["block-zByQI", "block-ZgKJs"], + cited_by_id: 190057, + cited_by_type: "POST", + post_id: 190059, + user_id: 1413053 + }, + ] + """ + defp merge_same_cited_article_block(cited_contents) do + cited_contents + |> Enum.reduce([], fn content, acc -> + case Enum.find_index(acc, &(&1.cited_by_id == content.cited_by_id)) do + nil -> + acc ++ [content] + + index -> + List.update_at( + acc, + index, + &Map.merge(&1, %{block_linker: &1.block_linker ++ content.block_linker}) + ) + end + end) + end + + @doc """ + return fmt like: + [ + %{ + block_linker: ["block-ZgKJs"], + cited_by_id: 190057, + cited_by_type: "POST", + cited_article: #loaded, + post_id: 190059, + user_id: 1413053 + } + ... + ] + """ + defp parse_cited_info_per_block(article, %{"id" => block_id, "data" => %{"text" => text}}) do + links_in_block = Floki.find(text, "a[href]") + + Enum.reduce(links_in_block, [], fn link, acc -> + with {:ok, cited_article} <- parse_cited_article(link), + # do not cite artilce itself + true <- article.id !== cited_article.id do + List.insert_at(acc, 0, shape_cited_content(article, cited_article, block_id)) + else + _ -> acc + end + end) + |> Enum.uniq() + end + + defp shape_cited_content(article, cited_article, block_id) do + {:ok, thread} = thread_of_article(article) + {:ok, info} = match(thread) + + %{ + cited_by_id: cited_article.id, + cited_by_type: cited_article.meta.thread, + # used for updating citing_count, avoid load again + cited_article: cited_article, + block_linker: [block_id], + user_id: article.author.user.id + } + |> Map.put(info.foreign_key, article.id) + end + + defp parse_cited_article({"a", attrs, _}) do + with {:ok, link} <- parse_link(attrs), + true <- is_site_article_link?(link) do + load_cited_article_from_url(link) + end + end + + @doc """ + parse link from Floki parse result + + e.g: + [{"href", "https://coderplanets.com/post/190220", "bla", "bla"}] -> + {:ok, "https://coderplanets.com/post/190220"} + """ + defp parse_link(attrs) do + with {"href", link} <- Enum.find(attrs, fn {a, _v} -> a == "href" end) do + {:ok, link} + else + _ -> {:error, "invalid fmt"} + end + end + + # 检测是否是站内文章的链接 + defp is_site_article_link?(url) do + Enum.any?(@valid_article_prefix, &String.starts_with?(url, &1)) + end + + # get cited article from url + # e.g: https://coderplanets.com/post/189993 -> ORM.find(Post, 189993) + defp load_cited_article_from_url(url) do + %{path: path} = URI.parse(url) + path_list = path |> String.split("/") + thread = path_list |> Enum.at(1) |> String.downcase() |> String.to_atom() + article_id = path_list |> Enum.at(2) + + with {:ok, info} <- match(thread) do + ORM.find(info.model, article_id) + end + end + + defp result({:ok, %{update_cited_info: result}}), do: {:ok, result} + + defp result({:error, :update_cited_info, _result, _steps}) do + {:error, [message: "cited article", code: ecode(:cite_artilce)]} + end +end diff --git a/lib/groupher_server/cms/helper/macros.ex b/lib/groupher_server/cms/helper/macros.ex index 9d542d641..5e1cbdf60 100644 --- a/lib/groupher_server/cms/helper/macros.ex +++ b/lib/groupher_server/cms/helper/macros.ex @@ -208,7 +208,6 @@ defmodule GroupherServer.CMS.Helper.Macros do field(:active_at, :utc_datetime_usec) # TODO: - # reference_articles # related_articles timestamps() end diff --git a/lib/groupher_server/cms/models/cited_content.ex b/lib/groupher_server/cms/models/cited_content.ex new file mode 100644 index 000000000..17f4c2c55 --- /dev/null +++ b/lib/groupher_server/cms/models/cited_content.ex @@ -0,0 +1,49 @@ +defmodule GroupherServer.CMS.Model.CitedContent do + @moduledoc false + alias __MODULE__ + + use Ecto.Schema + use Accessible + + import Ecto.Changeset + import GroupherServer.CMS.Helper.Macros + + alias GroupherServer.{Accounts, CMS} + alias Accounts.Model.User + + alias CMS.Model.ArticleComment + + @timestamps_opts [type: :utc_datetime_usec] + + @required_fields ~w(cited_by_type cited_by_id user_id)a + @article_cast_fields general_article_fields(:cast) + @optional_fields ~w(article_comment_id block_linker)a ++ @article_cast_fields + + @type t :: %CitedContent{} + schema "cited_contents" do + field(:cited_by_type, :string) + field(:cited_by_id, :id) + + belongs_to(:author, User, foreign_key: :user_id) + belongs_to(:article_comment, ArticleComment, foreign_key: :article_comment_id) + + article_belongs_to_fields() + + field(:block_linker, {:array, :string}) + # content.block_linker = ["block-eee_block-bbb", "block-eee_block-bbb"] + timestamps() + end + + @doc false + def changeset(%CitedContent{} = cited_content, attrs) do + cited_content + |> cast(attrs, @optional_fields ++ @required_fields) + |> validate_required(@required_fields) + end + + @doc false + def update_changeset(%CitedContent{} = cited_content, attrs) do + cited_content + |> cast(attrs, @optional_fields ++ @required_fields) + end +end diff --git a/lib/groupher_server/cms/models/embeds/article_meta.ex b/lib/groupher_server/cms/models/embeds/article_meta.ex index 96be1c5f1..7a6132823 100644 --- a/lib/groupher_server/cms/models/embeds/article_meta.ex +++ b/lib/groupher_server/cms/models/embeds/article_meta.ex @@ -6,11 +6,12 @@ defmodule GroupherServer.CMS.Model.Embeds.ArticleMeta do use Accessible import Ecto.Changeset - @optional_fields ~w(is_edited is_comment_locked upvoted_user_ids collected_user_ids viewed_user_ids reported_user_ids reported_count is_sinked can_undo_sink last_active_at)a + @optional_fields ~w(thread is_edited is_comment_locked upvoted_user_ids collected_user_ids viewed_user_ids reported_user_ids reported_count is_sinked can_undo_sink last_active_at)a @doc "for test usage" def default_meta() do %{ + thread: "POST", is_edited: false, is_comment_locked: false, upvoted_user_ids: [], @@ -20,11 +21,13 @@ defmodule GroupherServer.CMS.Model.Embeds.ArticleMeta do reported_count: 0, is_sinked: false, can_undo_sink: true, - last_active_at: nil + last_active_at: nil, + citing_count: 0 } end embedded_schema do + field(:thread, :string) field(:is_edited, :boolean, default: false) field(:is_comment_locked, :boolean, default: false) # reaction history @@ -38,6 +41,7 @@ defmodule GroupherServer.CMS.Model.Embeds.ArticleMeta do field(:can_undo_sink, :boolean, default: false) # if undo_sink, can recover last active_at from here field(:last_active_at, :utc_datetime_usec) + field(:citing_count, :integer, default: 0) end def changeset(struct, params) do diff --git a/lib/groupher_server/cms/models/embeds/block_task_runner.ex b/lib/groupher_server/cms/models/embeds/block_task_runner.ex new file mode 100644 index 000000000..d705872b2 --- /dev/null +++ b/lib/groupher_server/cms/models/embeds/block_task_runner.ex @@ -0,0 +1,31 @@ +defmodule GroupherServer.CMS.Model.Embeds.BlockTaskRunner do + @moduledoc """ + general article meta info for article-like content, like post, job, works ... + """ + use Ecto.Schema + use Accessible + import Ecto.Changeset + + alias GroupherServer.CMS.Model.Embeds + + @optional_fields ~w(bi_link_tasks)a + # @optional_fields ~w(bi_link_tasks mention_user_tasks)a + + @doc "for test usage" + def default_meta() do + %{ + bi_link_tasks: [] + # mention_user_tasks: [] + } + end + + embedded_schema do + embeds_many(:reference_tasks, Embeds.ReferenceTask, on_replace: :delete) + # embeds_many(:mention_user_tasks, Embeds.MentionUserTask, on_replace: :delete) + end + + def changeset(struct, params) do + struct + |> cast(params, @optional_fields) + end +end diff --git a/lib/groupher_server/cms/models/embeds/reference_task.ex b/lib/groupher_server/cms/models/embeds/reference_task.ex new file mode 100644 index 000000000..308db8779 --- /dev/null +++ b/lib/groupher_server/cms/models/embeds/reference_task.ex @@ -0,0 +1,36 @@ +defmodule GroupherServer.CMS.Model.Embeds.ReferenceTask do + @moduledoc """ + general article meta info for article-like content, like post, job, works ... + """ + use Ecto.Schema + use Accessible + import Ecto.Changeset + + @optional_fields ~w(bi_link_tasks mention_user_tasks)a + + # thread, article_id, block_id, author_id, cite_thread, cite_article_id, cite_block_id, cite_author_id + + @doc "for test usage" + def default_meta() do + %{ + # bi_link_tasks: [], + # mention_user_tasks: [] + } + end + + embedded_schema do + field(:article_id, :id) + field(:block_id, :string) + + field(:reference_article_id, :id) + # 可选 + field(:reference_block_id, :string) + + field(:is_finished, :boolean, default: false) + end + + def changeset(struct, params) do + struct + |> cast(params, @optional_fields) + end +end diff --git a/lib/groupher_server/cms/models/post.ex b/lib/groupher_server/cms/models/post.ex index d16e5710f..5f2059e64 100644 --- a/lib/groupher_server/cms/models/post.ex +++ b/lib/groupher_server/cms/models/post.ex @@ -30,6 +30,10 @@ defmodule GroupherServer.CMS.Model.Post do field(:is_solved, :boolean, default: false) field(:solution_digest, :string) + # TODO: move to general_article_fields + # embeds_one(:block_task_runner, Embeds.BlockTaskRunner, on_replace: :update) + # embeds_many(:citing_contents, CMS.CitedContent, on_replace: :delete) + article_tags_field(:post) article_communities_field(:post) general_article_fields() diff --git a/lib/groupher_server/statistics/delegates/contribute.ex b/lib/groupher_server/statistics/delegates/contribute.ex index ec072c4ff..6165c7d7a 100644 --- a/lib/groupher_server/statistics/delegates/contribute.ex +++ b/lib/groupher_server/statistics/delegates/contribute.ex @@ -134,7 +134,7 @@ defmodule GroupherServer.Statistics.Delegate.Contribute do end defp cache_contribute_later(%Community{id: id}) do - Later.exec({__MODULE__, :get_contributes_then_cache, [%Community{id: id}]}) + Later.run({__MODULE__, :get_contributes_then_cache, [%Community{id: id}]}) end defp do_get_contributes(%Community{id: id}) do diff --git a/lib/helper/error_code.ex b/lib/helper/error_code.ex index 825fac99f..302697d65 100644 --- a/lib/helper/error_code.ex +++ b/lib/helper/error_code.ex @@ -51,6 +51,7 @@ defmodule Helper.ErrorCode do def ecode(:undo_sink_old_article), do: @article_base + 7 def ecode(:article_comment_locked), do: @article_base + 8 def ecode(:require_questioner), do: @article_base + 9 + def ecode(:cite_artilce), do: @article_base + 10 # def ecode(:already_solved), do: @article_base + 10 def ecode, do: @default_base diff --git a/lib/helper/later.ex b/lib/helper/later.ex index 9644b106c..779f5157b 100644 --- a/lib/helper/later.ex +++ b/lib/helper/later.ex @@ -5,10 +5,12 @@ defmodule Helper.Later do @doc """ ## Example - iex> Later.exec({__MODULE__, :get_contributes_then_cache, [%Community{id: id}]}) + iex> Later.run({__MODULE__, :get_contributes_then_cache, [%Community{id: id}]}) {:ok, _} """ - def exec({mod, func, args}) do + def run({mod, func, args}) do Rihanna.enqueue({mod, func, args}) + # weather enqueue success or not, just return {:ok, :pass}, or Multi.Job will be rollback + {:ok, :pass} end end diff --git a/lib/helper/orm.ex b/lib/helper/orm.ex index f2145fc94..6e473dceb 100644 --- a/lib/helper/orm.ex +++ b/lib/helper/orm.ex @@ -173,6 +173,16 @@ defmodule Helper.ORM do end end + @doc """ + delete all queryable if exist + """ + def delete_all(queryable, :if_exist) do + case Repo.exists?(queryable) do + true -> {:ok, Repo.delete_all(queryable)} + false -> {:ok, :pass} + end + end + def findby_or_insert(queryable, clauses, attrs) do case queryable |> find_by(clauses) do {:ok, content} -> diff --git a/lib/helper/utils/utils.ex b/lib/helper/utils/utils.ex index 18885476b..34cacb414 100644 --- a/lib/helper/utils/utils.ex +++ b/lib/helper/utils/utils.ex @@ -55,6 +55,8 @@ defmodule Helper.Utils do @doc """ handle General {:ok, ..} or {:error, ..} return """ + def done(false), do: {:error, false} + def done(true), do: {:ok, true} def done(nil, :boolean), do: {:ok, false} def done(_, :boolean), do: {:ok, true} def done(nil, err_msg), do: {:error, err_msg} @@ -182,6 +184,13 @@ defmodule Helper.Utils do def strip_struct(map) when is_map(map), do: map + @doc """ + get upcase name of a module, most used for store thread in DB + """ + def module_to_upcase(module) do + module |> Module.split() |> List.last() |> String.upcase() + end + @doc """ get atom name of a module """ @@ -201,6 +210,13 @@ defmodule Helper.Utils do end end + # get thread of article + def thread_of_article(%{meta: %{thread: thread}}) do + thread |> String.downcase() |> String.to_atom() |> done + end + + def thread_of_article(_), do: {:error, "invalid article"} + def uid(str_len \\ 5) do Nanoid.generate(str_len, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") end diff --git a/priv/repo/migrations/20210611015726_create_cited_articles.exs b/priv/repo/migrations/20210611015726_create_cited_articles.exs new file mode 100644 index 000000000..253ff662e --- /dev/null +++ b/priv/repo/migrations/20210611015726_create_cited_articles.exs @@ -0,0 +1,23 @@ +defmodule GroupherServer.Repo.Migrations.CreateCitedArticles do + use Ecto.Migration + + def change do + create table(:cited_contents) do + # cited_type, cited_content_id, [contents]_id, [block_id, cited_block_id], + add(:cited_by_type, :string) + add(:cited_by_id, :id) + + add(:user_id, references(:users, on_delete: :delete_all), null: false) + add(:article_comment_id, references(:articles_comments, on_delete: :delete_all)) + + add(:post_id, references(:cms_posts, on_delete: :delete_all)) + add(:repo_id, references(:cms_repos, on_delete: :delete_all)) + add(:job_id, references(:cms_jobs, on_delete: :delete_all)) + add(:blog_id, references(:cms_blogs, on_delete: :delete_all)) + + add(:block_linker, {:array, :string}) + end + + create(index(:cited_contents, [:cited_by_type, :cited_by_id])) + end +end diff --git a/test/groupher_server/cms/articles/blog_test.exs b/test/groupher_server/cms/articles/blog_test.exs index 3c02b7400..2e1e00deb 100644 --- a/test/groupher_server/cms/articles/blog_test.exs +++ b/test/groupher_server/cms/articles/blog_test.exs @@ -28,6 +28,8 @@ defmodule GroupherServer.Test.Articles.Blog do body_map = Jason.decode!(blog.body) + assert blog.meta.thread == "BLOG" + assert blog.title == blog_attrs.title assert body_map |> Validator.is_valid() assert blog.body_html |> String.contains?(~s(