Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions app/controllers/discourse_ai/embeddings/embeddings_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ class EmbeddingsController < ::ApplicationController

def search
query = params[:q]
page = (params[:page] || 1).to_i

grouped_results =
Search::GroupedSearchResults.new(
Expand All @@ -19,12 +18,19 @@ def search
use_pg_headlines_for_excerpt: false,
)

DiscourseAi::Embeddings::SemanticSearch
.new(guardian)
.search_for_topics(query, page)
.each { |topic_post| grouped_results.add(topic_post) }
semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(guardian)

render_serialized(grouped_results, GroupedSearchResultSerializer, result: grouped_results)
if !semantic_search.cached_query?(query)
RateLimiter.new(current_user, "semantic-search", 4, 1.minutes).performed!
end

hijack do
semantic_search
.search_for_topics(query)
.each { |topic_post| grouped_results.add(topic_post) }

render_serialized(grouped_results, GroupedSearchResultSerializer, result: grouped_results)
end
end
end
end
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{{!-- AI (semantic) search results panel shown on the full-page search route.
Rendered only for the default search type (see component's searchEnabled). --}}
{{#if this.searchEnabled}}
<div class="semantic-search__container search-results" role="region">
{{!-- setup/teardown register the appEvents listener; debouncedSearch kicks
off an initial semantic search as soon as the panel is inserted. --}}
<div
class="semantic-search__results"
{{did-insert this.setup}}
{{did-insert this.debouncedSearch}}
{{will-destroy this.teardown}}
>
{{!-- In-flight state: loading text plus an animated three-dot indicator. --}}
{{#if this.searching}}
<div class="semantic-search__searching">
<div class="semantic-search__searching-text">
{{i18n "discourse_ai.embeddings.semantic_search_loading"}}
</div>
<span class="semantic-search__indicator-wave">
<span class="semantic-search__indicator-dot">.</span>
<span class="semantic-search__indicator-dot">.</span>
<span class="semantic-search__indicator-dot">.</span>
</span>
</div>
{{else}}
{{#if this.results.length}}
{{!-- Toggle button flips collapsedResults; chevron mirrors the state. --}}
<div class="semantic-search__toggle-button-container">
<DButton
@translatedTitle={{this.collapsedResultsTitle}}
@translatedLabel={{this.collapsedResultsTitle}}
@action={{fn
(mut this.collapsedResults)
(not this.collapsedResults)
}}
@class="btn-flat"
@icon={{if this.collapsedResults "chevron-right" "chevron-down"}}
/>
</div>

{{#unless this.collapsedResults}}
<div class="semantic-search__entries">
<SearchResultEntries
@posts={{this.results}}
@highlightQuery={{this.highlightQuery}}
/>
</div>
{{/unless}}
{{else}}
{{!-- NOTE(review): this "no results" message also shows before the first
search completes (results starts empty) — confirm that is intended. --}}
<div class="semantic-search__searching">
{{i18n "discourse_ai.embeddings.semantic_search_results.none"}}
</div>
{{/if}}
{{/if}}
</div>
</div>
{{/if}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import Component from "@glimmer/component";
import { action, computed } from "@ember/object";
import I18n from "I18n";
import { tracked } from "@glimmer/tracking";
import { ajax } from "discourse/lib/ajax";
import { translateResults } from "discourse/lib/search";
import discourseDebounce from "discourse-common/lib/debounce";
import { inject as service } from "@ember/service";
import { bind } from "discourse-common/utils/decorators";
import { SEARCH_TYPE_DEFAULT } from "discourse/controllers/full-page-search";

export default class extends Component {
static shouldRender(_args, { siteSettings }) {
return siteSettings.ai_embeddings_semantic_search_enabled;
}

@service appEvents;

@tracked searching = false;
@tracked collapsedResults = true;
@tracked results = [];

@computed("args.outletArgs.search")
get searchTerm() {
return this.args.outletArgs.search;
}

@computed("args.outletArgs.type")
get searchEnabled() {
return this.args.outletArgs.type === SEARCH_TYPE_DEFAULT;
}

@computed("results")
get collapsedResultsTitle() {
return I18n.t("discourse_ai.embeddings.semantic_search_results.toggle", {
count: this.results.length,
});
}

@action
setup() {
this.appEvents.on(
"full-page-search:trigger-search",
this,
"debouncedSearch"
);
}

@action
teardown() {
this.appEvents.off(
"full-page-search:trigger-search",
this,
"debouncedSearch"
);
}

@bind
performHyDESearch() {
if (!this.searchTerm || !this.searchEnabled || this.searching) {
return;
}

this.searching = true;
this.collapsedResults = true;
this.results = [];

ajax("/discourse-ai/embeddings/semantic-search", {
data: { q: this.searchTerm },
})
.then(async (results) => {
const model = (await translateResults(results)) || {};
this.results = model.posts;
})
.finally(() => (this.searching = false));
}

@action
debouncedSearch() {
discourseDebounce(this, this.performHyDESearch, 500);
}
}
63 changes: 0 additions & 63 deletions assets/javascripts/initializers/semantic-full-page-search.js

This file was deleted.

39 changes: 39 additions & 0 deletions assets/stylesheets/modules/embeddings/common/semantic-search.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Styles for the AI (semantic) search results panel rendered below the
// regular full-page search results.
.semantic-search__container {
background: var(--primary-very-low);
margin: 1rem 0 1rem 0;

.semantic-search__results {
display: flex;
flex-direction: column;
align-items: baseline;

// Loading indicator: text plus a three-dot "wave" animation. The dots
// reuse the ai-summary keyframes, staggered via negative delays.
.semantic-search {
&__searching-text {
display: inline-block;
margin-left: 3px;
}
&__indicator-wave {
flex: 0 0 auto;
display: inline-flex;
}
&__indicator-dot {
display: inline-block;
// NOTE(review): relies on @keyframes ai-summary__indicator-wave being
// defined in the summarization stylesheet — confirm it is always loaded.
animation: ai-summary__indicator-wave 1.8s linear infinite;
&:nth-child(2) {
animation-delay: -1.6s;
}
&:nth-child(3) {
animation-delay: -1.4s;
}
}
}

// Spacing for the expanded result entries list.
.semantic-search__entries {
margin-top: 10px;
}

// Spacing for the "searching…" / "no results" message block.
.semantic-search__searching {
margin-left: 5px;
}
}
}
4 changes: 4 additions & 0 deletions config/locales/client.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ en:

embeddings:
semantic_search: "Topics (Semantic)"
semantic_search_loading: "Searching for more results using AI"
semantic_search_results:
toggle: "Found %{count} results using AI"
none: "Sorry, our AI search found no matching topics."

ai_bot:
pm_warning: "AI chatbot messages are monitored regularly by moderators."
Expand Down
1 change: 1 addition & 0 deletions config/locales/server.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ en:
ai_embeddings_pg_connection_string: "PostgreSQL connection string for the embeddings module. Needs pgvector extension enabled and a series of tables created. See docs for more info."
ai_embeddings_semantic_search_enabled: "Enable full-page semantic search."
ai_embeddings_semantic_related_include_closed_topics: "Include closed topics in semantic search results"
ai_embeddings_semantic_search_hyde_model: "Model used to expand keywords to get better results during a semantic search"

ai_summarization_discourse_service_api_endpoint: "URL where the Discourse summarization API is running."
ai_summarization_discourse_service_api_key: "API key for the Discourse summarization API."
Expand Down
12 changes: 12 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,18 @@ discourse_ai:
ai_embeddings_semantic_search_enabled:
default: false
client: true
ai_embeddings_semantic_search_hyde_model:
default: "gpt-3.5-turbo"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember this being necessary for things to work, but I don't know if it's still true after all the changes. Will take a look.

type: enum
allow_any: false
choices:
- Llama2-*-chat-hf
- claude-instant-1
- claude-2
- gpt-3.5-turbo
- gpt-4
- StableBeluga2
- Upstage-Llama-2-*-instruct-v2

ai_summarization_discourse_service_api_endpoint: ""
ai_summarization_discourse_service_api_key:
Expand Down
11 changes: 5 additions & 6 deletions lib/modules/ai_helper/semantic_categorizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ def categories
return [] if @text.blank?
return [] unless SiteSetting.ai_embeddings_enabled

strategy = DiscourseAi::Embeddings::Strategies::Truncation.new
vector_rep =
DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(strategy)

candidates =
::DiscourseAi::Embeddings::SemanticSearch.new(nil).asymmetric_semantic_search(
@text,
100,
0,
return_distance: true,
)
vector_rep.asymmetric_semantic_search(@text, limit: 100, offset: 0, return_distance: true)
candidate_ids = candidates.map(&:first)

::Topic
Expand Down
17 changes: 11 additions & 6 deletions lib/modules/embeddings/entry_point.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,21 @@ module DiscourseAi
module Embeddings
class EntryPoint
def load_files
require_relative "models/base"
require_relative "models/all_mpnet_base_v2"
require_relative "models/text_embedding_ada_002"
require_relative "models/multilingual_e5_large"
require_relative "vector_representations/base"
require_relative "vector_representations/all_mpnet_base_v2"
require_relative "vector_representations/text_embedding_ada_002"
require_relative "vector_representations/multilingual_e5_large"
require_relative "strategies/truncation"
require_relative "manager"
require_relative "jobs/regular/generate_embeddings"
require_relative "semantic_related"
require_relative "semantic_search"
require_relative "semantic_topic_query"

require_relative "hyde_generators/base"
require_relative "hyde_generators/openai"
require_relative "hyde_generators/anthropic"
require_relative "hyde_generators/llama2"
require_relative "hyde_generators/llama2_ftos"
require_relative "semantic_search"
end

def inject_into(plugin)
Expand Down
32 changes: 32 additions & 0 deletions lib/modules/embeddings/hyde_generators/anthropic.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# frozen_string_literal: true

module DiscourseAi
  module Embeddings
    module HydeGenerators
      # HyDE generator backed by Anthropic's Claude models: expands a search
      # term into a hypothetical forum post used to improve embedding recall.
      class Anthropic < DiscourseAi::Embeddings::HydeGenerators::Base
        # Builds the Claude prompt asking for a hypothetical post wrapped in
        # <ai> tags so it can be extracted from the completion.
        def prompt(search_term)
          <<~TEXT
            Given a search term given between <input> tags, generate a forum post about the search term.
            Respond with the generated post between <ai> tags.

            <input>#{search_term}</input>
          TEXT
        end

        # Claude models this generator supports (matched against the
        # ai_embeddings_semantic_search_hyde_model site setting).
        def models
          %w[claude-instant-1 claude-2]
        end

        # Calls the Anthropic completions API and extracts the generated post.
        #
        # FIX: the model occasionally ignores the instruction and omits the
        # <ai> wrapper; `at("ai")` then returns nil and `.text` raised
        # NoMethodError. Fall back to the raw completion text in that case.
        def hypothetical_post_from(query)
          response =
            ::DiscourseAi::Inference::AnthropicCompletions.perform!(
              prompt(query),
              SiteSetting.ai_embeddings_semantic_search_hyde_model,
            ).dig(:completion)

          Nokogiri::HTML5.fragment(response).at("ai")&.text || response.to_s.strip
        end
      end
    end
  end
end
Loading