Handle scenario where vLLM uses an SRV record
romanrizzi committed Jun 18, 2024
1 parent 43acecf commit 64c2a8f
Showing 10 changed files with 72 additions and 20 deletions.
24 changes: 24 additions & 0 deletions app/models/llm_model.rb
@@ -2,9 +2,33 @@

class LlmModel < ActiveRecord::Base
FIRST_BOT_USER_ID = -1200
RESERVED_VLLM_SRV_URL = "https://vllm.shadowed-by-srv.invalid"

belongs_to :user

validates :url, exclusion: { in: [RESERVED_VLLM_SRV_URL] }

def self.enable_or_disable_srv_llm!
srv_model = find_by(url: RESERVED_VLLM_SRV_URL)
if SiteSetting.ai_vllm_endpoint_srv.present? && srv_model.blank?
record =
new(
display_name: "vLLM SRV LLM",
name: "mistralai/Mixtral",
provider: "vllm",
tokenizer: "DiscourseAi::Tokenizer::MixtralTokenizer",
url: RESERVED_VLLM_SRV_URL,
api_key: "",
max_prompt_tokens: 32_000,
user_id: nil,
enabled_chat_bot: false,
)

record.save(validate: false) # Ignore reserved URL validation
elsif srv_model.present?
srv_model.destroy!
end
end

def toggle_companion_user
return if name == "fake" && Rails.env.production?

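As a usage note, the intended lifecycle is that the reserved-URL record exists exactly while the hidden SRV site setting is set. A minimal sketch, assuming a Rails console (the SRV name below is made up):

# Hypothetical console session illustrating the toggle above.
SiteSetting.ai_vllm_endpoint_srv = "_vllm._tcp.internal.example" # assumed SRV record name
LlmModel.enable_or_disable_srv_llm!
LlmModel.exists?(url: LlmModel::RESERVED_VLLM_SRV_URL) # => true, placeholder record seeded

LlmModel.new(url: LlmModel::RESERVED_VLLM_SRV_URL).valid? # => false, reserved URL rejected for manual records

SiteSetting.ai_vllm_endpoint_srv = ""
LlmModel.enable_or_disable_srv_llm!
LlmModel.exists?(url: LlmModel::RESERVED_VLLM_SRV_URL) # => false, placeholder destroyed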
7 changes: 6 additions & 1 deletion app/serializers/llm_model_serializer.rb
@@ -11,5 +11,10 @@ class LlmModelSerializer < ApplicationSerializer
:tokenizer,
:api_key,
:url,
:enabled_chat_bot
:enabled_chat_bot,
:url_editable

def url_editable
object.url != LlmModel::RESERVED_VLLM_SRV_URL
end
end
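The new url_editable attribute simply reflects whether the URL is the reserved sentinel. A rough, abbreviated sketch of the serialized payload for the seeded record (values taken from the seed above, field set trimmed):

# Hypothetical slice of LlmModelSerializer output for the SRV-backed record:
# {
#   "display_name"     => "vLLM SRV LLM",
#   "provider"         => "vllm",
#   "url"              => "https://vllm.shadowed-by-srv.invalid",
#   "url_editable"     => false, # hides the URL input and shows the SRV warning in the editor below
#   "enabled_chat_bot" => false
# }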
24 changes: 16 additions & 8 deletions assets/javascripts/discourse/components/ai-llm-editor.gjs
@@ -131,6 +131,12 @@ export default class AiLlmEditor extends Component {
@route="adminPlugins.show.discourse-ai-llms"
@label="discourse_ai.llms.back"
/>
{{#unless @model.url_editable}}
<div class="alert alert-info">
{{icon "exclamation-circle"}}
{{I18n.t "discourse_ai.llms.srv_warning"}}
</div>
{{/unless}}
<form class="form-horizontal ai-llm-editor">
<div class="control-group">
<label>{{i18n "discourse_ai.llms.display_name"}}</label>
@@ -159,14 +165,16 @@
@content={{this.selectedProviders}}
/>
</div>
<div class="control-group">
<label>{{I18n.t "discourse_ai.llms.url"}}</label>
<Input
class="ai-llm-editor-input ai-llm-editor__url"
@type="text"
@value={{@model.url}}
/>
</div>
{{#if @model.url_editable}}
<div class="control-group">
<label>{{I18n.t "discourse_ai.llms.url"}}</label>
<Input
class="ai-llm-editor-input ai-llm-editor__url"
@type="text"
@value={{@model.url}}
/>
</div>
{{/if}}
<div class="control-group">
<label>{{I18n.t "discourse_ai.llms.api_key"}}</label>
<Input
2 changes: 2 additions & 0 deletions config/locales/client.en.yml
@@ -218,6 +218,8 @@ en:
confirm_delete: Are you sure you want to delete this model?
delete: Delete

srv_warning: This LLM points to an SRV record, and its URL is not editable. You have to update the hidden "ai_vllm_endpoint_srv" setting instead.

tests:
title: "Run Test"
running: "Running test..."
2 changes: 1 addition & 1 deletion db/post_migrate/20240528144216_seed_open_ai_models.rb
@@ -5,7 +5,7 @@ def up
models = []

open_ai_api_key = fetch_setting("ai_openai_api_key")
enabled_models = fetch_setting("ai_bot_enabled_chat_bots").to_a.split("|")
enabled_models = fetch_setting("ai_bot_enabled_chat_bots")&.split("|").to_a
enabled_models = ["gpt-3.5-turbo"] if enabled_models.empty?

if open_ai_api_key.present?
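The &.split("|").to_a rewrite repeated across these seed migrations is a nil-safety fix: assuming fetch_setting returns either the raw string value or nil, the old .to_a.split("|") called #to_a on that value, and String#to_a does not exist. A quick sketch:

# New form: safe for both a raw string and nil.
"gpt-4|gpt-3.5-turbo"&.split("|").to_a # => ["gpt-4", "gpt-3.5-turbo"]
nil&.split("|").to_a                   # => []

# Old form: raises as soon as the setting is actually populated.
"gpt-4|gpt-3.5-turbo".to_a.split("|")  # => NoMethodError (String has no #to_a)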
4 changes: 2 additions & 2 deletions db/post_migrate/20240531205234_seed_claude_models.rb
@@ -7,10 +7,10 @@ def up
models = []

bedrock_secret_access_key = fetch_setting("ai_bedrock_secret_access_key")
enabled_models = fetch_setting("ai_bot_enabled_chat_bots").to_a.split("|")
enabled_models = fetch_setting("ai_bot_enabled_chat_bots")&.split("|").to_a

if bedrock_secret_access_key.present?
bedrock_region = fetch_setting("ai_bedrock_region")
bedrock_region = fetch_setting("ai_bedrock_region") || "us-east-1"

claude_models.each do |cm|
url =
@@ -5,7 +5,7 @@ def up
models = []

gemini_key = fetch_setting("ai_gemini_api_key")
enabled_models = fetch_setting("ai_bot_enabled_chat_bots").to_a.split("|")
enabled_models = fetch_setting("ai_bot_enabled_chat_bots")&.split("|").to_a

if gemini_key.present?
gemini_models = %w[gemini-pro gemini-1.5-pro gemini-1.5-flash]
21 changes: 15 additions & 6 deletions db/post_migrate/20240603143158_seed_oss_models.rb
@@ -3,6 +3,8 @@
class SeedOssModels < ActiveRecord::Migration[7.0]
def up
models = []
enabled_models = fetch_setting("ai_bot_enabled_chat_bots")&.split("|").to_a
enabled = enabled_models.include?("mixtral-8x7B-Instruct-V0.1")

hf_key = fetch_setting("ai_hugging_face_api_key")
hf_url = fetch_setting("ai_hugging_face_api_url")
@@ -16,7 +18,7 @@ def up
name = hf_display_name || "mistralai/Mixtral"
token_limit = hf_token_limit || 32_000

models << "('#{name}', '#{name}', 'hugging_face', 'DiscourseAi::Tokenizer::MixtralTokenizer', #{token_limit}, '#{hf_url}', '#{hf_key}', #{user_id}, NOW(), NOW())"
models << "('#{name}', '#{name}', 'hugging_face', 'DiscourseAi::Tokenizer::MixtralTokenizer', #{token_limit}, '#{hf_url}', '#{hf_key}', #{user_id}, #{enabled}, NOW(), NOW())"
end

vllm_key = fetch_setting("ai_vllm_api_key")
@@ -26,23 +28,30 @@ def up
url = "#{vllm_url}/v1/chat/completions"
name = "mistralai/Mixtral"

models << "('#{name}', '#{name}', 'vllm', 'DiscourseAi::Tokenizer::MixtralTokenizer', 32000, '#{url}', '#{vllm_key}', #{user_id}, NOW(), NOW())"
models << "('#{name}', '#{name}', 'vllm', 'DiscourseAi::Tokenizer::MixtralTokenizer', 32000, '#{url}', '#{vllm_key}', #{user_id}, #{enabled}, NOW(), NOW())"
end

vllm_srv = fetch_setting("ai_vllm_endpoint_srv")
srv_reserved_url = "https://vllm.shadowed-by-srv.invalid"

if vllm_srv.present? && vllm_key.present?
url = "https://shadowed-by-srv.invalid"
srv_record =
DB.query_single(
"SELECT id FROM llm_models WHERE url = :reserved",
reserved: srv_reserved_url,
).first

if vllm_srv.present? && srv_record.nil?
url = "https://vllm.shadowed-by-srv.invalid"
name = "mistralai/Mixtral"

models << "('#{name}', '#{name}', 'vllm', 'DiscourseAi::Tokenizer::MixtralTokenizer', 32000, '#{url}', '#{vllm_key}', #{user_id}, NOW(), NOW())"
models << "('vLLM SRV LLM', '#{name}', 'vllm', 'DiscourseAi::Tokenizer::MixtralTokenizer', 32000, '#{url}', '#{vllm_key}', #{user_id}, #{enabled}, NOW(), NOW())"
end

if models.present?
rows = models.compact.join(", ")

DB.exec(<<~SQL, rows: rows) if rows.present?
INSERT INTO llm_models(display_name, name, provider, tokenizer, max_prompt_tokens, url, api_key, user_id, created_at, updated_at)
INSERT INTO llm_models(display_name, name, provider, tokenizer, max_prompt_tokens, url, api_key, user_id, enabled_chat_bot, created_at, updated_at)
VALUES #{rows};
SQL
end
4 changes: 3 additions & 1 deletion lib/completions/endpoints/vllm.rb
@@ -44,7 +44,9 @@ def provider_id
private

def model_uri
return URI(llm_model.url) if llm_model&.url
if llm_model&.url && llm_model.url != LlmModel::RESERVED_VLLM_SRV_URL
return URI(llm_model.url)
end

service = DiscourseAi::Utils::DnsSrv.lookup(SiteSetting.ai_vllm_endpoint_srv)
if service.present?
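The rest of the hunk is cut off, but the guard's intent is clear: a concrete, user-set URL wins, while the reserved sentinel forces an SRV lookup. A rough sketch of how the fallback presumably builds the endpoint, assuming DnsSrv.lookup returns an SRV answer responding to #target and #port (illustrative only, not necessarily the truncated code):

# Sketch under the assumptions above.
if service.present?
  URI("https://#{service.target}:#{service.port}/v1/chat/completions")
else
  URI("#{SiteSetting.ai_vllm_endpoint}/v1/chat/completions")
end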
2 changes: 2 additions & 0 deletions plugin.rb
@@ -43,6 +43,8 @@ module ::DiscourseAi

add_admin_route("discourse_ai.title", "discourse-ai", { use_new_show_route: true })

LlmModel.enable_or_disable_srv_llm!

[
DiscourseAi::Embeddings::EntryPoint.new,
DiscourseAi::Nsfw::EntryPoint.new,
