Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEATURE: implement GPT-4 turbo support #345

Merged
merged 2 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion config/locales/client.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ en:

bot_names:
gpt-4: "GPT-4"
gpt-4-turbo: "GPT-4 Turbo"
gpt-3:
5-turbo: "GPT-3.5"
claude-2: "Claude 2"
Expand All @@ -135,7 +136,7 @@ en:
label: "sentiment"
title: "Experimental AI-powered sentiment analysis of this person's most recent posts."



review:
types:
Expand Down
1 change: 1 addition & 0 deletions config/locales/server.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ en:
ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 turbo chat completions. (for Azure support)"
SamSaffron marked this conversation as resolved.
Show resolved Hide resolved
ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
ai_openai_organization: "(Optional, leave empty to omit) Organization id used for the OpenAI API. Passed in using the OpenAI-Organization header."
ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
Expand Down
2 changes: 2 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ discourse_ai:
ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
ai_openai_organization: ""
Expand Down Expand Up @@ -256,6 +257,7 @@ discourse_ai:
choices:
- gpt-3.5-turbo
- gpt-4
- gpt-4-turbo
- claude-2
ai_bot_add_to_header:
default: true
Expand Down
4 changes: 4 additions & 0 deletions lib/ai_bot/entry_point.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@ class EntryPoint
GPT4_ID = -110
GPT3_5_TURBO_ID = -111
CLAUDE_V2_ID = -112
GPT4_TURBO_ID = -113
BOTS = [
[GPT4_ID, "gpt4_bot", "gpt-4"],
[GPT3_5_TURBO_ID, "gpt3.5_bot", "gpt-3.5-turbo"],
[CLAUDE_V2_ID, "claude_bot", "claude-2"],
[GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"],
]

def self.map_bot_model_to_user_id(model_name)
case model_name
in "gpt-4-turbo"
GPT4_TURBO_ID
in "gpt-3.5-turbo"
GPT3_5_TURBO_ID
in "gpt-4"
Expand Down
16 changes: 13 additions & 3 deletions lib/ai_bot/open_ai_bot.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module AiBot
class OpenAiBot < Bot
def self.can_reply_as?(bot_user)
open_ai_bot_ids = [
DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID,
DiscourseAi::AiBot::EntryPoint::GPT4_ID,
DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID,
]
Expand All @@ -23,7 +24,9 @@ def prompt_limit(allow_commands:)
buffer += @function_size
end

if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
150_000 - buffer
elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
8192 - buffer
else
16_384 - buffer
Expand Down Expand Up @@ -75,8 +78,15 @@ def tokenizer
end

# Resolves the OpenAI model identifier to use for the current bot user.
#
# @param low_cost [Boolean] when true, always pick the cheaper 3.5 16k model
#   regardless of which bot persona is replying
# @return [String] model name passed to the completions endpoint
def model_for(low_cost: false)
  return "gpt-3.5-turbo-16k" if low_cost

  case bot_user.id
  when DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
    "gpt-3.5-turbo-16k"
  when DiscourseAi::AiBot::EntryPoint::GPT4_ID
    "gpt-4"
  else
    # not quite released yet, once released we should replace with
    # gpt-4-turbo
    "gpt-4-1106-preview"
  end
end

def clean_username(username)
Expand Down
9 changes: 8 additions & 1 deletion lib/inference/open_ai_completions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ def self.perform!(

url =
if model.include?("gpt-4")
if model.include?("32k")
if model.include?("turbo") || model.include?("1106-preview")
URI(SiteSetting.ai_openai_gpt4_turbo_url)
elsif model.include?("32k")
URI(SiteSetting.ai_openai_gpt4_32k_url)
else
URI(SiteSetting.ai_openai_gpt4_url)
Expand Down Expand Up @@ -134,6 +136,11 @@ def self.perform!(

response_raw << chunk

if (leftover + chunk).length < "data: [DONE]".length
leftover += chunk
next
end

(leftover + chunk)
.split("\n")
.each do |line|
Expand Down
73 changes: 73 additions & 0 deletions spec/shared/inference/openai_completions_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
{ setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
{ setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
{ setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
{ setting_name: "ai_openai_gpt4_turbo_url", model: "gpt-4-1106-preview" },
].each do |config|
gpt_url = "#{gpt_url_base}/#{config[:model]}"
setting_name = config[:setting_name]
Expand Down Expand Up @@ -263,6 +264,78 @@
expect(log.raw_response_payload).to eq(request_body)
end

# Regression coverage for SSE payloads that arrive in pieces smaller than a
# complete "data: ..." line. WebMock cannot stream response bodies natively,
# so this context swaps Net::HTTP for a subclass whose responses yield the
# stubbed body array chunk by chunk.
context "when Webmock has streaming support" do
# See: https://github.com/bblimke/webmock/issues/629
# Net::HTTP subclass whose responses redefine #read_body to enumerate the
# stubbed body (an Array of chunks) instead of returning it all at once.
let(:mock_net_http) do
Class.new(Net::HTTP) do
def request(*)
super do |response|
response.instance_eval do
# Yield each stubbed chunk to the caller's block, simulating a
# slow streaming response. Uses Ruby 3.1+ anonymous block forwarding.
def read_body(*, &)
@body.each(&)
end
end

yield response if block_given?

response
end
end
end
end

# Constant juggling: remove the real Net::HTTP, install the mock, and undo it
# afterwards. `original_http` memoizes the real class via the removal helper
# so `restore_net_http` can put it back.
let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
let(:original_http) { remove_original_net_http }
let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }

let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }

# Order matters: build the mock class first (while Net::HTTP still exists as
# its superclass), then remove the real constant, then install the mock.
before do
mock_net_http
remove_original_net_http
stub_net_http
end

after do
remove_stubbed_net_http
restore_net_http
end

# NOTE(review): description has a typo ("support" -> "supports"); left as-is
# here since spec descriptions are runtime strings.
it "support extremely slow streaming" do
raw_data = <<~TEXT
data: {"choices":[{"delta":{"content":"test"}}]}

data: {"choices":[{"delta":{"content":"test1"}}]}

data: {"choices":[{"delta":{"content":"test2"}}]}

data: [DONE]
TEXT

# Worst case: deliver the SSE payload one character at a time, so no single
# chunk ever contains a complete "data:" line.
chunks = raw_data.split("")

stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: chunks,
)

partials = []
DiscourseAi::Inference::OpenAiCompletions.perform!([], "gpt-3.5-turbo") do |partial, cancel|
partials << partial
end

# All three deltas must be reassembled despite the single-character chunking;
# the [DONE] sentinel must not produce a partial.
expect(partials.length).to eq(3)
expect(partials).to eq(
[
{ choices: [{ delta: { content: "test" } }] },
{ choices: [{ delta: { content: "test1" } }] },
{ choices: [{ delta: { content: "test2" } }] },
],
)
end
end

it "can operate in streaming mode" do
deltas = [
{ role: "assistant" },
Expand Down