Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEATURE: implement GPT-4 turbo support #345

Merged
merged 2 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion config/locales/client.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ en:

bot_names:
gpt-4: "GPT-4"
gpt-4-turbo: "GPT-4 Turbo"
gpt-3:
5-turbo: "GPT-3.5"
claude-2: "Claude 2"
Expand All @@ -135,7 +136,7 @@ en:
label: "sentiment"
title: "Experimental AI-powered sentiment analysis of this person's most recent posts."



review:
types:
Expand Down
1 change: 1 addition & 0 deletions config/locales/server.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ en:
ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 turbo chat completions. (for Azure support)"
SamSaffron marked this conversation as resolved.
Show resolved Hide resolved
ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
ai_openai_organization: "(Optional, leave empty to omit) Organization id used for the OpenAI API. Passed in using the OpenAI-Organization header."
ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
Expand Down
2 changes: 2 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ discourse_ai:
ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
ai_openai_organization: ""
Expand Down Expand Up @@ -256,6 +257,7 @@ discourse_ai:
choices:
- gpt-3.5-turbo
- gpt-4
- gpt-4-turbo
- claude-2
ai_bot_add_to_header:
default: true
Expand Down
4 changes: 4 additions & 0 deletions lib/ai_bot/entry_point.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@ class EntryPoint
GPT4_ID = -110
GPT3_5_TURBO_ID = -111
CLAUDE_V2_ID = -112
GPT4_TURBO_ID = -113
BOTS = [
[GPT4_ID, "gpt4_bot", "gpt-4"],
[GPT3_5_TURBO_ID, "gpt3.5_bot", "gpt-3.5-turbo"],
[CLAUDE_V2_ID, "claude_bot", "claude-2"],
[GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"],
]

def self.map_bot_model_to_user_id(model_name)
case model_name
in "gpt-4-turbo"
GPT4_TURBO_ID
in "gpt-3.5-turbo"
GPT3_5_TURBO_ID
in "gpt-4"
Expand Down
16 changes: 13 additions & 3 deletions lib/ai_bot/open_ai_bot.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module AiBot
class OpenAiBot < Bot
def self.can_reply_as?(bot_user)
open_ai_bot_ids = [
DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID,
DiscourseAi::AiBot::EntryPoint::GPT4_ID,
DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID,
]
Expand All @@ -23,7 +24,9 @@ def prompt_limit(allow_commands:)
buffer += @function_size
end

if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
150_000 - buffer
elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
8192 - buffer
else
16_384 - buffer
Expand Down Expand Up @@ -75,8 +78,15 @@ def tokenizer
end

# Resolves the OpenAI model identifier to use for the current bot user.
#
# @param low_cost [Boolean] when true, always pick the cheaper 3.5 16k model
#   regardless of which bot persona is replying
# @return [String] model name passed to the completions endpoint
def model_for(low_cost: false)
  return "gpt-3.5-turbo-16k" if low_cost

  case bot_user.id
  when DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
    "gpt-3.5-turbo-16k"
  when DiscourseAi::AiBot::EntryPoint::GPT4_ID
    "gpt-4"
  else
    # not quite released yet, once released we should replace with
    # gpt-4-turbo
    "gpt-4-1106-preview"
  end
end

def clean_username(username)
Expand Down
9 changes: 8 additions & 1 deletion lib/inference/open_ai_completions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ def self.perform!(

url =
if model.include?("gpt-4")
if model.include?("32k")
if model.include?("turbo") || model.include?("1106-preview")
URI(SiteSetting.ai_openai_gpt4_turbo_url)
elsif model.include?("32k")
URI(SiteSetting.ai_openai_gpt4_32k_url)
else
URI(SiteSetting.ai_openai_gpt4_url)
Expand Down Expand Up @@ -134,6 +136,11 @@ def self.perform!(

response_raw << chunk

if (leftover + chunk).length < "data: [DONE]".length
leftover += chunk
next
end

(leftover + chunk)
.split("\n")
.each do |line|
Expand Down
73 changes: 73 additions & 0 deletions spec/shared/inference/openai_completions_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
{ setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
{ setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
{ setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
{ setting_name: "ai_openai_gpt4_turbo_url", model: "gpt-4-1106-preview" },
].each do |config|
gpt_url = "#{gpt_url_base}/#{config[:model]}"
setting_name = config[:setting_name]
Expand Down Expand Up @@ -263,6 +264,78 @@
expect(log.raw_response_payload).to eq(request_body)
end

# Regression coverage for SSE payloads that arrive in pieces smaller than a
# complete "data: ..." line. WebMock cannot stream response bodies natively,
# so this context swaps Net::HTTP for a subclass whose responses yield the
# stubbed body array chunk by chunk.
context "when Webmock has streaming support" do
# See: https://github.com/bblimke/webmock/issues/629
# Net::HTTP subclass whose responses redefine #read_body to enumerate the
# stubbed body (an Array of chunks) instead of returning it all at once.
let(:mock_net_http) do
Class.new(Net::HTTP) do
def request(*)
super do |response|
response.instance_eval do
# Yield each stubbed chunk to the caller's block, simulating a
# slow streaming response. Uses Ruby 3.1+ anonymous block forwarding.
def read_body(*, &)
@body.each(&)
end
end

yield response if block_given?

response
end
end
end
end

# Constant juggling: remove the real Net::HTTP, install the mock, and undo it
# afterwards. `original_http` memoizes the real class via the removal helper
# so `restore_net_http` can put it back.
let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
let(:original_http) { remove_original_net_http }
let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }

let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }

# Order matters: build the mock class first (while Net::HTTP still exists as
# its superclass), then remove the real constant, then install the mock.
before do
mock_net_http
remove_original_net_http
stub_net_http
end

after do
remove_stubbed_net_http
restore_net_http
end

# NOTE(review): description has a typo ("support" -> "supports"); left as-is
# here since spec descriptions are runtime strings.
it "support extremely slow streaming" do
raw_data = <<~TEXT
data: {"choices":[{"delta":{"content":"test"}}]}

data: {"choices":[{"delta":{"content":"test1"}}]}

data: {"choices":[{"delta":{"content":"test2"}}]}

data: [DONE]
TEXT

# Worst case: deliver the SSE payload one character at a time, so no single
# chunk ever contains a complete "data:" line.
chunks = raw_data.split("")

stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: chunks,
)

partials = []
DiscourseAi::Inference::OpenAiCompletions.perform!([], "gpt-3.5-turbo") do |partial, cancel|
partials << partial
end

# All three deltas must be reassembled despite the single-character chunking;
# the [DONE] sentinel must not produce a partial.
expect(partials.length).to eq(3)
expect(partials).to eq(
[
{ choices: [{ delta: { content: "test" } }] },
{ choices: [{ delta: { content: "test1" } }] },
{ choices: [{ delta: { content: "test2" } }] },
],
)
end
end

it "can operate in streaming mode" do
deltas = [
{ role: "assistant" },
Expand Down