From 366c5b7ed2f71aeab14751ff4e50424aa12c56af Mon Sep 17 00:00:00 2001 From: Devran Cosmo Uenal Date: Sat, 18 May 2024 18:16:37 +0200 Subject: [PATCH] Initial commit --- .env.sample | 1 + .gitattributes | 2 + .github/workflows/main.yml | 27 +++ .gitignore | 108 ++++++++++++ .rubocop.yml | 18 ++ CHANGELOG.md | 5 + Gemfile | 19 +++ Gemfile.lock | 71 ++++++++ LICENSE.txt | 21 +++ README.md | 84 ++++++++++ Rakefile | 12 ++ bin/console | 11 ++ bin/setup | 8 + lib/openai_api.rb | 32 ++++ lib/openai_api/completion.rb | 133 +++++++++++++++ lib/openai_api/completion_multi_model.rb | 35 ++++ lib/openai_api/embedding.rb | 100 +++++++++++ lib/openai_api/helper.rb | 27 +++ lib/openai_api/stream_merger.rb | 126 ++++++++++++++ lib/openai_api/version.rb | 5 + openai_api.gemspec | 45 +++++ test/fixtures/response.json | 63 +++++++ test/fixtures/stream_content.jsonl | 19 +++ test/fixtures/stream_content_result.json | 22 +++ test/fixtures/stream_tool_calls.jsonl | 19 +++ test/fixtures/stream_tool_calls_result.json | 48 ++++++ test/test_helper.rb | 6 + test/test_openai_api.rb | 175 ++++++++++++++++++++ 28 files changed, 1242 insertions(+) create mode 100644 .env.sample create mode 100644 .gitattributes create mode 100644 .github/workflows/main.yml create mode 100644 .gitignore create mode 100644 .rubocop.yml create mode 100644 CHANGELOG.md create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 LICENSE.txt create mode 100644 README.md create mode 100644 Rakefile create mode 100755 bin/console create mode 100755 bin/setup create mode 100644 lib/openai_api.rb create mode 100644 lib/openai_api/completion.rb create mode 100644 lib/openai_api/completion_multi_model.rb create mode 100644 lib/openai_api/embedding.rb create mode 100644 lib/openai_api/helper.rb create mode 100644 lib/openai_api/stream_merger.rb create mode 100644 lib/openai_api/version.rb create mode 100644 openai_api.gemspec create mode 100644 test/fixtures/response.json create mode 100644 test/fixtures/stream_content.jsonl create mode 100644 test/fixtures/stream_content_result.json create mode 100644 test/fixtures/stream_tool_calls.jsonl create mode 100644 test/fixtures/stream_tool_calls_result.json create mode 100644 test/test_helper.rb create mode 100644 test/test_openai_api.rb diff --git a/.env.sample b/.env.sample new file mode 100644 index 0000000..ff7adb9 --- /dev/null +++ b/.env.sample @@ -0,0 +1 @@ +OPENAI_API_KEY = REPLACE_ME diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..37a9318 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,27 @@ +name: Ruby + +on: + push: + branches: + - main + + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + name: Ruby ${{ matrix.ruby }} + strategy: + matrix: + ruby: + - '3.3.0' + + steps: + - uses: actions/checkout@v4 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + - name: Run the default task + run: bundle exec rake diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f2f7838 --- /dev/null +++ b/.gitignore @@ -0,0 +1,108 @@ +/.bundle/ +/.yardoc +/_yardoc/ +/coverage/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ + + + +# Created by https://www.toptal.com/developers/gitignore/api/macos,ruby +# Edit at 
https://www.toptal.com/developers/gitignore?templates=macos,ruby + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Ruby ### +*.gem +*.rbc +/.config +/coverage/ +/InstalledFiles +/pkg/ +/spec/reports/ +/spec/examples.txt +/test/tmp/ +/test/version_tmp/ +/tmp/ + +# Used by dotenv library to load environment variables. +# .env + +# Ignore Byebug command history file. +.byebug_history + +## Specific to RubyMotion: +.dat* +.repl_history +build/ +*.bridgesupport +build-iPhoneOS/ +build-iPhoneSimulator/ + +## Specific to RubyMotion (use of CocoaPods): +# +# We recommend against adding the Pods directory to your .gitignore. However +# you should judge for yourself, the pros and cons are mentioned at: +# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control +# vendor/Pods/ + +## Documentation cache and generated files: +/.yardoc/ +/_yardoc/ +/doc/ +/rdoc/ + +## Environment normalization: +/.bundle/ +/vendor/bundle +/lib/bundler/man/ + +# for a library or gem, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# Gemfile.lock +# .ruby-version +# .ruby-gemset + +# unless supporting rvm < 1.11.0 or doing something fancy, ignore this: +.rvmrc + +# Used by RuboCop. Remote config files pulled in from inherit_from directive. +# .rubocop-https?--* + +# End of https://www.toptal.com/developers/gitignore/api/macos,ruby + +.env +/test/fixtures/vcr_cassettes diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..9f4e6bb --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,18 @@ +require: + - rubocop-rake + - rubocop-minitest + +AllCops: + TargetRubyVersion: 3.3.0 + NewCops: enable + +Style/StringLiterals: + Enabled: true + EnforcedStyle: double_quotes + +Style/StringLiteralsInInterpolation: + Enabled: true + EnforcedStyle: double_quotes + +Layout/LineLength: + Max: 120 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3acbe72 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## [0.1.0] - 2024-05-18 + +- Initial release diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..36722f9 --- /dev/null +++ b/Gemfile @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +# Specify your gem's dependencies in openai_api.gemspec +gemspec + +gem "rake", "~> 13.0" + +gem "minitest", "~> 5.16" + +gem "rubocop", "~> 1.21" +gem "rubocop-minitest" +gem "rubocop-rake" + +gem "vcr" + +# Loads environment variables from .env file +gem "dotenv" diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..8e8c786 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,71 @@ +PATH + remote: . 
+ specs: + openai_api (0.1.0) + event_stream_parser (~> 1.0) + faraday (~> 2.9) + json (~> 2.7.1) + +GEM + remote: https://rubygems.org/ + specs: + ast (2.4.2) + dotenv (3.1.0) + event_stream_parser (1.0.0) + faraday (2.9.0) + faraday-net_http (>= 2.0, < 3.2) + faraday-net_http (3.1.0) + net-http + json (2.7.2) + language_server-protocol (3.17.0.3) + minitest (5.22.3) + net-http (0.4.1) + uri + parallel (1.24.0) + parser (3.3.0.5) + ast (~> 2.4.1) + racc + racc (1.7.3) + rainbow (3.1.1) + rake (13.2.1) + regexp_parser (2.9.0) + rexml (3.2.6) + rubocop (1.63.3) + json (~> 2.3) + language_server-protocol (>= 3.17.0) + parallel (~> 1.10) + parser (>= 3.3.0.2) + rainbow (>= 2.2.2, < 4.0) + regexp_parser (>= 1.8, < 3.0) + rexml (>= 3.2.5, < 4.0) + rubocop-ast (>= 1.31.1, < 2.0) + ruby-progressbar (~> 1.7) + unicode-display_width (>= 2.4.0, < 3.0) + rubocop-ast (1.31.2) + parser (>= 3.3.0.4) + rubocop-minitest (0.35.0) + rubocop (>= 1.61, < 2.0) + rubocop-ast (>= 1.31.1, < 2.0) + rubocop-rake (0.6.0) + rubocop (~> 1.0) + ruby-progressbar (1.13.0) + unicode-display_width (2.5.0) + uri (0.13.0) + vcr (6.2.0) + +PLATFORMS + arm64-darwin-23 + ruby + +DEPENDENCIES + dotenv + minitest (~> 5.16) + openai_api! + rake (~> 13.0) + rubocop (~> 1.21) + rubocop-minitest + rubocop-rake + vcr + +BUNDLED WITH + 2.5.7 diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..b7fd710 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 Devran Cosmo Uenal + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0854812
--- /dev/null
+++ b/README.md
@@ -0,0 +1,88 @@
+# OpenAI API
+
+## Installation
+
+Install the gem and add it to the application's Gemfile by executing:
+
+```sh
+bundle add openai_api
+```
+
+If bundler is not being used to manage dependencies, install the gem by executing:
+
+```sh
+gem install openai_api
+```
+
+## Usage
+
+```ruby
+require "openai_api"
+
+class LanguageModel
+  attr_accessor :name, :kind, :provider, :configuration
+
+  def initialize(name:, kind:, provider:, configuration:)
+    @name = name
+    @kind = kind
+    @provider = provider
+    @configuration = configuration
+  end
+
+  def self.all
+    gpt_4_turbo = LanguageModel.new(
+      name: "GPT-4 Turbo",
+      kind: "completion",
+      provider: "openai",
+      configuration: {
+        "api_key" => "your_key",
+        "model" => "gpt-4-turbo"
+      }
+    )
+
+    ada2 = LanguageModel.new(
+      name: "Ada2",
+      kind: "embedding",
+      provider: "openai",
+      configuration: {
+        "api_key" => "your_key",
+        "model" => "text-embedding-ada-002"
+      }
+    )
+
+    [gpt_4_turbo, ada2]
+  end
+end
+
+first_completion_model = LanguageModel.all.find { |model| model.kind == "completion" }
+
+parameters = {
+  "messages" => [
+    {
+      "role" => "system",
+      "content" => "Tell me a joke"
+    }
+  ],
+  "model" => "gpt-4-turbo"
+}
+response = OpenAIAPI::Completion
+  .new(first_completion_model)
+  .chat(parameters)
+puts response
+
+
+first_embedding_model = LanguageModel.all.find { |model| model.kind == "embedding" }
+
+parameters = {
+  "input" => "Once upon a time",
+  "model" => "text-embedding-ada-002"
+}
+response = OpenAIAPI::Embedding
+  .new(first_embedding_model)
+  .embed(parameters)
+puts response
+```
+
+## License
+
+The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
diff --git a/Rakefile b/Rakefile
new file mode 100644
index 0000000..2bf771f
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+require "bundler/gem_tasks"
+require "minitest/test_task"
+
+Minitest::TestTask.create
+
+require "rubocop/rake_task"
+
+RuboCop::RakeTask.new
+
+task default: %i[test rubocop]
diff --git a/bin/console b/bin/console
new file mode 100755
index 0000000..3389c37
--- /dev/null
+++ b/bin/console
@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require "bundler/setup"
+require "openai_api"
+
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
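+#
+# For example (a hypothetical model object; anything that responds to #name
+# and #configuration, like the LanguageModel class in the README, works):
+#
+#   Model = Struct.new(:name, :configuration)
+#   model = Model.new("GPT-4", { "api_key" => ENV["OPENAI_API_KEY"] })
+#   OpenAIAPI::Completion.new(model).chat(
+#     { "model" => "gpt-4-turbo", "messages" => [{ "role" => "user", "content" => "Hi" }] }
+#   )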
+
+require "irb"
+IRB.start(__FILE__)
diff --git a/bin/setup b/bin/setup
new file mode 100755
index 0000000..dce67d8
--- /dev/null
+++ b/bin/setup
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -euo pipefail
+IFS=$'\n\t'
+set -vx
+
+bundle install
+
+# Do any other automated setup that you need to do here
diff --git a/lib/openai_api.rb b/lib/openai_api.rb
new file mode 100644
index 0000000..ccc7a2d
--- /dev/null
+++ b/lib/openai_api.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+require "event_stream_parser"
+require "faraday"
+require "json"
+
+require_relative "openai_api/version"
+require_relative "openai_api/completion"
+require_relative "openai_api/completion_multi_model"
+require_relative "openai_api/embedding"
+require_relative "openai_api/helper"
+require_relative "openai_api/stream_merger"
+
+module OpenAIAPI
+  class Error < StandardError; end
+  class ConnectionError < Error; end
+  class TimeoutError < Error; end
+  class AuthenticationError < Error; end
+  class RateLimitError < Error; end
+  class DeploymentNotFoundError < Error; end
+  class NotFoundError < Error; end
+
+  class ContentFilterError < Error
+    attr_reader :content_filters
+
+    def initialize(message, content_filters)
+      super(message)
+      @content_filters = content_filters
+    end
+  end
+
+  class InvalidRequestError < Error; end
+  class UnexpectedResponseError < Error; end
+end
diff --git a/lib/openai_api/completion.rb b/lib/openai_api/completion.rb
new file mode 100644
index 0000000..fe07014
--- /dev/null
+++ b/lib/openai_api/completion.rb
@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+
+module OpenAIAPI
+  # A class to call the OpenAI API with a single model
+  class Completion # rubocop:disable Metrics/ClassLength
+    attr_reader :name, :api_key, :api_url
+    attr_accessor :stream, :raw
+
+    END_OF_STREAM_MARKER = "[DONE]"
+
+    def initialize(model, stream: nil, raw: false)
+      @name = model.name
+      @api_key = model.configuration["api_key"]
+      @api_url = "https://api.openai.com/v1/chat/completions"
+      @stream = stream
+      @raw = raw
+    end
+
+    def chat(parameters)
+      # Rails.logger.info("Chatting with \"#{@name}\" model with URL: #{@api_url}.")
+      if @stream.nil?
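+        # No stream callback given: one blocking request, parsed unless @raw is set.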
+        single_request_chat(parameters)
+      else
+        stream_chat(parameters)
+      end
+    rescue Faraday::ConnectionFailed => e
+      # Rails.logger.error("API connection failed: #{e.message}")
+      raise OpenAIAPI::ConnectionError, "Connection to API failed: #{e}"
+    rescue Faraday::TimeoutError => e
+      # Rails.logger.error("API request timed out: #{e.message}")
+      raise OpenAIAPI::TimeoutError, "API request timed out: #{e}"
+    end
+
+    private
+
+    def connection
+      @connection ||= Faraday.new(url: @api_url, headers: request_headers) do |faraday|
+        faraday.options.open_timeout = 60 # set connection timeout
+        faraday.options.timeout = 300 # set read timeout
+      end
+    end
+
+    def request_headers
+      {
+        "Content-Type" => "application/json",
+        "Authorization" => "Bearer #{api_key}"
+      }
+    end
+
+    def single_request_chat(parameters)
+      response = connection.post do |request|
+        request.body = parameters.to_json
+      end
+
+      return response if @raw
+
+      handle_response(response)
+    end
+
+    def handle_response(response)
+      return JSON.parse(response.body) if response.status == 200
+
+      handle_error(response.status, response.body)
+    end
+
+    def stream_chat(parameters)
+      parameters = parameters.merge(stream: true)
+      parser = EventStreamParser::Parser.new
+
+      connection.post do |request|
+        request.options.on_data = proc do |chunk, _, env|
+          handle_stream_chunk(chunk, env, parser)
+        end
+        request.body = parameters.to_json
+      end
+    end
+
+    def handle_stream_chunk(chunk, env, parser)
+      handle_error(env.status, chunk) unless env.status == 200
+
+      parser.feed(chunk) do |_type, data, _id|
+        next if data == END_OF_STREAM_MARKER
+
+        @stream&.call(JSON.parse(data), env)
+      end
+    end
+
+    def handle_error(status, response_body = nil)
+      error_response = parse_error_response(response_body)
+      case status
+      when 400 then handle_error400(error_response)
+      when 401 then raise OpenAIAPI::AuthenticationError, "Invalid API key: \n#{error_response}"
+      when 404 then handle_error404(error_response)
+      when 429 then raise OpenAIAPI::RateLimitError, "Rate limit exceeded: \n#{error_response}"
+      else handle_unknown_error(status, error_response)
+      end
+    end
+
+    def handle_error404(error_response)
+      case error_response["code"]
+      when "DeploymentNotFound"
+        raise OpenAIAPI::DeploymentNotFoundError, "Deployment not found: \n#{error_response}"
+      else
+        raise OpenAIAPI::NotFoundError, "Resource not found: \n#{error_response}"
+      end
+    end
+
+    def handle_error400(error_response)
+      case error_response["code"]
+      when "content_filter"
+        # ContentFilterError#initialize expects the message and the filter details.
+        raise OpenAIAPI::ContentFilterError.new("Content filter triggered: \n#{error_response}", error_response)
+      else
+        raise OpenAIAPI::InvalidRequestError, "Invalid request: \n#{error_response}"
+      end
+    end
+
+    def handle_unknown_error(status, error_response)
+      error_message = "Unexpected response from API: \n#{status}"
+      error_message += " - #{error_response}" unless error_response.empty?
+      raise OpenAIAPI::UnexpectedResponseError, error_message
+    end
+
+    def parse_error_response(body)
+      return "" if body.nil? || body.empty?
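+      # Error bodies look like {"error" => {"code" => ..., "message" => ...}};
+      # only the nested "error" object is surfaced in raised messages.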
+
+      begin
+        JSON.parse(body)["error"]
+      rescue JSON::ParserError
+        "Error details not available"
+      end
+    end
+  end
+end
diff --git a/lib/openai_api/completion_multi_model.rb b/lib/openai_api/completion_multi_model.rb
new file mode 100644
index 0000000..b3dc0c3
--- /dev/null
+++ b/lib/openai_api/completion_multi_model.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module OpenAIAPI
+  # Call the OpenAI API with multiple models
+  class CompletionMultiModel
+    attr_reader :models
+    attr_accessor :stream
+
+    def initialize(models, stream: nil)
+      @models = models
+      @stream = stream
+    end
+
+    def chat(parameters) # rubocop:disable Metrics/MethodLength
+      return if @models.empty?
+
+      @models.each do |model|
+        openai_client = OpenAIAPI::Completion.new(model, stream: @stream)
+        return openai_client.chat(parameters)
+      rescue OpenAIAPI::NotFoundError,
+             OpenAIAPI::DeploymentNotFoundError,
+             OpenAIAPI::AuthenticationError,
+             OpenAIAPI::RateLimitError,
+             OpenAIAPI::InvalidRequestError,
+             OpenAIAPI::UnexpectedResponseError => e
+        puts "Model #{model.name} not available"
+        puts e.message
+        puts "Trying next model..."
+        next
+      end
+
+      raise OpenAIAPI::NotFoundError, "No models available"
+    end
+  end
+end
diff --git a/lib/openai_api/embedding.rb b/lib/openai_api/embedding.rb
new file mode 100644
index 0000000..99715ca
--- /dev/null
+++ b/lib/openai_api/embedding.rb
@@ -0,0 +1,102 @@
+# frozen_string_literal: true
+
+module OpenAIAPI
+  # A class to call the OpenAI API with a single model
+  class Embedding
+    attr_reader :name, :api_key, :api_url
+
+    def initialize(model)
+      @name = model.name
+      @api_key = model.configuration["api_key"]
+      @api_url = "https://api.openai.com/v1/embeddings"
+    end
+
+    def embed(parameters)
+      # Rails.logger.info("Embedding with \"#{@name}\" model with URL: #{@api_url}.")
+      response = connection.post do |request|
+        request.params = params
+        request.body = parameters.to_json
+      end
+      handle_response(response)
+    rescue Faraday::ConnectionFailed => e
+      # Rails.logger.error("API connection failed: #{e.message}")
+      raise OpenAIAPI::ConnectionError, "Connection to API failed: #{e}"
+    rescue Faraday::TimeoutError => e
+      # Rails.logger.error("API request timed out: #{e.message}")
+      raise OpenAIAPI::TimeoutError, "API request timed out: #{e}"
+    end
+
+    private
+
+    def connection
+      @connection ||= Faraday.new(url: @api_url, headers: request_headers) do |faraday|
+        faraday.options.open_timeout = 10 # set connection timeout
+        faraday.options.timeout = 60 # set read timeout
+      end
+    end
+
+    def request_headers
+      {
+        "Content-Type" => "application/json",
+        "Authorization" => "Bearer #{api_key}"
+      }
+    end
+
+    def params
+      # No additional query parameters are needed for the OpenAI endpoint.
+      {}
+    end
+
+    def handle_response(response)
+      return JSON.parse(response.body) if response.status == 200
+
+      handle_error(response.status, response.body)
+    end
+
+    def handle_error(status, response_body = nil)
+      error_response = parse_error_response(response_body)
+      case status
+      when 400 then handle_error400(error_response)
+      when 401 then raise OpenAIAPI::AuthenticationError, "Invalid API key: \n#{error_response}"
+      when 404 then handle_error404(error_response)
+      when 429 then raise OpenAIAPI::RateLimitError, "Rate limit exceeded: \n#{error_response}"
+      else handle_unknown_error(status, error_response)
+      end
+    end
+
+    def handle_error404(error_response)
+      case error_response["code"]
+      when "DeploymentNotFound"
+        raise OpenAIAPI::DeploymentNotFoundError, "Deployment not found: \n#{error_response}"
+      else
+        raise OpenAIAPI::NotFoundError, "Resource not found: \n#{error_response}"
+      end
+    end
+
+    def handle_error400(error_response)
+      case error_response["code"]
+      when "content_filter"
+        # ContentFilterError#initialize expects the message and the filter details.
+        raise OpenAIAPI::ContentFilterError.new("Content filter triggered: \n#{error_response}", error_response)
+      else
+        raise OpenAIAPI::InvalidRequestError, "Invalid request: \n#{error_response}"
+      end
+    end
+
+    def handle_unknown_error(status, error_response)
+      error_message = "Unexpected response from API: \n#{status}"
+      error_message += " - #{error_response}" unless error_response.empty?
+      raise OpenAIAPI::UnexpectedResponseError, error_message
+    end
+
+    def parse_error_response(body)
+      return "" if body.nil? || body.empty?
+
+      begin
+        JSON.parse(body)["error"]
+      rescue JSON::ParserError
+        "Error details not available"
+      end
+    end
+  end
+end
diff --git a/lib/openai_api/helper.rb b/lib/openai_api/helper.rb
new file mode 100644
index 0000000..c6a1eae
--- /dev/null
+++ b/lib/openai_api/helper.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module OpenAIAPI
+  # Helper methods for the OpenAI API
+  class Helper
+    def self.clean_body(body)
+      body.merge("choices" => clean_choices(body["choices"]))
+          .except("object", "created", "model", "prompt_filter_results", "system_fingerprint", "id")
+    end
+
+    def self.clean_choices(choices)
+      choices.map do |choice|
+        choice.except("content_filter_results", "finish_reason", "index")
+      end
+    end
+  end
+end
diff --git a/lib/openai_api/stream_merger.rb b/lib/openai_api/stream_merger.rb
new file mode 100644
index 0000000..ff85bf8
--- /dev/null
+++ b/lib/openai_api/stream_merger.rb
@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+
+module OpenAIAPI
+  # StreamMerger takes an OpenAI stream of data and collects it into a single object
+  class StreamMerger
+    attr_accessor :merged
+
+    def initialize
+      @merged = {}
+    end
+
+    def merge(stream_chunk)
+      stream_chunk.each do |key, value|
+        # "choices" are merged choice by choice below, never replaced wholesale.
+        next if !@merged[key].nil? && key == "choices"
+
+        @merged[key] = value if @merged[key].to_s.empty?
+      end
+
+      stream_chunk["choices"]&.each do |choice|
+        merge_choice(choice)
+      end
+    end
+
+    def any_choice_finished?
+      choices.any? { |choice| !choice["finish_reason"].nil? }
+    end
+
+    def all_choices_finished?
+      return false if choices.empty?
+
+      choices.all? { |choice| !choice["finish_reason"].nil? }
+    end
+
+    private
+
+    def merge_choice(choice)
+      choice_index = choice["index"]
+      merged_choice = find_choice_by_index(choice_index)
+
+      if merged_choice
+        update_choice(merged_choice, choice)
+        merge_delta(merged_choice["delta"], choice)
+      else
+        @merged["choices"] << choice
+      end
+    end
+
+    def find_choice_by_index(choice_index)
+      @merged["choices"].find { |c| c["index"] == choice_index }
+    end
+
+    def update_choice(merged_choice, choice)
+      choice.each do |key, value|
+        # Skip "delta" once it is set; it is merged key by key in #merge_delta.
+        next if key == "delta" && !merged_choice[key].nil?
+
+        merged_choice[key] = value if merged_choice[key].to_s.empty?
+      end
+    end
+
+    def merge_delta(merged_delta, choice)
+      return unless merged_delta
+
+      incoming_delta = choice["delta"] || {}
+      incoming_delta.each do |key, value|
+        # "content" and "tool_calls" are accumulated incrementally below.
+        next if %w[content tool_calls].include?(key)
+
+        merged_delta[key] = value if merged_delta[key].to_s.empty?
+      end
+
+      merge_delta_content(choice.dig("delta", "content"), merged_delta)
+      merge_delta_tool_calls(choice.dig("delta", "tool_calls"), merged_delta)
+    end
+
+    def merge_delta_content(content, merged_delta)
+      return if content.nil?
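+      # Content arrives as ordered string fragments; append each fragment to
+      # the text accumulated so far for this choice.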
+
+      previous_content = merged_delta["content"] || ""
+      merged_delta["content"] = (previous_content + content)
+    end
+
+    def merge_delta_tool_calls(tool_calls, merged_delta)
+      # Proceed only if tool_calls is not nil and not empty.
+      return if tool_calls.nil? || tool_calls.empty?
+
+      merged_delta["tool_calls"] ||= []
+      tool_calls.each do |tool_call|
+        process_tool_call(tool_call, merged_delta)
+      end
+    end
+
+    def process_tool_call(tool_call, merged_delta)
+      tool_call_index = tool_call["index"]
+      merged_tool_call = find_merged_tool_call_by_index(tool_call_index, merged_delta)
+
+      # A tool call index we have not seen before is adopted as-is.
+      unless merged_tool_call
+        merged_delta["tool_calls"] << tool_call
+        return
+      end
+
+      tool_call.each do |key, value|
+        # "function" is never replaced wholesale; its arguments are appended below.
+        next if key == "function"
+
+        merged_tool_call[key] = value if merged_tool_call[key].to_s.empty?
+      end
+
+      append_function_arguments(tool_call.dig("function", "arguments"), merged_tool_call)
+    end
+
+    def find_merged_tool_call_by_index(index, merged_delta)
+      merged_delta["tool_calls"].find { |tc| tc["index"] == index }
+    end
+
+    def append_function_arguments(function_arguments, merged_tool_call)
+      # Early return if function_arguments is nil to avoid unnecessary processing
+      return if function_arguments.nil?
+
+      # Ensure there's a "function" hash whose "arguments" value is an unfrozen
+      # string (this file uses frozen string literals) that we can append to.
+      merged_tool_call["function"] ||= {}
+      merged_tool_call["function"]["arguments"] ||= +""
+
+      # Append new function arguments to the existing ones
+      merged_tool_call["function"]["arguments"] << function_arguments
+    end
+
+    def choices
+      @merged["choices"] || []
+    end
+  end
+end
diff --git a/lib/openai_api/version.rb b/lib/openai_api/version.rb
new file mode 100644
index 0000000..569e81e
--- /dev/null
+++ b/lib/openai_api/version.rb
@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+
+module OpenAIAPI
+  VERSION = "0.1.0"
+end
diff --git a/openai_api.gemspec b/openai_api.gemspec
new file mode 100644
index 0000000..5a2b2bc
--- /dev/null
+++ b/openai_api.gemspec
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+require_relative "lib/openai_api/version"
+
+Gem::Specification.new do |spec|
+  spec.name = "openai_api"
+  spec.version = OpenAIAPI::VERSION
+  spec.authors = ["Devran Cosmo Uenal"]
+  spec.email = ["maccosmo@gmail.com"]
+
+  spec.summary = "Unofficial OpenAI API Client"
+  spec.description = "An unofficial client for the OpenAI API."
+  spec.homepage = "https://bavmind.com"
+  spec.license = "MIT"
+  spec.required_ruby_version = ">= 3.3.0"
+
+  spec.metadata["allowed_push_host"] = "https://rubygems.pkg.github.com/bavmind"
+  spec.metadata["rubygems_mfa_required"] = "true"
+
+  spec.metadata["homepage_uri"] = spec.homepage
+  spec.metadata["source_code_uri"] = "https://github.com/bavmind/openai_api"
+  spec.metadata["changelog_uri"] = "https://github.com/bavmind/openai_api/blob/main/CHANGELOG.md"
+
+  # Specify which files should be added to the gem when it is released.
+  # The `git ls-files -z` call lists the files that have been added into git.
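+  # Everything under bin/, test/, spec/ and features/, plus Git/CI metadata and
+  # the Gemfile, is excluded so it does not ship inside the packaged gem.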
+ gemspec = File.basename(__FILE__) + spec.files = IO.popen(%w[git ls-files -z], chdir: __dir__, err: IO::NULL) do |ls| + ls.readlines("\x0", chomp: true).reject do |f| + (f == gemspec) || + f.start_with?(*%w[bin/ test/ spec/ features/ .git .github appveyor Gemfile]) + end + end + spec.bindir = "exe" + spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) } + spec.require_paths = ["lib"] + + # Uncomment to register a new dependency of your gem + # spec.add_dependency "example-gem", "~> 1.0" + spec.add_dependency "event_stream_parser", "~> 1.0" + spec.add_dependency "faraday", "~> 2.9" + spec.add_dependency "json", "~> 2.7.1" + + # For more information and examples about making a new gem, check out our + # guide at: https://bundler.io/guides/creating_gem.html +end diff --git a/test/fixtures/response.json b/test/fixtures/response.json new file mode 100644 index 0000000..e76ba0f --- /dev/null +++ b/test/fixtures/response.json @@ -0,0 +1,63 @@ +{ + "id": "chatcmpl-98srcBUwUVmk6wE6Fta3gF2XLZ7NM", + "object": "chat.completion", + "created": 1711904548, + "model": "gpt-35-turbo", + "prompt_filter_results": [ + { + "prompt_index": 0, + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + } + } + ], + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "role": "assistant", + "content": "Why don't scientists trust atoms?\n\nBecause they make up everything!" + }, + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + } + } + ], + "usage": { + "prompt_tokens": 11, + "completion_tokens": 13, + "total_tokens": 24 + }, + "system_fingerprint": "fp_68a7d165bf" +} diff --git a/test/fixtures/stream_content.jsonl b/test/fixtures/stream_content.jsonl new file mode 100644 index 0000000..5de9553 --- /dev/null +++ b/test/fixtures/stream_content.jsonl @@ -0,0 +1,19 @@ +{"model":"","choices":[]} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"role":"assistant"}},{"finish_reason":null,"index":1,"delta":{"role":"assistant"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":"Why"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":" don"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":"'t"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":" scientists"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":" trust"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":" atoms"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":"?\n\n"}}],"system_fingerprint":"fp_68a7d165bf"} 
+{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":"Because"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":" they"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":" make"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":1,"delta":{"content":"Hello"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":" up"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":" everything"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":1,"delta":{"content":" world!"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":"stop","index":1,"delta":{}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":null,"index":0,"delta":{"content":"!"}}],"system_fingerprint":"fp_68a7d165bf"} +{"model":"gpt-35-turbo","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"system_fingerprint":"fp_68a7d165bf"} diff --git a/test/fixtures/stream_content_result.json b/test/fixtures/stream_content_result.json new file mode 100644 index 0000000..490dbaf --- /dev/null +++ b/test/fixtures/stream_content_result.json @@ -0,0 +1,22 @@ +{ + "model": "gpt-35-turbo", + "choices": [ + { + "index": 0, + "delta": { + "content": "Why don't scientists trust atoms?\n\nBecause they make up everything!", + "role": "assistant" + }, + "finish_reason": "stop" + }, + { + "index": 1, + "delta": { + "content": "Hello world!", + "role": "assistant" + }, + "finish_reason": "stop" + } + ], + "system_fingerprint": "fp_68a7d165bf" +} diff --git a/test/fixtures/stream_tool_calls.jsonl b/test/fixtures/stream_tool_calls.jsonl new file mode 100644 index 0000000..69181dc --- /dev/null +++ b/test/fixtures/stream_tool_calls.jsonl @@ -0,0 +1,19 @@ +{"choices":[],"model":""} +{"choices":[{"delta":{"role":"assistant","tool_calls":[{"function":{"arguments":"","name":"knowledge_base_retrieval"},"index":0,"type":"function"}, {"function":{"arguments":"","name":"search"},"index":1,"type":"function"}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"role":"assistant","tool_calls":[{"function":{"arguments":"","name":"create_ticket"},"index":0,"type":"function"}]},"finish_reason":null,"index":1}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"{\""},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"query"},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"_string"},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\":\""},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} 
+{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"found"},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"ers"},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":" of"},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":" Bav"},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"{\""},"index":0}]},"finish_reason":null,"index":1}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\"}"},"index":0}]},"finish_reason":null,"index":1}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{},"finish_reason":"tool_calls","index":1}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"Mind"},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\"}"},"index":0}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"{\""},"index":1}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\"}"},"index":1}]},"finish_reason":null,"index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} +{"choices":[{"delta":{},"finish_reason":"tool_calls","index":0}],"model":"gpt-4","system_fingerprint":"fp_2f57f81c11"} diff --git a/test/fixtures/stream_tool_calls_result.json b/test/fixtures/stream_tool_calls_result.json new file mode 100644 index 0000000..55aff9d --- /dev/null +++ b/test/fixtures/stream_tool_calls_result.json @@ -0,0 +1,48 @@ +{ + "choices": [ + { + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "{\"query_string\":\"founders of BavMind\"}", + "name": "knowledge_base_retrieval" + }, + "index": 0, + "type": "function" + }, + { + "function": { + "arguments": "{\"\"}", + "name": "search" + }, + "index": 1, + "type": "function" + } + ] + }, + "finish_reason": "tool_calls", + "index": 0 + }, + { + "delta": { + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": "{\"\"}", + "name": "create_ticket" + }, + "index": 0, + "type": "function" + } + ] + }, + "finish_reason": "tool_calls", + "index": 1 + } + ], + "model": "gpt-4", + "system_fingerprint": "fp_2f57f81c11" +} diff --git a/test/test_helper.rb b/test/test_helper.rb new file mode 100644 index 0000000..5a8349c --- /dev/null +++ b/test/test_helper.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +$LOAD_PATH.unshift File.expand_path("../lib", __dir__) +require "openai_api" + +require "minitest/autorun" diff --git a/test/test_openai_api.rb b/test/test_openai_api.rb new file mode 100644 index 0000000..88020df --- /dev/null +++ b/test/test_openai_api.rb @@ -0,0 +1,175 @@ +# frozen_string_literal: true + +require "test_helper" +require "json" +require "dotenv/load" +require "vcr" + +VCR.configure do |config| + config.cassette_library_dir = "test/fixtures/vcr_cassettes" + config.hook_into 
:faraday
+end
+
+class LanguageModel
+  attr_accessor :name, :kind, :provider, :configuration
+
+  def initialize(name:, kind:, provider:, configuration:)
+    @name = name
+    @kind = kind
+    @provider = provider
+    @configuration = configuration
+  end
+
+  def self.all # rubocop:disable Metrics/MethodLength
+    ada2 = LanguageModel.new(
+      name: "Ada2",
+      kind: "embedding",
+      provider: "openai",
+      configuration: {
+        "api_key" => ENV.fetch("OPENAI_API_KEY"),
+        "model" => "text-embedding-ada-002"
+      }
+    )
+
+    gpt4turbo = LanguageModel.new(
+      name: "GPT-4-Turbo",
+      kind: "completion",
+      provider: "openai",
+      configuration: {
+        "api_key" => ENV.fetch("OPENAI_API_KEY"),
+        "model" => "gpt-4-turbo"
+      }
+    )
+
+    [ada2, gpt4turbo]
+  end
+end
+
+class TestOpenAIAPI < Minitest::Test
+  def test_that_it_has_a_version_number
+    refute_nil ::OpenAIAPI::VERSION
+  end
+
+  def test_helper_clean_body # rubocop:disable Metrics/MethodLength
+    response = File.read(File.expand_path("fixtures/response.json", __dir__))
+    json = JSON.parse(response)
+
+    result = {
+      "choices" => [
+        {
+          "message" => {
+            "role" => "assistant",
+            "content" => "Why don't scientists trust atoms?\n\nBecause they make up everything!"
+          }
+        }
+      ],
+      "usage" => {
+        "prompt_tokens" => 11,
+        "completion_tokens" => 13,
+        "total_tokens" => 24
+      }
+    }
+
+    assert_equal result, OpenAIAPI::Helper.clean_body(json)
+  end
+
+  def test_stream_merger_content
+    kind = "content"
+    assert_stream_merger(kind)
+  end
+
+  def test_stream_merger_tool_calls
+    kind = "tool_calls"
+    assert_stream_merger(kind)
+  end
+
+  def test_chat # rubocop:disable Metrics/MethodLength
+    first_completion_model = LanguageModel.all.find { |model| model.kind == "completion" }
+    client = OpenAIAPI::Completion.new(first_completion_model)
+    parameters = {
+      "messages" => [
+        {
+          "role" => "system",
+          "content" => "Tell me a joke"
+        }
+      ],
+      "model" => "gpt-4o"
+    }
+
+    VCR.use_cassette("test_chat") do
+      response = client.chat(parameters)
+      completion_text = response.dig("choices", 0, "message", "content")
+
+      assert_instance_of String, completion_text
+    end
+  end
+
+  def test_chat_content_filter # rubocop:disable Metrics/MethodLength
+    first_completion_model = LanguageModel.all.find { |model| model.kind == "completion" }
+    client = OpenAIAPI::Completion.new(first_completion_model)
+    parameters = {
+      "messages" => [
+        {
+          "role" => "system",
+          "content" => "You are a nice bot."
+        },
+        {
+          "role" => "user",
+          "content" => "FUCK SHIT PISS."
+        }
+      ],
+      "model" => "gpt-4o"
+    }
+
+    VCR.use_cassette("test_chat_content_filter") do
+      response = client.chat(parameters)
+      completion_text = response.dig("choices", 0, "message", "content")
+
+      assert_instance_of String, completion_text
+    end
+  end
+
+  def test_embedding
+    first_embedding_model = LanguageModel.all.find { |model| model.kind == "embedding" }
+    client = OpenAIAPI::Embedding.new(first_embedding_model)
+    parameters = {
+      "input" => "Once upon a time",
+      "model" => "text-embedding-ada-002"
+    }
+
+    VCR.use_cassette("test_embedding") do
+      response = client.embed(parameters)
+      embedding = response.dig("data", 0, "embedding")
+
+      assert_instance_of Array, embedding
+    end
+  end
+
+  private
+
+  def assert_stream_merger(kind) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
+    input_file_path = "stream_#{kind}.jsonl"
+    reference_file_path = "stream_#{kind}_result.json"
+
+    merger = OpenAIAPI::StreamMerger.new
+
+    stream_data = File
+                  .read(File.expand_path("fixtures/#{input_file_path}", __dir__))
+                  .each_line
+                  .map { |line| JSON.parse(line) unless line.strip.empty? }
+                  .compact
+
+    stream_data.each do |stream_chunk|
+      merger.merge(stream_chunk)
+    end
+
+    merged_stream_data = merger.merged
+
+    # Ensuring choices are sorted by index for consistency
+    merged_stream_data["choices"].sort_by! { |choice| choice["index"] }
+
+    reference = JSON.parse(File.read(File.expand_path("fixtures/#{reference_file_path}", __dir__)))
+
+    assert_equal reference, merged_stream_data, "Stream merger failed for #{kind}"
+  end
+end