Draft
Changes from all commits (101 commits)
5e8c1bb
Initial red-candle provider implementation
cpetersen Sep 8, 2025
5c770dd
Starting to work
cpetersen Sep 8, 2025
fe199a8
Swap qwen for mistral
cpetersen Sep 8, 2025
b8bf331
Trying to add red-candle to the models_to_test.rb
cpetersen Sep 8, 2025
d98834c
Adding red-candle to the models_to_test file
cpetersen Sep 8, 2025
b207f69
Trying to fix the way tool calling support is checked in the specs
cpetersen Sep 8, 2025
ab46320
Deconvoluting local model checks and tool calling support
cpetersen Sep 8, 2025
97d58d2
I think we finally got the local tool calling check correct
cpetersen Sep 8, 2025
9c7f9dc
Enable context length validation for the RedCandle Provider
cpetersen Sep 8, 2025
d5c9129
Working on rubocop fixes
cpetersen Sep 8, 2025
70e1b24
Fixing the rubocop errors
cpetersen Sep 9, 2025
6956724
stubbing the red-candle inference stuff to speed up specs
cpetersen Sep 9, 2025
0aad7d7
Adding an ENV variable so you toggle real red-candle inference on
cpetersen Sep 9, 2025
52a13ca
Adding red-candle to the list of providers in the README
cpetersen Sep 9, 2025
63b4a81
Refactor acts_as API for Rails integration (v1.7)
crmne Sep 9, 2025
78d6429
Updated models
crmne Sep 9, 2025
3f17200
Fix install generator template variable references
crmne Sep 9, 2025
6c7d7be
Use default model when none specified in ActiveRecord chats
crmne Sep 9, 2025
4076001
Add chat UI scaffold generator with Turbo streaming
crmne Sep 9, 2025
b883989
Adding a new bundle group so developer can choose to include red-cand…
cpetersen Sep 9, 2025
685230c
Adding a comment about possibly supporting more red-candle models in …
cpetersen Sep 9, 2025
a928bb1
Remove red-candle from the gemfiles
cpetersen Sep 9, 2025
ee5b762
Properly register red-candle models
cpetersen Sep 9, 2025
43cc0b8
Removed some unused config options
cpetersen Sep 9, 2025
4b67818
Updating the gemfiles again
cpetersen Sep 9, 2025
c1ac17d
Make the capabilities file match the actual capabilities
cpetersen Sep 9, 2025
54b9834
Deep merge chat options
cpetersen Sep 9, 2025
c78ce40
make red-candle off by default
orangewolf Sep 10, 2025
6816be9
improve error messages
orangewolf Sep 10, 2025
a258a39
improved error message
orangewolf Sep 10, 2025
c8df558
Move broadcasts_to from install to chat_ui generator
crmne Sep 10, 2025
f22e0bd
Update generators to support custom model names consistently
crmne Sep 10, 2025
89f8c3c
Simplified post install
crmne Sep 10, 2025
e109713
Remove Git LFS completely from the project
crmne Sep 10, 2025
c893323
Improve v1.7 upgrade experience
crmne Sep 10, 2025
7a77dcc
Update upgrade instructions for custom model names and API usage
crmne Sep 10, 2025
da34385
Enhance chat UI generator with improved UX and models management
crmne Sep 10, 2025
3b1e8cf
Bump to 1.7.0
crmne Sep 10, 2025
7f00d26
Updated appraisal gemfiles
crmne Sep 10, 2025
3d57b9e
Bust README gem cache
crmne Sep 10, 2025
ad036e1
Fix namespaced model table names in upgrade generator (#398)
willcosgrove Sep 10, 2025
c811173
Reorganize generators to follow Rails conventions
crmne Sep 10, 2025
ecc8afa
Fix namespaced models in Model migration and foreign key migrations (…
willcosgrove Sep 10, 2025
004563e
add additional models
orangewolf Sep 11, 2025
552732c
Improve upgrade generator and add troubleshooting docs
crmne Sep 11, 2025
1a5ad13
Remove git LFS support from pipelines
crmne Sep 11, 2025
939d532
Add automatic acts_as declaration updates to upgrade generator
crmne Sep 11, 2025
cd3dfc5
Bump version to 1.7.1
crmne Sep 11, 2025
6ee02d0
Updated appraisal gemfiles
crmne Sep 11, 2025
fa10f0c
Bust README gem cache
crmne Sep 11, 2025
c4895d6
seperate out tokenizers from gguf
orangewolf Sep 11, 2025
0dc8e9a
more complete error message
orangewolf Sep 11, 2025
8c87b59
Working on documentation
cpetersen Sep 11, 2025
252f97f
Merge branch 'main' into red-candle
cpetersen Sep 11, 2025
d437f73
red-candle is optional
cpetersen Sep 11, 2025
9bdb434
require 'candle' is standard
cpetersen Sep 11, 2025
d52e26e
rubocop
orangewolf Sep 12, 2025
8ec93e8
use a spec helper
orangewolf Sep 12, 2025
d1696ff
Remove the too cute pricing method
cpetersen Sep 12, 2025
62a0389
Fix the comment for RubyLLM::Providers::RedCandle::Capabilities
cpetersen Sep 12, 2025
90128bb
Make the require_relative actually relative
cpetersen Sep 12, 2025
9ab992d
Updatae to red-candle 1.3.0 to support ruby 3.1
cpetersen Sep 13, 2025
922e0e9
Update the comment
cpetersen Sep 13, 2025
0d23da4
Fix create_table migrations to prevent foreign key errors (#409) (#411)
matiasmoya Sep 13, 2025
078ef25
Fix: Add resolve method delegation from Models instance to class (#407)
kieranklaassen Sep 13, 2025
32b3648
Models helps should return all supporting modalities (#408)
dacamp Sep 13, 2025
497e3d8
Add Content Moderation Feature (#383)
iraszl Sep 14, 2025
e9f8d50
Fix [BUG] Inflection breaks Rails apps using the `Llm` module/name/na…
crmne Sep 14, 2025
aacd639
Updated appraisal gemfiles
crmne Sep 14, 2025
4ff2231
Add video file support (#405)
altxtech Sep 14, 2025
2ace2d3
Changed note style in video docs
crmne Sep 14, 2025
a4fae99
Remove outdated version notes and clarified moderation being availabl…
crmne Sep 14, 2025
e27eb10
Updated models
crmne Sep 14, 2025
0cb6299
Bump version to 1.8.0
crmne Sep 14, 2025
647756e
Bust gem version cache in README
crmne Sep 14, 2025
a309326
Updated documentation with latest changes
crmne Sep 14, 2025
e99371c
Added moderation to readme and index
crmne Sep 14, 2025
df8ef75
Update gpt-5 capabilities (#345)
mnort9 Sep 15, 2025
96d06c4
Updated models
crmne Sep 15, 2025
369e9d2
Merge branch 'main' into red-candle
orangewolf Sep 15, 2025
c79b852
Cleaned up injection into message model class for chat UI generator
crmne Sep 16, 2025
f9ce1e7
Updated appraisal gemfiles
crmne Sep 16, 2025
1e581cf
Merge branch 'main' into red-candle
orangewolf Sep 17, 2025
0e8cded
Production-ready chunk streaming for chat UI generator
crmne Sep 21, 2025
ae46014
Add funding URI to gemspec metadata
crmne Sep 21, 2025
6a9998a
Updated Appraisal gemfiles
crmne Sep 21, 2025
e08b2e5
Updated models
crmne Sep 21, 2025
46ac613
Bump version to 1.8.1
crmne Sep 21, 2025
a74233f
Display tool calls in message template (#416)
marckohlbrugge Sep 22, 2025
25ea0d3
Merge branch 'main' into red-candle
orangewolf Sep 22, 2025
636ef94
Set adapter to be net_http instead of Faraday.default_adapter. (#429)
jkogara Sep 24, 2025
8823739
Fix chat UI generator for namespaced models.
crmne Sep 24, 2025
c2e5bff
Simplify moderation example in documentation
crmne Sep 24, 2025
a70717f
Run generator tests only for latest version of Ruby and Rails
crmne Sep 24, 2025
d08118b
Run full test suite in latest Ruby and Rails version and upload test …
crmne Sep 24, 2025
1f5bc69
Exclude generators from codecov calculation
crmne Sep 24, 2025
b0fb8e8
Update Ruby Style Guide badge to point to RuboCop
crmne Sep 24, 2025
99e9594
Bump to 1.8.2
crmne Sep 24, 2025
10b31b3
Update README and index documentation to clarify embedding generation…
crmne Sep 26, 2025
a0efaa4
Updated appraisal gemfiles
crmne Sep 26, 2025
702b9b8
Merge branch 'main' into red-candle
orangewolf Oct 2, 2025
5 changes: 0 additions & 5 deletions .gitattributes

This file was deleted.

18 changes: 13 additions & 5 deletions .github/workflows/cicd.yml
@@ -49,8 +49,6 @@ jobs:

steps:
- uses: actions/checkout@v4
with:
lfs: true

- name: Set up Ruby
uses: ruby/setup-ruby@v1
@@ -67,13 +65,24 @@
run: bundle exec rubocop

- name: Run tests
if: matrix.ruby-version != '3.4' || matrix.rails-version != 'rails-8.0'
run: bundle exec appraisal ${{ matrix.rails-version }} rspec --tag '~generator'
env: # Dummy environment variables for local providers
OLLAMA_API_BASE: http://localhost:11434/v1
GPUSTACK_API_BASE: http://localhost:11444/v1
GPUSTACK_API_KEY: test
SKIP_COVERAGE: true

- name: Run full test suite with coverage (latest Ruby/Rails)
if: matrix.ruby-version == '3.4' && matrix.rails-version == 'rails-8.0'
run: bundle exec appraisal ${{ matrix.rails-version }} rspec
env: # Dummy environment variables for local providers
OLLAMA_API_BASE: http://localhost:11434/v1
GPUSTACK_API_BASE: http://localhost:11444/v1
GPUSTACK_API_KEY: test

- name: Upload coverage to Codecov
if: matrix.ruby-version == '3.4' && matrix.rails-version == 'rails-8.0'
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
@@ -85,11 +94,12 @@
run: |
FARADAY_VERSION=1.10.3 bundle install
bundle exec appraisal ${{ matrix.rails-version }} bundle install
bundle exec appraisal ${{ matrix.rails-version }} rspec
bundle exec appraisal ${{ matrix.rails-version }} rspec --tag '~generator'
env: # Dummy environment variables for local providers
OLLAMA_API_BASE: http://localhost:11434/v1
GPUSTACK_API_BASE: http://localhost:11444/v1
GPUSTACK_API_KEY: test
SKIP_COVERAGE: true

publish:
name: Build + Publish
@@ -99,8 +109,6 @@

steps:
- uses: actions/checkout@v4
with:
lfs: true

- name: Set up Ruby
uses: ruby/setup-ruby@v1
2 changes: 0 additions & 2 deletions .github/workflows/docs.yml
@@ -24,8 +24,6 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
with:
lfs: true

- name: Setup Ruby for models guide generation (root Gemfile)
uses: ruby/setup-ruby@v1
15 changes: 2 additions & 13 deletions .overcommit.yml
@@ -9,7 +9,7 @@ PreCommit:

RSpec:
enabled: true
command: ['bundle', 'exec', 'rspec']
command: ['bundle', 'exec', 'rspec', '--tag', '~generator']
on_warn: fail

TrailingWhitespace:
@@ -24,20 +24,9 @@ PreCommit:
description: 'Update appraisal gemfiles'
command: ['bundle', 'exec', 'appraisal', 'update']

PrePush:
GitLfs:
enabled: true
description: 'Push LFS objects to remote'
command: ['bash', '-c', 'git lfs pre-push "$@"', '--']

PostCheckout:
ALL: # Special hook name that customizes all hooks of this type
quiet: true # Change all post-checkout hooks to only display output on failure

IndexTags:
enabled: true # Generate a tags file with `ctags` each time HEAD changes

LfsInstall:
enabled: true
description: 'Ensure Git LFS files are pulled'
command: ['git', 'lfs', 'pull']
enabled: true # Generate a tags file with `ctags` each time HEAD changes
1 change: 1 addition & 0 deletions .rubocop.yml
@@ -10,6 +10,7 @@ AllCops:
- docs/**/*
- vendor/**/*
- gemfiles/**/*
- lib/generators/**/templates/**/*
SuggestExtensions: false

Metrics/ClassLength:
33 changes: 33 additions & 0 deletions CONTRIBUTING.md
@@ -55,6 +55,39 @@ rake vcr:record[all] # Everything

Always check cassettes for leaked API keys before committing.

## Optional Dependencies

### Red Candle Provider

The Red Candle provider enables local LLM execution using quantized GGUF models. It requires a Rust toolchain, so it's optional for contributors.

**To work WITHOUT Red Candle (default):**
```bash
bundle install
bundle exec rspec # Red Candle tests will be skipped
```

**To work WITH Red Candle:**
```bash
# Enable the Red Candle gem group
bundle config set --local with red_candle
bundle install

# Run tests with stubbed Red Candle (fast, default)
bundle exec rspec

# Run tests with real inference (slow, downloads models)
RED_CANDLE_REAL_INFERENCE=true bundle exec rspec
```

**To switch back to working without Red Candle:**
```bash
bundle config unset with
bundle install
```

The `bundle config` settings are stored in `.bundle/config` (gitignored), so each developer can choose their own setup without affecting others.
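
For reference, you can confirm which optional groups are enabled by inspecting that file (a quick sketch; the exact contents depend on your Bundler version, and the `BUNDLE_WITH` entry shown is simply what `bundle config set --local with red_candle` typically writes):

```bash
# Inspect the local Bundler settings (gitignored)
cat .bundle/config
# Expected to contain something like:
# BUNDLE_WITH: "red_candle"
```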

## Important Notes

* **Never edit `models.json`, `aliases.json`, or `available-models.md`** - they're auto-generated by `rake models`
6 changes: 6 additions & 0 deletions Gemfile
@@ -41,3 +41,9 @@ group :development do # rubocop:disable Metrics/BlockLength
# Optional dependency for Vertex AI
gem 'googleauth'
end

# Optional group for Red Candle provider (requires Rust toolchain)
# To include: bundle config set --local with red_candle
group :red_candle, optional: true do
gem 'red-candle', '~> 1.3'
end
25 changes: 19 additions & 6 deletions README.md
@@ -9,8 +9,8 @@

Battle tested at [<picture><source media="(prefers-color-scheme: dark)" srcset="https://chatwithwork.com/logotype-dark.svg"><img src="https://chatwithwork.com/logotype.svg" alt="Chat with Work" height="30" align="absmiddle"></picture>](https://chatwithwork.com) — *Claude Code for your documents*

[![Gem Version](https://badge.fury.io/rb/ruby_llm.svg?a=7)](https://badge.fury.io/rb/ruby_llm)
[![Ruby Style Guide](https://img.shields.io/badge/code_style-standard-brightgreen.svg)](https://github.com/testdouble/standard)
[![Gem Version](https://badge.fury.io/rb/ruby_llm.svg?a=10)](https://badge.fury.io/rb/ruby_llm)
[![Ruby Style Guide](https://img.shields.io/badge/code_style-rubocop-brightgreen.svg)](https://github.com/rubocop/rubocop)
[![Gem Downloads](https://img.shields.io/gem/dt/ruby_llm)](https://rubygems.org/gems/ruby_llm)
[![codecov](https://codecov.io/gh/crmne/ruby_llm/branch/main/graph/badge.svg?a=2)](https://codecov.io/gh/crmne/ruby_llm)

@@ -41,6 +41,7 @@ chat.ask "What's the best way to learn Ruby?"
```ruby
# Analyze any file type
chat.ask "What's in this image?", with: "ruby_conf.jpg"
chat.ask "What's happening in this video?", with: "video.mp4"
chat.ask "Describe this meeting", with: "meeting.wav"
chat.ask "Summarize this document", with: "contract.pdf"
chat.ask "Explain this code", with: "app.rb"
@@ -68,6 +69,11 @@ RubyLLM.paint "a sunset over mountains in watercolor style"
RubyLLM.embed "Ruby is elegant and expressive"
```

```ruby
# Moderate content for safety
RubyLLM.moderate "Check if this text is safe"
```

```ruby
# Let AI use your code
class Weather < RubyLLM::Tool
@@ -100,18 +106,19 @@ response = chat.with_schema(ProductSchema).ask "Analyze this product", with: "pr
## Features

* **Chat:** Conversational AI with `RubyLLM.chat`
* **Vision:** Analyze images and screenshots
* **Vision:** Analyze images and videos
* **Audio:** Transcribe and understand speech
* **Documents:** Extract from PDFs, CSVs, JSON, any file type
* **Image generation:** Create images with `RubyLLM.paint`
* **Embeddings:** Vector search with `RubyLLM.embed`
* **Embeddings:** Generate embeddings with `RubyLLM.embed`
* **Moderation:** Content safety with `RubyLLM.moderate`
* **Tools:** Let AI call your Ruby methods
* **Structured output:** JSON schemas that just work
* **Streaming:** Real-time responses with blocks
* **Rails:** ActiveRecord integration with `acts_as_chat`
* **Async:** Fiber-based concurrency
* **Model registry:** 500+ models with capability detection and pricing
* **Providers:** OpenAI, Anthropic, Gemini, VertexAI, Bedrock, DeepSeek, Mistral, Ollama, OpenRouter, Perplexity, GPUStack, and any OpenAI-compatible API
* **Providers:** OpenAI, Anthropic, Gemini, VertexAI, Bedrock, DeepSeek, Mistral, Ollama, OpenRouter, Perplexity, GPUStack, [RedCandle](https://github.com/scientist-labs/red-candle), and any OpenAI-compatible API

## Installation

@@ -132,18 +139,24 @@ end
## Rails

```bash
# Install Rails Integration
rails generate ruby_llm:install

# Add Chat UI (optional)
rails generate ruby_llm:chat_ui
```

```ruby
class Chat < ApplicationRecord
acts_as_chat
end

chat = Chat.create! model_id: "claude-sonnet-4"
chat = Chat.create! model: "claude-sonnet-4"
chat.ask "What's in this file?", with: "report.pdf"
```

Visit `http://localhost:3000/chats` for a ready-to-use chat interface!

## Documentation

[rubyllm.com](https://rubyllm.com)
103 changes: 41 additions & 62 deletions docs/_advanced/models.md
@@ -42,7 +42,7 @@ The registry stores crucial information about each model, including:
* **`name`**: A human-friendly name.
* **`context_window`**: Max input tokens (e.g., `128_000`).
* **`max_tokens`**: Max output tokens (e.g., `16_384`).
* **`supports_vision`**: If it can process images.
* **`supports_vision`**: If it can process images and videos.
* **`supports_functions`**: If it can use [Tools]({% link _core_features/tools.md %}).
* **`input_price_per_million`**: Cost in USD per 1 million input tokens.
* **`output_price_per_million`**: Cost in USD per 1 million output tokens.
@@ -86,7 +86,7 @@ chat_models = RubyLLM.models.refresh!.chat_models

**Local Provider Models:**

By default, `refresh!` includes models from local providers like Ollama and GPUStack if they're configured. To exclude local providers and only fetch from remote APIs (available in v1.6.5+):
By default, `refresh!` includes models from local providers like Ollama and GPUStack if they're configured. To exclude local providers and only fetch from remote APIs:

```ruby
# Only fetch from remote providers (Anthropic, OpenAI, etc.)
@@ -95,6 +95,33 @@ RubyLLM.models.refresh!(remote_only: true)

This is useful when you want to refresh only cloud-based models without querying local model servers.

### Dynamic Model Registration (Red Candle)

Some providers register their models dynamically at runtime rather than through the `models.json` file. Red Candle is one such provider: it registers its GGUF models when the gem is loaded.

**How Red Candle Models Work:**

1. **Not in models.json**: Red Candle models don't appear in the static models.json file since they're only available when the gem is installed.

2. **Dynamic Registration**: When ruby_llm.rb loads and Red Candle is available, it adds models to the in-memory registry:
```ruby
# This happens automatically in lib/ruby_llm.rb
RubyLLM::Providers::RedCandle.models.each do |model|
RubyLLM.models.instance_variable_get(:@models) << model
end
```

3. **Excluded from refresh!**: The `refresh!(remote_only: true)` flag excludes Red Candle and other local providers.

4. **Currently Supported Models**:
- `google/gemma-3-4b-it-qat-q4_0-gguf`
- `TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF`
- `TheBloke/Mistral-7B-Instruct-v0.2-GGUF`
- `Qwen/Qwen2.5-1.5B-Instruct-GGUF`
- `microsoft/Phi-3-mini-4k-instruct`

Red Candle models are only available when the gem is installed with the red_candle group enabled. See the [Configuration Guide]({% link _getting_started/configuration.md %}) for installation instructions.
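
As a quick illustration, once the group is enabled a registered Red Candle model can be used like any other model in the registry (a minimal sketch; the model ID comes from the list above, inference runs locally, and the first call may download model weights):

```ruby
# Sketch: chatting with a dynamically registered Red Candle model
chat = RubyLLM.chat(model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF')
chat.ask "Give me a one-line summary of Ruby."
```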

**For Gem Development:**

The `rake models:update` task is designed for gem maintainers and updates the `models.json` file shipped with the gem:
Expand All @@ -108,68 +135,20 @@ This task is not intended for Rails applications as it writes to gem directories

**Persisting Models to Your Database:**

If you want to store model information in your application's database for persistence, querying, or caching, create your own migration and sync logic. Here's an example schema and production-ready sync job:
For Rails applications, the install generator sets up everything automatically:

```ruby
# db/migrate/xxx_create_llm_models.rb
create_table "llm_models", force: :cascade do |t|
t.string "model_id", null: false
t.string "name", null: false
t.string "provider", null: false
t.boolean "available", default: false
t.boolean "is_default", default: false
t.datetime "last_synced_at"
t.integer "context_window"
t.integer "max_output_tokens"
t.jsonb "metadata", default: {}
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.string "slug"
t.string "model_type"
t.string "family"
t.datetime "model_created_at"
t.date "knowledge_cutoff"
t.jsonb "modalities", default: {}, null: false
t.jsonb "capabilities", default: [], null: false
t.jsonb "pricing", default: {}, null: false

t.index ["model_id"], unique: true
t.index ["provider", "available", "context_window"]
t.index ["capabilities"], using: :gin
t.index ["modalities"], using: :gin
t.index ["pricing"], using: :gin
end
```bash
rails generate ruby_llm:install
rails db:migrate
```

# app/jobs/sync_llm_models_job.rb
class SyncLLMModelsJob < ApplicationJob
queue_as :default
retry_on StandardError, wait: 1.seconds, attempts: 5

def perform
RubyLLM.models.refresh!

found_model_ids = RubyLLM.models.chat_models.filter_map do |model_data|
attributes = model_data.to_h
attributes[:model_id] = attributes.delete(:id)
attributes[:model_type] = attributes.delete(:type)
attributes[:model_created_at] = attributes.delete(:created_at)
attributes[:last_synced_at] = Time.now

model = LLMModel.find_or_initialize_by(model_id: attributes[:model_id])
model.assign_attributes(**attributes)
model.save ? model.id : nil
end

# Mark missing models as unavailable instead of deleting them
LLMModel.where.not(id: found_model_ids).update_all(available: false)
end
end
This creates the Model table and loads model data from the gem's registry.

# Schedule it to run periodically
# config/schedule.rb (with whenever gem)
every 6.hours do
runner "SyncLLMModelsJob.perform_later"
end
To refresh model data from provider APIs:

```ruby
# Fetches latest model info from configured providers (requires API keys)
Model.refresh!
```
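
If you want this data refreshed automatically, one option is to wrap the call in a recurring job (a minimal sketch; the job class name and scheduling mechanism are illustrative, not generated by the installer):

```ruby
# app/jobs/refresh_llm_models_job.rb -- illustrative sketch
class RefreshLlmModelsJob < ApplicationJob
  queue_as :default

  def perform
    # Pulls the latest model info from configured providers (requires API keys)
    Model.refresh!
  end
end

# Enqueue it from your scheduler of choice (cron, sidekiq-cron, recurring jobs, etc.):
# RefreshLlmModelsJob.perform_later
```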

## Exploring and Finding Models
@@ -323,4 +302,4 @@ image = RubyLLM.paint(
* **Your Responsibility:** Ensure the model ID is correct for the target endpoint.
* **Warning Log:** A warning is logged indicating validation was skipped.

Use these features when the standard registry doesn't cover your specific model or endpoint needs. For standard models, rely on the registry for validation and capability awareness. See the [Chat Guide]({% link _core_features/chat.md %}) for more on using the `chat` object.
Use these features when the standard registry doesn't cover your specific model or endpoint needs. For standard models, rely on the registry for validation and capability awareness. See the [Chat Guide]({% link _core_features/chat.md %}) for more on using the `chat` object.