airmonitor · airmonitor · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "socraticode",
-  "version": "1.8.2",
+  "version": "1.8.3",
   "description": "Codebase intelligence — semantic search workflows, dependency graph analysis, and context artifact exploration for SocratiCode",
   "author": {
     "name": "Giancarlo Erra",

diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "socraticode",
-  "version": "1.8.2",
+  "version": "1.8.3",
   "description": "Codebase intelligence: semantic search workflows, dependency graph analysis, and context artifact exploration for SocratiCode",
   "author": {
     "name": "Giancarlo Erra",

diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "socraticode",
-  "version": "1.8.2",
+  "version": "1.8.3",
   "description": "Codebase intelligence: semantic search workflows, dependency graph analysis, and context artifact exploration for SocratiCode",
   "author": {
     "name": "Giancarlo Erra",

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to SocratiCode are documented here.
 This project uses [Conventional Commits](https://www.conventionalcommits.org/) and [Semantic Versioning](https://semver.org/).
 
 
+## [1.8.3](https://github.com/giancarloerra/socraticode/compare/v1.8.2...v1.8.3) (2026-05-04)
+
+### Features
+
+* **embeddings:** add LM Studio as a first-class embedding provider ([#42](https://github.com/giancarloerra/socraticode/issues/42)) ([332ee80](https://github.com/giancarloerra/socraticode/commit/332ee800a85fd35ded4e37adabecbfdd6221d31b))
+
 ## [1.8.2](https://github.com/giancarloerra/socraticode/compare/v1.8.1...v1.8.2) (2026-05-04)
 
 ### Bug Fixes

diff --git a/README.md b/README.md
@@ -240,7 +240,7 @@ On VS Code's 2.45M‑line codebase, SocratiCode answers architectural questions
 - **Hybrid code search** — Built on Qdrant, a purpose-built vector database with HNSW indexing, concurrent read/write, and payload filtering. Each chunk stores both a dense vector and a BM25 sparse vector; the Query API runs both sub-queries in a single round-trip and fuses results with Reciprocal Rank Fusion (RRF). Semantic search handles conceptual queries like "authentication middleware" even when those exact words don't appear in the code. BM25 handles exact identifier and keyword lookups. You get the best of both in every query with no tuning required.
 - **Configurable Qdrant** — Use the built-in Docker Qdrant (default, zero config) or connect to your own instance (self-hosted, remote server, or Qdrant Cloud). Configure via `QDRANT_MODE`, `QDRANT_URL`, and `QDRANT_API_KEY` environment variables.
 - **Configurable Ollama** — Use the built-in Docker Ollama (default, zero config) or point to your own Ollama instance (native install -GPU access-, remote server, etc.). Configure via `OLLAMA_MODE`, `OLLAMA_URL`, `EMBEDDING_MODEL` and `EMBEDDING_DIMENSIONS` environment variables.
-- **Multi-provider embeddings** — Switch between Local Ollama (private, GPU access), Docker Ollama (zero-config), OpenAI (`text-embedding-3-small`, fastest), or Google Gemini (`gemini-embedding-001`, free tier) with a single environment variable. No provider-specific configuration files.
+- **Multi-provider embeddings** — Switch between Local Ollama (private, GPU access), Docker Ollama (zero-config), OpenAI (`text-embedding-3-small`, fastest), Google Gemini (`gemini-embedding-001`, free tier), or LM Studio (local OpenAI-compatible server) with a single environment variable. No provider-specific configuration files.
 - **Private & secure** — Everything runs on your machine — your code never leaves your network. The default Docker setup includes Ollama (embeddings) and Qdrant (vector storage) with no external API calls. No API costs, no token limits. Suitable for air-gapped and on-premises environments. Optional cloud providers (OpenAI, Google Gemini, Qdrant Cloud) are available but never required.
 - **AST-aware chunking** — Files are split at function/class boundaries using AST parsing (ast-grep), not arbitrary line counts. This produces higher-quality search results. Falls back to line-based chunking for unsupported languages.
 - **Polyglot code dependency graph** — Static analysis of import/require/use/include statements using ast-grep for 18+ languages. No external tools like dependency-cruiser required. Detects circular dependencies and generates visual Mermaid diagrams.
@@ -685,6 +685,36 @@ Use Google's Gemini embedding API. Requires an [API key](https://aistudio.google
 
 > Defaults: `EMBEDDING_MODEL=gemini-embedding-001`, `EMBEDDING_DIMENSIONS=3072`.
 
+#### LM Studio (local, OpenAI-compatible)
+
+[LM Studio](https://lmstudio.ai/) ships with a Local Server that exposes an OpenAI-compatible
+API on `http://localhost:1234/v1`. Use this provider when you want to host embedding models
+in LM Studio (e.g. when LM Studio is your single source for both chat and embedding models,
+or when you want a Mac/Windows-friendly desktop UI for managing GGUF models).
+
+```json
+{
+  "mcpServers": {
+    "socraticode": {
+      "command": "node",
+      "args": ["/absolute/path/to/socraticode/dist/index.js"],
+      "env": {
+        "EMBEDDING_PROVIDER": "lmstudio",
+        "EMBEDDING_MODEL": "nomic-embed-text-v1.5",
+        "EMBEDDING_DIMENSIONS": "768"
+      }
+    }
+  }
+}
+```
+
+> **No defaults — `EMBEDDING_MODEL` and `EMBEDDING_DIMENSIONS` are required.** LM Studio has
+> no out-of-the-box embedding model; you load one yourself in the Local Server tab. SocratiCode
+> fails fast if either is missing.
+>
+> Optional: `LMSTUDIO_URL` (default `http://localhost:1234/v1`) for a non-default port or remote
+> host — keep the `/v1` suffix; `LMSTUDIO_API_KEY` if you've enabled API key auth in LM Studio.
+
 ### Git Worktrees (shared index across directories)
 
 If you use [git worktrees](https://git-scm.com/docs/git-worktree) — or any workflow where the same repository lives in multiple directories — each path would normally get its own Qdrant index. This means redundant embedding and storage for what is essentially the same codebase.
@@ -1072,10 +1102,10 @@ The rest of this section documents the variables themselves. Pass them using whi
 
 | Variable | Default | Description |
 |----------|---------|-------------|
-| `EMBEDDING_PROVIDER` | `ollama` | Embedding backend: `ollama` (local, default), `openai`, or `google` |
-| `EMBEDDING_MODEL` | *(per provider)* | Model name. Defaults: `nomic-embed-text` (ollama), `text-embedding-3-small` (openai), `gemini-embedding-001` (google) |
-| `EMBEDDING_DIMENSIONS` | *(per provider)* | Vector dimensions. Defaults: `768` (ollama), `1536` (openai), `3072` (google) |
-| `EMBEDDING_CONTEXT_LENGTH` | *(auto-detected)* | Model context window in tokens. Auto-detected for known models. Set manually for custom models. |
+| `EMBEDDING_PROVIDER` | `ollama` | Embedding backend: `ollama` (local, default), `openai`, `google`, or `lmstudio` |
+| `EMBEDDING_MODEL` | *(per provider)* | Model name. Defaults: `nomic-embed-text` (ollama), `text-embedding-3-small` (openai), `gemini-embedding-001` (google). **Required** for `lmstudio` (no default). |
+| `EMBEDDING_DIMENSIONS` | *(per provider)* | Vector dimensions. Defaults: `768` (ollama), `1536` (openai), `3072` (google). **Required** for `lmstudio` (no default; varies per loaded model). |
+| `EMBEDDING_CONTEXT_LENGTH` | *(auto-detected)* | Model context window in tokens. Auto-detected for known models. Set manually for custom or LM Studio models. |
 
 ### Ollama Configuration (when `EMBEDDING_PROVIDER=ollama`)
 
@@ -1094,6 +1124,13 @@ The rest of this section documents the variables themselves. Pass them using whi
 | `OPENAI_API_KEY` | *(none)* | Required when `EMBEDDING_PROVIDER=openai`. Get from [platform.openai.com](https://platform.openai.com/api-keys) |
 | `GOOGLE_API_KEY` | *(none)* | Required when `EMBEDDING_PROVIDER=google`. Get from [aistudio.google.com](https://aistudio.google.com/apikey) |
 
+### LM Studio Configuration (when `EMBEDDING_PROVIDER=lmstudio`)
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `LMSTUDIO_URL` | `http://localhost:1234/v1` | Full base URL of LM Studio's OpenAI-compatible Local Server. Override when the server runs on a non-default port or a remote machine (e.g. `http://gpu-rig.local:5678/v1`). Must include the `/v1` suffix. |
+| `LMSTUDIO_API_KEY` | *(none)* | Optional. LM Studio's Local Server has no auth by default; set this only if you've enabled API key auth in the LM Studio UI. |
+
 ### Qdrant Configuration
 
 | Variable | Default | Description |

diff --git a/extension/package.json b/extension/package.json
@@ -2,7 +2,7 @@
   "name": "socraticode",
   "displayName": "SocratiCode",
   "description": "Codebase context engine for AI assistants. Hybrid search, dependency and call graphs, symbol-level impact analysis (blast radius), interactive graph explorer, and searchable architecture artefacts. Works with Copilot agent mode, Cline, Continue, Roo Code, and any MCP-compatible host.",
-  "version": "1.8.2",
+  "version": "1.8.3",
   "publisher": "giancarloerra",
   "license": "AGPL-3.0-only",
   "icon": "images/icon.png",

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "socraticode",
   "mcpName": "io.github.giancarloerra/socraticode",
-  "version": "1.8.2",
+  "version": "1.8.3",
   "description": "SocratiCode — MCP server for local codebase indexing, semantic search, and code dependency graphs. All private, all local via Docker.",
   "type": "module",
   "main": "dist/index.js",

diff --git a/src/services/embedding-config.ts b/src/services/embedding-config.ts
@@ -7,6 +7,8 @@
  *   - "ollama" (default): Use Ollama for embeddings (Docker or external).
  *   - "openai": Use OpenAI Embeddings API. Requires OPENAI_API_KEY.
  *   - "google": Use Google Generative AI Embedding API. Requires GOOGLE_API_KEY.
+ *   - "lmstudio": Use a local LM Studio server (OpenAI-compatible). Requires
+ *                 EMBEDDING_MODEL and EMBEDDING_DIMENSIONS to be set explicitly.
  *
  * Ollama-specific:
  *   OLLAMA_MODE:
@@ -26,17 +28,24 @@
  *   OPENAI_API_KEY:        Required for openai provider.
  *   GOOGLE_API_KEY:        Required for google provider.
  *
+ * LM Studio-specific:
+ *   LMSTUDIO_URL:          OpenAI-compatible base URL for LM Studio's local server.
+ *                          Default: http://localhost:1234/v1
+ *   LMSTUDIO_API_KEY:      Optional API key. LM Studio's Local Server has no auth by default;
+ *                          set this only if you've enabled an API key in LM Studio.
+ *
  * Shared:
- *   EMBEDDING_MODEL:       Model name (default depends on provider).
- *   EMBEDDING_DIMENSIONS:  Vector dimensions — must match the model (default depends on provider).
+ *   EMBEDDING_MODEL:       Model name (default depends on provider; required for lmstudio).
+ *   EMBEDDING_DIMENSIONS:  Vector dimensions — must match the model (default depends on
+ *                          provider; required for lmstudio).
  *   EMBEDDING_CONTEXT_LENGTH: Override context window in tokens (auto-detected for known models).
  */
 
 import { logger } from "./logger.js";
 
 // ── Types ─────────────────────────────────────────────────────────────────
 
-export type EmbeddingProvider = "ollama" | "openai" | "google";
+export type EmbeddingProvider = "ollama" | "openai" | "google" | "lmstudio";
 export type OllamaMode = "docker" | "external" | "auto";
 
 export interface EmbeddingConfig {
@@ -46,6 +55,8 @@ export interface EmbeddingConfig {
   ollamaMode: OllamaMode;
   /** Ollama API URL (only relevant when embeddingProvider is "ollama"). */
   ollamaUrl: string;
+  /** LM Studio OpenAI-compatible base URL (only relevant when embeddingProvider is "lmstudio"). */
+  lmstudioUrl: string;
   embeddingModel: string;
   embeddingDimensions: number;
   /** Max context window in tokens. Used for client-side pre-truncation. */
@@ -55,10 +66,16 @@ export interface EmbeddingConfig {
 
 // ── Provider defaults ─────────────────────────────────────────────────────
 
+/**
+ * lmstudio has placeholder defaults ("" / 0): LM Studio ships no out-of-the-box embedding
+ * model — users must load one in the UI and set dimensions to match. loadEmbeddingConfig()
+ * fails fast when lmstudio is selected without EMBEDDING_MODEL / EMBEDDING_DIMENSIONS.
+ */
 const PROVIDER_DEFAULTS: Record<EmbeddingProvider, { model: string; dimensions: number }> = {
-  ollama:  { model: "nomic-embed-text",        dimensions: 768  },
-  openai:  { model: "text-embedding-3-small",  dimensions: 1536 },
-  google:  { model: "gemini-embedding-001",    dimensions: 3072 },
+  ollama:   { model: "nomic-embed-text",        dimensions: 768  },
+  openai:   { model: "text-embedding-3-small",  dimensions: 1536 },
+  google:   { model: "gemini-embedding-001",    dimensions: 3072 },
+  lmstudio: { model: "",                        dimensions: 0    },
 };
 
 // ── Ollama mode defaults ──────────────────────────────────────────────────
@@ -109,14 +126,39 @@ export function loadEmbeddingConfig(): EmbeddingConfig {
 
   // ── Provider ────────────────────────────────────────────────────────
   const rawProvider = process.env.EMBEDDING_PROVIDER || "ollama";
-  if (rawProvider !== "ollama" && rawProvider !== "openai" && rawProvider !== "google") {
+  if (
+    rawProvider !== "ollama" &&
+    rawProvider !== "openai" &&
+    rawProvider !== "google" &&
+    rawProvider !== "lmstudio"
+  ) {
     throw new Error(
-      `Invalid EMBEDDING_PROVIDER: "${rawProvider}". Must be "ollama", "openai", or "google".`,
+      `Invalid EMBEDDING_PROVIDER: "${rawProvider}". Must be "ollama", "openai", "google", or "lmstudio".`,
     );
   }
   const embeddingProvider: EmbeddingProvider = rawProvider;
   const providerDefaults = PROVIDER_DEFAULTS[embeddingProvider];
 
+  // LM Studio has no sensible defaults — model and dimensions vary per loaded model.
+  // Fail fast with an actionable message rather than silently sending empty values.
+  if (embeddingProvider === "lmstudio") {
+    if (!process.env.EMBEDDING_MODEL) {
+      throw new Error(
+        "EMBEDDING_MODEL is required when EMBEDDING_PROVIDER=lmstudio. " +
+        "LM Studio has no built-in default — set it to the model identifier shown in " +
+        "LM Studio's Local Server tab (e.g. EMBEDDING_MODEL=nomic-embed-text-v1.5).",
+      );
+    }
+    if (!process.env.EMBEDDING_DIMENSIONS) {
+      throw new Error(
+        "EMBEDDING_DIMENSIONS is required when EMBEDDING_PROVIDER=lmstudio. " +
+        "Different LM Studio models have different output dimensions — check the model card " +
+        "and set EMBEDDING_DIMENSIONS accordingly (e.g. 768 for nomic-embed-text-v1.5, " +
+        "1024 for bge-large-en-v1.5, 4096 for qwen3-embedding-8b).",
+      );
+    }
+  }
+
   // ── Ollama mode (only relevant for ollama provider) ─────────────────
   const rawMode = process.env.OLLAMA_MODE || "auto";
   if (rawMode !== "docker" && rawMode !== "external" && rawMode !== "auto") {
@@ -145,6 +187,7 @@ export function loadEmbeddingConfig(): EmbeddingConfig {
     embeddingProvider,
     ollamaMode,
     ollamaUrl: process.env.OLLAMA_URL || modeDefaults.url,
+    lmstudioUrl: process.env.LMSTUDIO_URL || "http://localhost:1234/v1",
     embeddingModel,
     embeddingDimensions,
     embeddingContextLength: contextLengthEnv
@@ -167,14 +210,21 @@ export function loadEmbeddingConfig(): EmbeddingConfig {
       ollamaMode: _config.ollamaMode,
       ollamaUrl: _config.ollamaUrl,
     } : {}),
+    ...(embeddingProvider === "lmstudio" ? {
+      lmstudioUrl: _config.lmstudioUrl,
+    } : {}),
     embeddingModel: _config.embeddingModel,
     embeddingDimensions: _config.embeddingDimensions,
     embeddingContextLength: _config.embeddingContextLength || "auto",
     hasApiKey: !!(embeddingProvider === "ollama"
       ? _config.ollamaApiKey
       : embeddingProvider === "openai"
         ? process.env.OPENAI_API_KEY
-        : process.env.GOOGLE_API_KEY),
+        : embeddingProvider === "google"
+          ? process.env.GOOGLE_API_KEY
+          : embeddingProvider === "lmstudio"
+            ? process.env.LMSTUDIO_API_KEY
+            : undefined),
   });
 
   return _config;

diff --git a/src/services/embedding-provider.ts b/src/services/embedding-provider.ts
@@ -8,9 +8,10 @@
  * about which backend generates the vectors.
  *
  * Providers:
- *   - ollama  (default) — local Ollama (Docker or external)
- *   - openai  — OpenAI Embeddings API (text-embedding-3-small, etc.)
- *   - google  — Google Generative AI Embedding API (gemini-embedding-001, etc.)
+ *   - ollama   (default) — local Ollama (Docker or external)
+ *   - openai   — OpenAI Embeddings API (text-embedding-3-small, etc.)
+ *   - google   — Google Generative AI Embedding API (gemini-embedding-001, etc.)
+ *   - lmstudio — local LM Studio server via OpenAI-compatible API
  */
 
 import type { InfraProgressCallback } from "./docker.js";
@@ -60,9 +61,14 @@ export async function getEmbeddingProvider(onProgress?: InfraProgressCallback):
       _provider = new GoogleEmbeddingProvider();
       break;
     }
+    case "lmstudio": {
+      const { LMStudioEmbeddingProvider } = await import("./provider-lmstudio.js");
+      _provider = new LMStudioEmbeddingProvider();
+      break;
+    }
     default:
       throw new Error(
-        `Unknown embedding provider: "${name}". Must be "ollama", "openai", or "google".`,
+        `Unknown embedding provider: "${name}". Must be "ollama", "openai", "google", or "lmstudio".`,
       );
   }