Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions api/config_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from flask import Blueprint, jsonify, request

from utils.path_helpers import expand_tilde_path
from utils.path_validation import WorkspacePathError, validate_workspace_path
from utils.workspace_path import set_workspace_path_override

bp = Blueprint("config_api", __name__)
Expand Down Expand Up @@ -75,14 +76,28 @@ def validate_path():

@bp.route("/api/set-workspace", methods=["POST"])
def set_workspace():
# Reject non-dict JSON bodies (array / string / number / null). Without
# this, get_json returns the value directly, the truthy fallback `or {}`
# is bypassed, and `body.get("path", "")` raises AttributeError — which
# the outer Exception handler then mis-reports as a 500 server error
# instead of a 400 client error. (CodeRabbit on PR #16.)
body = request.get_json(silent=True)
if not isinstance(body, dict):
return jsonify({"error": "request body must be a JSON object"}), 400
raw = body.get("path", "")
# Validate the supplied path BEFORE storing the override (issue #15).
# validate_workspace_path collapses `..` traversal AND resolves symlinks
# via realpath, then enforces that the canonical target is an existing
# directory containing Cursor workspace markers. Returns the canonical
# path so we store that, not whatever the caller sent.
try:
body = request.get_json(silent=True) or {}
path = body.get("path", "")
expanded = expand_tilde_path(path)
set_workspace_path_override(expanded)
return jsonify({"success": True})
except Exception:
return jsonify({"error": "Failed to set workspace path"}), 500
canonical = validate_workspace_path(raw)
except WorkspacePathError as e:
return jsonify({"error": str(e)}), 400
except Exception: # noqa: BLE001 — only here as a fallback
return jsonify({"error": "Failed to validate workspace path"}), 500
set_workspace_path_override(canonical)
return jsonify({"success": True, "path": canonical})


@bp.route("/api/get-username")
Expand Down
201 changes: 201 additions & 0 deletions tests/test_workspace_path_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
"""
Regression tests for issue #15 — /api/set-workspace path validation.

Exercises validate_workspace_path() directly. Imports from utils/ to avoid
pulling Flask into scope (tests/test_cli_args.py convention).

Run:
python -m unittest tests.test_workspace_path_validation -v
"""

from __future__ import annotations

import os
import shutil
import sys
import tempfile
import unittest

REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, REPO_ROOT)

from utils.path_validation import WorkspacePathError, validate_workspace_path


def _make_cursor_workspace_dir(parent: str, name: str = "real-storage") -> str:
"""Create a directory that looks like a Cursor workspaceStorage dir.

Layout:
<parent>/<name>/
ws-001/state.vscdb ← marker file the validator looks for
"""
storage = os.path.join(parent, name)
ws = os.path.join(storage, "ws-001")
os.makedirs(ws)
with open(os.path.join(ws, "state.vscdb"), "wb") as f:
f.write(b"")
return storage


class TestValidateWorkspacePath(unittest.TestCase):

def setUp(self):
self.tmp = tempfile.mkdtemp(prefix="cursor-validate-test-")
self.addCleanup(shutil.rmtree, self.tmp, ignore_errors=True)

# ─── Happy path ────────────────────────────────────────────────

def test_accepts_directory_with_cursor_marker(self):
storage = _make_cursor_workspace_dir(self.tmp)
result = validate_workspace_path(storage)
self.assertEqual(result, os.path.realpath(storage))

def test_returns_canonical_path_collapsing_dotdot(self):
# /tmp/<x>/real-storage/../real-storage → /tmp/<x>/real-storage
storage = _make_cursor_workspace_dir(self.tmp)
traversal_input = os.path.join(storage, "..", os.path.basename(storage))
result = validate_workspace_path(traversal_input)
self.assertEqual(result, os.path.realpath(storage))
self.assertNotIn("..", result)

# ─── Hard rejects ──────────────────────────────────────────────

def test_rejects_empty_string(self):
with self.assertRaises(WorkspacePathError) as ctx:
validate_workspace_path("")
self.assertIn("required", str(ctx.exception))

def test_rejects_whitespace_only(self):
with self.assertRaises(WorkspacePathError):
validate_workspace_path(" \t ")

def test_rejects_non_string(self):
with self.assertRaises(WorkspacePathError):
validate_workspace_path(None) # type: ignore[arg-type]

def test_rejects_non_existent_path(self):
bogus = os.path.join(self.tmp, "does-not-exist", "anywhere")
with self.assertRaises(WorkspacePathError) as ctx:
validate_workspace_path(bogus)
self.assertIn("does not exist", str(ctx.exception))

def test_rejects_file_not_directory(self):
f = os.path.join(self.tmp, "regular-file")
with open(f, "w") as h:
h.write("not a directory")
with self.assertRaises(WorkspacePathError) as ctx:
validate_workspace_path(f)
self.assertIn("not a directory", str(ctx.exception))

def test_rejects_directory_without_cursor_markers(self):
# Existing directory but no state.vscdb anywhere — common case for
# a user pointing at /tmp, /etc, /, ~/.ssh, etc.
plain = os.path.join(self.tmp, "plain-dir")
os.makedirs(os.path.join(plain, "subdir"))
with self.assertRaises(WorkspacePathError) as ctx:
validate_workspace_path(plain)
self.assertIn("Cursor workspaceStorage", str(ctx.exception))

# ─── Path-traversal class ──────────────────────────────────────

def test_traversal_into_non_workspace_is_rejected(self):
# Keep traversal target inside this test's own temp tree — escaping
# to /tmp itself would be non-deterministic (any other test or
# process creating a `state.vscdb` under /tmp/<dir>/state.vscdb
# would flip this test's outcome).
#
# <self.tmp>/isolated-root/storage/../.. → <self.tmp>/isolated-root
# which contains no state.vscdb under any subdir → reject on markers.
isolated_root = os.path.join(self.tmp, "isolated-root")
os.makedirs(isolated_root)
storage = _make_cursor_workspace_dir(isolated_root)
escape = os.path.join(storage, "..", "..")
with self.assertRaises(WorkspacePathError):
validate_workspace_path(escape)
Comment thread
timon0305 marked this conversation as resolved.

# ─── Symlink-escape class ──────────────────────────────────────

@unittest.skipIf(sys.platform == "win32", "POSIX symlinks only")
def test_symlink_to_non_workspace_is_rejected(self):
# A symlink that points to / (no Cursor markers) is rejected because
# realpath() resolves to the real target before the marker check.
link = os.path.join(self.tmp, "evil-link")
os.symlink("/", link)
with self.assertRaises(WorkspacePathError) as ctx:
validate_workspace_path(link)
self.assertIn("Cursor workspaceStorage", str(ctx.exception))

@unittest.skipIf(sys.platform == "win32", "POSIX symlinks only")
def test_symlink_to_real_workspace_is_canonicalised_and_accepted(self):
# Symlink → real Cursor storage. Accepted, but the canonical path
# returned is the realpath (the storage dir), NOT the symlink path.
storage = _make_cursor_workspace_dir(self.tmp)
link = os.path.join(self.tmp, "good-link")
os.symlink(storage, link)
result = validate_workspace_path(link)
self.assertEqual(result, os.path.realpath(storage))
self.assertNotEqual(result, link)


class TestSetWorkspaceApi(unittest.TestCase):
"""API-layer regressions for POST /api/set-workspace.

The validator helper has its own coverage above; these cases exist to
pin behaviour the API handler owns (request body shape handling,
HTTP status mapping). Notably the non-dict-body case which used to
surface as a 500 instead of a 400 — see CodeRabbit on PR #16.
"""

def setUp(self):
from flask import Flask
from api.config_api import bp as config_bp

self.tmp = tempfile.mkdtemp(prefix="cursor-validate-api-test-")
self.addCleanup(shutil.rmtree, self.tmp, ignore_errors=True)

app = Flask(__name__)
app.config["TESTING"] = True
app.register_blueprint(config_bp)
self.client = app.test_client()

def test_non_dict_json_array_returns_400_not_500(self):
# Regression: a JSON array body (truthy, non-dict) used to trip
# AttributeError on body.get(...) and surface as a 500.
resp = self.client.post(
"/api/set-workspace",
data="[]",
content_type="application/json",
)
self.assertEqual(resp.status_code, 400)
self.assertIn("error", resp.get_json())

def test_non_dict_json_string_returns_400(self):
resp = self.client.post(
"/api/set-workspace",
data='"some string"',
content_type="application/json",
)
self.assertEqual(resp.status_code, 400)

def test_non_dict_json_number_returns_400(self):
resp = self.client.post(
"/api/set-workspace",
data="42",
content_type="application/json",
)
self.assertEqual(resp.status_code, 400)

def test_dict_with_valid_path_returns_200_with_canonical(self):
storage = _make_cursor_workspace_dir(self.tmp)
resp = self.client.post(
"/api/set-workspace",
json={"path": storage},
)
self.assertEqual(resp.status_code, 200)
body = resp.get_json()
self.assertTrue(body["success"])
self.assertEqual(body["path"], os.path.realpath(storage))


if __name__ == "__main__":
unittest.main()
83 changes: 83 additions & 0 deletions utils/path_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Validation for workspace paths submitted via /api/set-workspace.

Lives outside ``api/`` so the unit tests can import it without pulling
Flask into scope (the existing test suite intentionally avoids Flask —
see ``tests/test_cli_args.py`` for the convention).

The validation collapses path traversal *and* resolves symlinks via
``os.path.realpath()`` in a single step. Both ``/foo/../bar`` and a
symlink that points outside the intended tree become whatever the
canonical real path is on disk; downstream checks then operate on
that canonical value, not on whatever the caller sent.
"""

from __future__ import annotations

import os

from .path_helpers import expand_tilde_path


class WorkspacePathError(ValueError):
"""Raised when a /api/set-workspace path fails validation.

Carries a single ``reason`` string suitable for a 400 response body.
Distinct exception type so the API handler can map it to a 400 while
letting unexpected exceptions surface as 500.
"""


def _has_cursor_workspace_markers(directory: str) -> bool:
"""Return True iff at least one immediate subdirectory contains state.vscdb.

Same heuristic /api/validate-path already uses to recognise a Cursor
workspaceStorage directory. Used here as the final accept gate so that
a symlink whose realpath happens to leave the user's own data area
(e.g. /tmp, /etc) is rejected — those locations have no state.vscdb.
"""
try:
names = os.listdir(directory)
except OSError:
return False
for name in names:
full = os.path.join(directory, name)
try:
if os.path.isdir(full) and os.path.isfile(os.path.join(full, "state.vscdb")):
return True
except OSError:
continue
return False


def validate_workspace_path(raw_path: str) -> str:
"""Validate a /api/set-workspace input and return the canonical real path.

Raises :class:`WorkspacePathError` if the path:
- is empty / not a string,
- does not exist after symlink + ``..`` resolution,
- is not a directory,
- contains no Cursor workspace markers (no immediate subdir with state.vscdb).

On success, returns the canonical absolute real path. The caller should
store that, not the raw input, so subsequent reads resolve through the
same canonical value.
"""
if not isinstance(raw_path, str) or not raw_path.strip():
raise WorkspacePathError("path is required")

expanded = expand_tilde_path(raw_path)
# realpath() collapses `..` AND resolves symlinks. Both classes of escape
# become equivalent to whatever is actually on disk.
real = os.path.realpath(expanded)

if not os.path.exists(real):
raise WorkspacePathError("path does not exist")
if not os.path.isdir(real):
raise WorkspacePathError("path is not a directory")
if not _has_cursor_workspace_markers(real):
raise WorkspacePathError(
"path does not look like a Cursor workspaceStorage directory "
"(no immediate subdirectory contains state.vscdb)"
)

return real