Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aider/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from packaging import version

__version__ = "0.88.17.dev"
__version__ = "0.88.18.dev"
safe_version = __version__

try:
Expand Down
9 changes: 9 additions & 0 deletions aider/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""Utility functions for aider."""

from .similarity import cosine_similarity, create_bigram_vector, normalize_vector

__all__ = [
"cosine_similarity",
"create_bigram_vector",
"normalize_vector",
]
98 changes: 98 additions & 0 deletions aider/helpers/similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import numpy as np


def normalize_vector(vector):
    """Scale a vector to unit length under the L2 norm.

    Args:
        vector (np.ndarray or list): Input vector

    Returns:
        np.ndarray: Unit-length vector, or the (converted) input unchanged
            when its magnitude is zero.
    """
    arr = np.asarray(vector, dtype=np.float64)
    norm = np.linalg.norm(arr)
    # A zero vector cannot be normalized; hand it back as-is.
    return arr if norm == 0 else arr / norm


def cosine_similarity(vector1, vector2):
    """Calculate cosine similarity between two vectors.

    Args:
        vector1 (np.ndarray or list): First vector
        vector2 (np.ndarray or list): Second vector

    Returns:
        float: Cosine similarity between the vectors (range: -1 to 1);
            0.0 when either vector has zero magnitude.

    Raises:
        ValueError: If the vectors differ in length.
    """
    v1 = np.asarray(vector1, dtype=np.float64)
    v2 = np.asarray(vector2, dtype=np.float64)

    if len(v1) != len(v2):
        raise ValueError("Vectors must have the same length")

    # Zero-magnitude vectors have no direction; define similarity as 0.
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        return 0.0

    return np.dot(v1, v2) / norm_product


def create_bigram_vector(texts):
    """Create a bigram frequency vector over the lowercase ASCII alphabet.

    Each bigram of adjacent characters maps to a fixed slot
    (0 for 'aa', 1 for 'ab', ..., 675 for 'zz'); any pair containing a
    character outside a-z (after lowercasing) is ignored.

    Note: the previous implementation used ``np.core.defchararray.add``,
    which was removed along with ``np.core`` in NumPy 2.0.  This version
    computes slot indices directly from character ordinals, which is both
    portable and faster (no per-call dict build, no per-element dict lookups).

    Args:
        texts (tuple): Tuple of strings to process

    Returns:
        np.ndarray: Vector of 26*26 bigram frequencies (dtype int32)
    """
    vector = np.zeros(26 * 26, dtype=np.int32)
    a_ord, z_ord = ord("a"), ord("z")

    for text in texts:
        text_lower = text.lower()
        if len(text_lower) < 2:
            continue

        # Slot = (first letter offset) * 26 + (second letter offset).
        # The ordinal range check drops any pair with a non a-z character,
        # matching the old isalpha + a-z lookup-table filter.
        indices = [
            (ord(c1) - a_ord) * 26 + (ord(c2) - a_ord)
            for c1, c2 in zip(text_lower, text_lower[1:])
            if a_ord <= ord(c1) <= z_ord and a_ord <= ord(c2) <= z_ord
        ]

        if indices:
            vector += np.bincount(indices, minlength=26 * 26).astype(np.int32)

    return vector
4 changes: 2 additions & 2 deletions aider/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ def get_io(pretty):
return await main_async(argv, input, output, right_repo_root, return_coder=return_coder)

if args.just_check_update:
update_available = check_version(io, just_check=True, verbose=args.verbose)
update_available = await check_version(io, just_check=True, verbose=args.verbose)
analytics.event("exit", reason="Just checking update")
return 0 if not update_available else 1

Expand All @@ -821,7 +821,7 @@ def get_io(pretty):
return 0 if success else 1

if args.check_update:
check_version(io, verbose=args.verbose)
await check_version(io, verbose=args.verbose)

if args.verbose:
show = format_settings(parser, args)
Expand Down
90 changes: 89 additions & 1 deletion aider/repomap.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
from tqdm import tqdm

from aider.dump import dump
from aider.helpers.similarity import (
cosine_similarity,
create_bigram_vector,
normalize_vector,
)
from aider.special import filter_important_files
from aider.tools.tool_utils import ToolError

Expand Down Expand Up @@ -183,6 +188,12 @@ def __init__(
self.map_processing_time = 0
self.last_map = None

# Initialize cache for mentioned identifiers similarity
self._last_mentioned_idents = None
self._last_mentioned_idents_vector = None
self._has_last_mentioned_idents = False
self._mentioned_ident_similarity = 0.8

if self.verbose:
self.io.tool_output(
f"RepoMap initialized with map_mul_no_files: {self.map_mul_no_files}"
Expand Down Expand Up @@ -807,6 +818,15 @@ def get_ranked_tags_map(
mentioned_idents=None,
force_refresh=False,
):
if not other_fnames:
other_fnames = list()
if not max_map_tokens:
max_map_tokens = self.max_map_tokens
if not mentioned_fnames:
mentioned_fnames = set()
if not mentioned_idents:
mentioned_idents = set()

# Create a cache key
cache_key = [
tuple(sorted(chat_fnames)) if chat_fnames else None,
Expand All @@ -815,11 +835,15 @@ def get_ranked_tags_map(
]

if self.refresh == "auto":
# Handle mentioned_fnames normally
cache_key += [
tuple(sorted(mentioned_fnames)) if mentioned_fnames else None,
tuple(sorted(mentioned_idents)) if mentioned_idents else None,
]

# Handle mentioned_idents with similarity check
cache_key_component = self._get_mentioned_idents_cache_component(mentioned_idents)
cache_key.append(cache_key_component)

cache_key = hash(str(tuple(cache_key)))

use_cache = False
Expand Down Expand Up @@ -1005,6 +1029,70 @@ def to_tree(self, tags, chat_rel_fnames):

return output

def _get_mentioned_idents_cache_component(self, mentioned_idents):
    """
    Determine the cache key component for mentioned_idents using similarity comparison.

    Compares the current mentioned_idents with the previously seen set using
    cosine similarity of their bigram frequency vectors.  If the similarity
    is high enough, the previous cache key component is returned so the
    overall cache key is unchanged (a cache hit).  Otherwise the stored
    state is updated and the current identifiers are used.  The threshold
    adapts: it tightens (up to 0.9) on consecutive hits and loosens
    (down to 0.5) on misses.

    Args:
        mentioned_idents (set): Current set of mentioned identifiers

    Returns:
        tuple or None: Cache key component for mentioned_idents
    """
    if not mentioned_idents:
        # Reset comparison state so the next non-empty set starts fresh.
        self._last_mentioned_idents = None
        self._last_mentioned_idents_vector = None
        self._has_last_mentioned_idents = False
        return None

    # Sort for determinism: set iteration order is arbitrary, and this tuple
    # feeds a hashed cache key.  Without sorting, two identical ident sets
    # could produce different cache keys (spurious cache misses).  This also
    # matches the tuple(sorted(...)) convention used for the other cache key
    # components in this file.
    current_mentioned_idents = tuple(sorted(mentioned_idents))

    if self._has_last_mentioned_idents:
        # Vectorize the current identifiers for comparison.
        current_vector = create_bigram_vector(current_mentioned_idents)
        current_vector_norm = normalize_vector(current_vector)

        similarity = cosine_similarity(self._last_mentioned_idents_vector, current_vector_norm)
        if similarity >= self._mentioned_ident_similarity:
            # Close enough: reuse the previous component to keep the cache hit.
            cache_key_component = self._last_mentioned_idents

            # Make similarity more strict the more consecutive cache hits,
            # so small drifts cannot accumulate indefinitely.
            self._mentioned_ident_similarity = min(
                0.9, self._mentioned_ident_similarity + 0.025
            )
        else:
            # Similarity is too low: key on the current identifiers.
            cache_key_component = current_mentioned_idents

            # Update stored values for the next comparison.
            self._last_mentioned_idents = current_mentioned_idents
            self._last_mentioned_idents_vector = current_vector_norm

            # Make similarity less strict the more consecutive cache misses.
            self._mentioned_ident_similarity = max(
                0.5, self._mentioned_ident_similarity - 0.025
            )
    else:
        # First time or no previous value: use current mentioned_idents.
        cache_key_component = current_mentioned_idents
        self._last_mentioned_idents = current_mentioned_idents
        self._last_mentioned_idents_vector = normalize_vector(
            create_bigram_vector(current_mentioned_idents)
        )

    self._has_last_mentioned_idents = True
    return cache_key_component


def truncate_long_lines(text, max_length):
    """Clip each line of *text* to at most *max_length* characters.

    Returns the clipped lines joined with newlines, with a trailing newline.
    """
    clipped = (line[:max_length] for line in text.splitlines())
    return "\n".join(clipped) + "\n"
Expand Down
4 changes: 2 additions & 2 deletions aider/versioncheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ async def install_upgrade(io, latest_version=None):
return


def check_version(io, just_check=False, verbose=False):
async def check_version(io, just_check=False, verbose=False):
if not just_check and VERSION_CHECK_FNAME.exists():
day = 60 * 60 * 24
since = time.time() - os.path.getmtime(VERSION_CHECK_FNAME)
Expand Down Expand Up @@ -109,5 +109,5 @@ def check_version(io, just_check=False, verbose=False):
if not is_update_available:
return False

install_upgrade(io, latest_version)
await install_upgrade(io, latest_version)
return True
Loading