From f3e449a793c6157ddc143f3668ced27eec45a49c Mon Sep 17 00:00:00 2001
From: Kiran Murugulla <murugull@adobe.com>
Date: Mon, 4 May 2026 15:12:31 -0400
Subject: [PATCH 1/2] medialibrary(index): reduce the index file size and
 save chunks in batches, with wait times in between, to avoid
 da-admin worker issues

---
 nx/blocks/media-library/core/constants.js     |   2 -
 nx/blocks/media-library/indexing/admin-api.js |  67 ----------
 nx/blocks/media-library/indexing/medialog.js  |  39 +++---
 .../media-library/indexing/worker/fetch.js    | 116 +++++++++++-------
 .../media-library/indexing/worker/full.js     |  40 ++----
 .../indexing/worker/incremental.js            |  52 +++-----
 .../indexing/worker/linked-content.js         |   2 +-
 7 files changed, 124 insertions(+), 194 deletions(-)
diff --git a/nx/blocks/media-library/core/constants.js b/nx/blocks/media-library/core/constants.js
index ff65d3eb..4b46f759 100644
--- a/nx/blocks/media-library/core/constants.js
+++ b/nx/blocks/media-library/core/constants.js
@@ -14,8 +14,6 @@ export const IndexConfig = Object.freeze({
   DISCOVERY_MAX_PATHS_PER_JOB: 250,
   /* Larger batch to minimize UI update overhead - updates every ~100 seconds */
   USAGE_MAP_PROGRESSIVE_BATCH_SIZE: 1000,
-  /* Index chunking configuration */
-  MEDIA_INDEX_CHUNK_SIZE: 20_000, /* Entries per chunk (~15-20MB per chunk) */
   LOCK_HEARTBEAT_INTERVAL_MS: 60_000,
   LOCK_STALE_THRESHOLD_MS: 10 * 60_000,
   BUILD_MAX_DURATION_MS: 30 * 60 * 1000,
diff --git a/nx/blocks/media-library/indexing/admin-api.js b/nx/blocks/media-library/indexing/admin-api.js
index e58f7ee7..27b2e76b 100644
--- a/nx/blocks/media-library/indexing/admin-api.js
+++ b/nx/blocks/media-library/indexing/admin-api.js
@@ -98,20 +98,6 @@ function getChunkFileName(chunkNum) {
   return `${IndexFiles.MEDIA_INDEX_CHUNK_PREFIX}${String(chunkNum).padStart(3, '0')}.json`;
 }
 
-/**
- * Split media sheet into chunks
- * @param {Array} mediaData - Full media sheet data
- * @param {number} chunkSize - Entries per chunk
- * @returns {Array<Array>} Array of chunks
- */
-function chunkMediaSheet(mediaData, chunkSize) {
-  const chunks = [];
-  for (let i = 0; i < mediaData.length; i += chunkSize) {
-    chunks.push(mediaData.slice(i, i + chunkSize));
-  }
-  return chunks;
-}
-
 const DEFAULT_TIMEFRAME_DAYS = 3650; /* 10 years */
 
 export async function fetchWithAuth(url, opts = {}) {
@@ -321,59 +307,6 @@ export async function loadIndexChunks(basePath, chunkCount, sheetName, onProgres
   return results.map((r) => r.data).flat();
 }
 
-/**
- * Save index as chunks
- * @param {string} basePath - Base path without filename
- * @param {Array} mediaData - Media sheet data (must be pre-sorted)
- * @param {Array} usageData - Usage sheet data
- * @param {number} chunkSize - Entries per chunk
- * @returns {Promise<number>} Number of chunks created
- */
-export async function saveIndexChunks(basePath, mediaData, usageData, chunkSize) {
-  const mediaChunks = chunkMediaSheet(mediaData, chunkSize);
-
-  // Always save at least chunk 0, even if empty (for consistency)
-  const chunksToSave = mediaChunks.length > 0 ? mediaChunks : [[]];
-  const savePromises = [];
-
-  for (let i = 0; i < chunksToSave.length; i += 1) {
-    const chunkFileName = getChunkFileName(i);
-    const chunkPath = `${basePath}/${chunkFileName}`;
-
-    // Only include usage sheet in first chunk to avoid duplication
-    const sheets = {
-      media: chunksToSave[i],
-      usage: i === 0 ? usageData : [],
-    };
-
-    const formData = await createMultiSheet(sheets);
-    const savePromise = daFetch(`${DA_ORIGIN}/source${chunkPath}`, {
-      method: 'PUT',
-      body: formData,
-    });
-
-    savePromises.push(savePromise);
-  }
-
-  const responses = await Promise.all(savePromises);
-
-  // Validate all chunks saved successfully
-  const failedChunks = [];
-  responses.forEach((resp, i) => {
-    if (!resp.ok) {
-      failedChunks.push({ chunk: i, status: resp.status });
-    }
-  });
-
-  if (failedChunks.length > 0) {
-    const error = new Error(`Failed to save ${failedChunks.length}/${chunksToSave.length} chunks: ${failedChunks.map((f) => `chunk ${f.chunk} (${f.status})`).join(', ')}`);
-    error.failedChunks = failedChunks;
-    throw error;
-  }
-
-  return chunksToSave.length;
-}
-
 export async function saveSheet(data, path) {
   const formData = await createSheet(data);
   return daFetch(`${DA_ORIGIN}/source${path}`, {
diff --git a/nx/blocks/media-library/indexing/medialog.js b/nx/blocks/media-library/indexing/medialog.js
index 1e150eaf..5884f3c1 100644
--- a/nx/blocks/media-library/indexing/medialog.js
+++ b/nx/blocks/media-library/indexing/medialog.js
@@ -315,12 +315,27 @@ export function processPageMediaUpdates(
     const pageData = pageMediaMap.get(normalizedPath);
     const newEntries = pageData ? pageData.entries : [];
 
+    // Filter oldHashes to only include medialog-sourced entries for accurate comparison.
+    // External media (extlinks-parsed, markdown-parsed) and auditlog-parsed entries are skipped:
+    // they are never in medialog - they come from markdown parsing
+    const oldMedialogHashes = new Set();
+    oldHashes.forEach((hash) => {
+      const entry = updatedIndex.find((e) => e.hash === hash && e.doc === normalizedPath);
+      if (entry) {
+        const op = entry.operation || entry.source;
+        const isFromMedialog = op !== 'extlinks-parsed' && op !== 'markdown-parsed' && op !== 'auditlog-parsed';
+        if (isFromMedialog) {
+          oldMedialogHashes.add(hash);
+        }
+      }
+    });
+
     onLog(`--- Page: ${normalizedPath} ---`);
-    onLog(`  Old (bypage): ${oldHashes.size}, New (page-based): ${newEntries.length}`);
+    onLog(`  Old (bypage): ${oldMedialogHashes.size}, New (page-based): ${newEntries.length}`);
 
     const newHashes = new Set(newEntries.map((e) => e.hash));
-    const toRemove = [...oldHashes].filter((h) => !newHashes.has(h));
-    const toAdd = [...newHashes].filter((h) => !oldHashes.has(h));
+    const toRemove = [...oldMedialogHashes].filter((h) => !newHashes.has(h));
+    const toAdd = [...newHashes].filter((h) => !oldMedialogHashes.has(h));
 
     if (toRemove.length || toAdd.length) {
       onLog(`  Diff: remove ${toRemove.length}, add ${toAdd.length}`);
@@ -329,18 +344,12 @@ export function processPageMediaUpdates(
     toRemove.forEach((hash) => {
       const oldEntry = updatedIndex.find((e) => e.hash === hash && e.doc === normalizedPath);
       if (oldEntry) {
-        // Don't remove external media (extlinks-parsed/markdown-parsed) or auditlog-parsed entries
-        // They come from markdown parsing, not medialog, so they're handled by processLinkedContent
-        const op = oldEntry.operation || oldEntry.source;
-        const isFromMarkdown = op === 'extlinks-parsed' || op === 'markdown-parsed' || op === 'auditlog-parsed';
-        if (!isFromMarkdown) {
-          removed += removeOrOrphanMedia(
-            updatedIndex,
-            oldEntry,
-            normalizedPath,
-            medialogEntries,
-          );
-        }
+        removed += removeOrOrphanMedia(
+          updatedIndex,
+          oldEntry,
+          normalizedPath,
+          medialogEntries,
+        );
       }
     });
 
diff --git a/nx/blocks/media-library/indexing/worker/fetch.js b/nx/blocks/media-library/indexing/worker/fetch.js
index c33d5496..202b2917 100644
--- a/nx/blocks/media-library/indexing/worker/fetch.js
+++ b/nx/blocks/media-library/indexing/worker/fetch.js
@@ -729,74 +729,108 @@ function chunkMediaSheet(mediaData, chunkSize) {
 }
 
 /**
- * Worker-safe version of saveIndexChunks from admin-api.js
+ * Determine optimal chunk size based on total entry count
  *
- * @param {string} basePath - Base path for chunks (e.g., /org/repo/.da/media-insights)
- * @param {Array} mediaData - Media sheet data
- * @param {Array} usageData - Usage sheet data
- * @param {number} chunkSize - Entries per chunk
- * @param {string} daOrigin - DA origin (e.g., https://admin.da.live)
+ * Rationale:
+ * - Small sites (<10k entries): Single file (100k chunk size ensures no chunking)
+ *   - No overhead from loading multiple chunks
+ *   - Simpler debugging and inspection
+ *
+ * - Medium sites (10k-200k): 8k entries per chunk (~4-5MB files)
+ *   - Prevents CF Worker 128MB memory limit errors during PUT
+ *   - Balances file size vs chunk count overhead
+ *   - Progressive loading: chunk 0 loads quickly for default Images view
+ *
+ * - Large sites (>200k entries): 6k entries per chunk (~3-4MB files)
+ *   - Smaller files for better reliability on massive indexes
+ *   - More chunks acceptable given already high chunk count
+ *   - Further reduces memory pressure on uploads
+ *
+ * Chunk size targets file sizes ≤5MB to avoid DA Admin/S3 timeouts
+ * Average media entry size: ~550 bytes (URL + metadata + doc field)
+ *
+ * @param {number} totalEntries - Total number of media entries in index
+ * @returns {number} Optimal chunk size (entries per chunk)
+ */
+export function getAdaptiveChunkSize(totalEntries) {
+  if (totalEntries < 10_000) { // small index
+    return 100_000; // exceeds every count on this branch, so one chunk holds all entries
+  }
+
+  if (totalEntries < 200_000) { // medium index: 10_000 <= totalEntries < 200_000
+    return 8_000;
+  }
+
+  return 6_000; // large index; also reached for NaN/undefined (both comparisons are false)
+}
+
+/**
+ * Save index as chunks with batched uploads to prevent rate limiting
+ * Uploads 3 chunks concurrently with 500ms delays between batches
+ *
+ * @param {string} basePath - Base path without filename (e.g., '/site/.da/media-insights')
+ * @param {Array} mediaData - Media sheet data (must be pre-sorted)
+ * @param {number} chunkSize - Entries per chunk (from getAdaptiveChunkSize)
+ * @param {string} daOrigin - DA origin (e.g., 'https://admin.da.live')
  * @param {string} imsToken - IMS access token
  * @param {string} indexFilesChunkPrefix - Chunk filename prefix (e.g., 'index-')
- * @returns {Promise<number>} Number of chunks saved
+ * @returns {Promise<number>} Number of chunks created
  */
 export async function saveIndexChunks(
   basePath,
   mediaData,
-  usageData,
   chunkSize,
   daOrigin,
   imsToken,
   indexFilesChunkPrefix,
 ) {
   const mediaChunks = chunkMediaSheet(mediaData, chunkSize);
-
-  // Always save at least chunk 0, even if empty (for consistency)
   const chunksToSave = mediaChunks.length > 0 ? mediaChunks : [[]];
-  const savePromises = [];
-
-  for (let i = 0; i < chunksToSave.length; i += 1) {
-    const chunkFileName = getChunkFileName(i, indexFilesChunkPrefix);
-    const chunkPath = `${basePath}/${chunkFileName}`;
 
-    // Only include usage sheet in first chunk to avoid duplication
-    const sheets = {
-      media: chunksToSave[i],
-      usage: i === 0 ? usageData : [],
-    };
-
-    const formData = await createMultiSheet(sheets);
-    const savePromise = workerDaFetch(`${daOrigin}/source${chunkPath}`, imsToken, {
-      method: 'PUT',
-      body: formData,
+  // Rate limiting to prevent DA Admin endpoint overload:
+  // - batchSize=3: Limit concurrent uploads (prevents 503 errors)
+  // - delayMs=500: 500ms pause between batches (at most 3 PUTs per 500ms, i.e. <= 6 req/sec)
+  // - Prevents CF Worker 128MB memory errors from large concurrent PUTs
+  const batchSize = 3;
+  const delayMs = 500;
+
+  for (let i = 0; i < chunksToSave.length; i += batchSize) {
+    const batch = chunksToSave.slice(i, i + batchSize);
+    const batchPromises = batch.map(async (chunk, idx) => {
+      const chunkNum = i + idx;
+      const chunkFileName = getChunkFileName(chunkNum, indexFilesChunkPrefix);
+      const chunkPath = `${basePath}/${chunkFileName}`;
+      const sheets = { media: chunk };
+
+      const formData = await createMultiSheet(sheets);
+      return workerDaFetch(`${daOrigin}/source${chunkPath}`, imsToken, {
+        method: 'PUT',
+        body: formData,
+      });
     });
 
-    savePromises.push(savePromise);
-  }
-
-  const responses = await Promise.all(savePromises);
+    const responses = await Promise.all(batchPromises);
 
-  // Validate all chunks saved successfully
-  const failedChunks = [];
-  responses.forEach((resp, i) => {
-    if (!resp.ok) {
-      failedChunks.push(i);
+    const failed = responses.filter((r) => !r.ok);
+    if (failed.length > 0) {
+      throw new Error(`Batch ${Math.floor(i / batchSize)} failed: ${failed.length} chunks`);
     }
-  });
 
-  if (failedChunks.length > 0) {
-    throw new Error(`Failed to save chunks: ${failedChunks.join(', ')}`);
+    if (i + batchSize < chunksToSave.length) {
+      await new Promise((resolve) => { setTimeout(resolve, delayMs); });
+    }
   }
 
   return chunksToSave.length;
 }
 
 /**
- * Worker-safe version of saveIndexMeta from admin-api.js
+ * Save index metadata to DA storage
+ * Must be called AFTER saveIndexChunks to ensure chunkCount is accurate
  *
- * @param {object} meta - Metadata object
- * @param {string} path - Full path to meta file
- * @param {string} daOrigin - DA origin (e.g., https://admin.da.live)
+ * @param {object} meta - Metadata object containing indexType, timestamp, chunkCount, etc.
+ * @param {string} path - Full path to meta file (e.g., '/site/.da/media-insights/index-meta.json')
+ * @param {string} daOrigin - DA origin (e.g., 'https://admin.da.live')
  * @param {string} imsToken - IMS access token
  * @returns {Promise<Response>}
  */
diff --git a/nx/blocks/media-library/indexing/worker/full.js b/nx/blocks/media-library/indexing/worker/full.js
index b38411e2..3a3d5230 100644
--- a/nx/blocks/media-library/indexing/worker/full.js
+++ b/nx/blocks/media-library/indexing/worker/full.js
@@ -8,12 +8,11 @@
 
 import {
   streamLog,
-  saveIndexMeta,
   saveIndexChunks,
+  saveIndexMeta,
+  getAdaptiveChunkSize,
 } from './fetch.js';
 import runBulkStatus from './bulk-status.js';
-// Use worker-safe stub for processLinkedContent
-// (avoids admin-api.js → daFetch.js → public/utils/constants.js)
 import {
   processLinkedContent,
 } from './linked-content.js';
@@ -38,13 +37,9 @@ import {
   sortMediaData,
   getContentPathFromSitePath,
 } from '../parse-utils.js';
-import { buildMediaSheet, buildUsageSheet } from '../sheets.js';
+import { buildMediaSheet } from '../sheets.js';
 import { canonicalizeMediaUrl } from '../../core/urls.js';
-import {
-  IndexFiles,
-} from '../../core/constants.js';
-import { MediaLibraryError, ErrorCodes, logMediaLibraryError } from '../../core/errors.js';
-import { t } from '../../core/messages.js';
+import { IndexFiles } from '../../core/constants.js';
 
 const PERF_TAG = 'phase3-split-sheets';
 const INDEX_SCHEMA_VERSION = 2;
@@ -603,24 +598,21 @@ export async function buildFullIndex(
   const saveStart = Date.now();
   const sheetBuildStart = Date.now();
   const mediaSheet = buildMediaSheet(sortedIndex);
-  const usageSheet = buildUsageSheet(index);
   const sheetBuildMs = Date.now() - sheetBuildStart;
 
   onProgress({
     stage: 'saving',
-    message: `Saving ${mediaSheet.length} media entries, ${usageSheet.length} page-hash pairs...`,
+    message: `Saving ${mediaSheet.length} media entries...`,
   });
 
   const basePath = `${sitePath}/${IndexFiles.FOLDER}`;
-  const chunkSize = IndexConfig.MEDIA_INDEX_CHUNK_SIZE;
+  const chunkSize = getAdaptiveChunkSize(mediaSheet.length);
   const multiSheetStart = Date.now();
 
-  // Save as chunks
   const uploadStart = Date.now();
   const chunkCount = await saveIndexChunks(
     basePath,
     mediaSheet,
-    usageSheet,
     chunkSize,
     daOrigin,
     imsToken,
@@ -629,8 +621,7 @@ export async function buildFullIndex(
   const uploadMs = Date.now() - uploadStart;
   const multiSheetMs = Date.now() - multiSheetStart;
 
-  // Calculate approximate payload size (for perf tracking)
-  const payloadSizeBytes = mediaSheet.length * 200; // Rough estimate
+  const payloadSizeBytes = mediaSheet.length * 200;
   const payloadSizeKB = Math.round((payloadSizeBytes / 1024) * 10) / 10;
   const payloadSizeMB = Math.round((payloadSizeBytes / (1024 * 1024)) * 100) / 100;
 
@@ -639,7 +630,6 @@ export async function buildFullIndex(
     lastFetchTime: Date.now(),
     entriesCount: index.length,
     mediaCount: mediaSheet.length,
-    usageCount: usageSheet.length,
     lastRefreshBy: 'media-indexer',
     lastBuildMode: buildMode,
     chunked: true,
@@ -648,18 +638,9 @@ export async function buildFullIndex(
     schemaVersion: INDEX_SCHEMA_VERSION,
   }, `${sitePath}/${IndexFiles.FOLDER}/${IndexFiles.MEDIA_INDEX_META}`, daOrigin, imsToken);
   const metaSaveMs = Date.now() - metaSaveStart;
+
   if (!metaResp.ok) {
-    const partialMsg = t('PARTIAL_SAVE');
-    const metaPathFull = `${sitePath}/${IndexFiles.FOLDER}/${IndexFiles.MEDIA_INDEX_META}`;
-    logMediaLibraryError(ErrorCodes.PARTIAL_SAVE, {
-      indexSaved: true,
-      metaSaved: false,
-      endpoint: metaPathFull,
-    });
-    throw new MediaLibraryError(ErrorCodes.PARTIAL_SAVE, partialMsg, {
-      indexSaved: true,
-      metaSaved: false,
-    });
+    throw new Error('Failed to save index metadata');
   }
 
   perf.saveDurationMs = Date.now() - saveStart;
@@ -676,12 +657,11 @@ export async function buildFullIndex(
 
   onProgress({
     stage: 'complete',
-    message: `Complete! ${mediaSheet.length} media, ${usageSheet.length} page refs`,
+    message: `Complete! ${mediaSheet.length} media`,
   });
 
   perf.indexEntries = index.length;
   perf.mediaCount = mediaSheet.length;
-  perf.usageCount = usageSheet.length;
   perf.totalDurationMs = Date.now() - t0;
   perf.collectedAt = new Date().toISOString();
 
diff --git a/nx/blocks/media-library/indexing/worker/incremental.js b/nx/blocks/media-library/indexing/worker/incremental.js
index 890c379c..a089bbde 100644
--- a/nx/blocks/media-library/indexing/worker/incremental.js
+++ b/nx/blocks/media-library/indexing/worker/incremental.js
@@ -10,10 +10,11 @@
 import {
   streamLog,
   loadSheetMeta,
-  saveIndexMeta,
   saveIndexChunks,
+  saveIndexMeta,
   loadMultiSheet,
   loadIndexChunks,
+  getAdaptiveChunkSize,
 } from './fetch.js';
 // Use worker-safe stub for processLinkedContent
 // (avoids admin-api.js → daFetch.js → public/utils/constants.js)
@@ -36,7 +37,7 @@ import {
   sortMediaData,
   getContentPathFromSitePath,
 } from '../parse-utils.js';
-import { buildMediaSheet } from '../sheets.js';
+import { buildMediaSheet, buildUsageSheet, buildUsageMap } from '../sheets.js';
 import {
   IndexConfig,
   IndexFiles,
@@ -164,7 +165,6 @@ export async function buildIncrementalIndex(
       existingIndex = [];
       usageData = [];
     } else {
-      // Load all media chunks
       existingIndex = await loadIndexChunks(
         basePath,
         chunkCount,
@@ -172,28 +172,16 @@ export async function buildIncrementalIndex(
         daOrigin,
         imsToken,
       );
-      // Load usage only from chunk 0 (it's only stored there)
-      const chunk0Path = `${basePath}/${IndexFiles.MEDIA_INDEX_CHUNK_PREFIX}000.json`;
-      usageData = await loadMultiSheet(chunk0Path, SheetNames.USAGE, daOrigin, imsToken);
+      usageData = buildUsageSheet(existingIndex);
     }
   } else {
-    // Load from single file (backward compatibility)
     existingIndex = await loadMultiSheet(indexPath, SheetNames.MEDIA, daOrigin, imsToken);
-    usageData = await loadMultiSheet(indexPath, SheetNames.USAGE, daOrigin, imsToken);
+    usageData = buildUsageSheet(existingIndex);
   }
 
   perf.loadExistingMs = Date.now() - loadStart;
 
-  const usageMap = new Map();
-  usageData.forEach((entry) => {
-    try {
-      const hashes = JSON.parse(entry.hashes);
-      usageMap.set(entry.page, new Set(hashes));
-    } catch (error) {
-      // eslint-disable-next-line no-console
-      console.warn(`[MediaIndexer] Skipping malformed usage entry for page: ${entry.page}`, error);
-    }
-  });
+  const usageMap = buildUsageMap(usageData);
 
   // Normalize hash format in existing index entries
   // Hash should always be bare (e.g. "abc123"), never with prefix (e.g. "media_abc123.jpg")
@@ -479,14 +467,14 @@ export async function buildIncrementalIndex(
     let totalImageEntries = 0;
 
     updatedIndex.forEach((entry) => {
-      const isImage = entry.type === 'image' || entry.type === 'video';
+      const op = entry.operation || entry.source;
+      const isExternalMedia = op === 'extlinks-parsed' || op === 'markdown-parsed';
+      const isImage = (entry.type === 'image' || entry.type === 'video') && !isExternalMedia;
       if (isImage) totalImageEntries += 1;
 
       const hasDoc = entry.doc && entry.doc !== '';
-      const op = entry.operation || entry.source;
-      const isExternalMedia = op === 'extlinks-parsed' || op === 'markdown-parsed';
 
-      if (isImage && hasDoc && !isExternalMedia) {
+      if (isImage && hasDoc) {
         let entryPath;
         try {
           entryPath = new URL(entry.url).pathname;
@@ -549,39 +537,27 @@ export async function buildIncrementalIndex(
     }
   });
 
-  // Build usage sheet from usageMap to preserve all page references (changed AND unchanged)
-  // buildUsageSheet(updatedIndex) would only include changed pages since existingIndex
-  // is loaded from deduplicated media sheet without doc fields
-  const usageSheet = Array.from(usageMap.entries()).map(([page, hashSet]) => ({
-    page,
-    hashes: JSON.stringify(Array.from(hashSet)),
-  }));
-
   onProgress({
     stage: 'saving',
-    message: `Saving ${mediaSheet.length} media entries, ${usageSheet.length} page-hash pairs...`,
+    message: `Saving ${mediaSheet.length} media entries...`,
   });
 
   const saveStart = Date.now();
-  const chunkSize = IndexConfig.MEDIA_INDEX_CHUNK_SIZE;
+  const chunkSize = getAdaptiveChunkSize(mediaSheet.length);
 
-  // MODIFIED: Use worker-safe saveIndexChunks with imsToken
   const chunkCount = await saveIndexChunks(
     basePath,
     mediaSheet,
-    usageSheet,
     chunkSize,
     daOrigin,
     imsToken,
     IndexFiles.MEDIA_INDEX_CHUNK_PREFIX,
   );
 
-  // MODIFIED: Use worker-safe saveIndexMeta with imsToken
   const metaResp = await saveIndexMeta({
     lastFetchTime: Date.now(),
     entriesCount: updatedIndex.length,
     mediaCount: mediaSheet.length,
-    usageCount: usageSheet.length,
     lastRefreshBy: 'media-indexer',
     lastBuildMode: 'incremental',
     chunked: true,
@@ -593,16 +569,16 @@ export async function buildIncrementalIndex(
   if (!metaResp.ok) {
     throw new Error('Failed to save index metadata');
   }
+
   perf.saveDurationMs = Date.now() - saveStart;
 
   onProgress({
     stage: 'complete',
-    message: `Incremental complete! ${mediaSheet.length} media, ${usageSheet.length} page refs (${added} added, ${removed} removed)`,
+    message: `Incremental complete! ${mediaSheet.length} media (${added} added, ${removed} removed)`,
   });
 
   perf.indexEntries = updatedIndex.length;
   perf.mediaCount = mediaSheet.length;
-  perf.usageCount = usageSheet.length;
   perf.totalDurationMs = Date.now() - t0;
   perf.collectedAt = new Date().toISOString();
   logPerf(perf, isPerfEnabled);
diff --git a/nx/blocks/media-library/indexing/worker/linked-content.js b/nx/blocks/media-library/indexing/worker/linked-content.js
index 1d4fdd36..6d3681ae 100644
--- a/nx/blocks/media-library/indexing/worker/linked-content.js
+++ b/nx/blocks/media-library/indexing/worker/linked-content.js
@@ -242,7 +242,7 @@ export async function processLinkedContent(
         (e) => isExtlinksEntry(e)
           && e.doc
           && parsedPages.has(normalizePath(e.doc))
-          && !linkedPages.includes(e.doc),
+          && !linkedPages.some((p) => normalizePath(p) === normalizePath(e.doc)),
       );
       obsolete.forEach((e) => {
         updatedIndex.splice(updatedIndex.indexOf(e), 1);

From e8c315704fa41100ed92a0b8a841c43b4e3dd503 Mon Sep 17 00:00:00 2001
From: Kiran Murugulla <murugull@adobe.com>
Date: Mon, 4 May 2026 15:28:16 -0400
Subject: [PATCH 2/2] media-library : better logging to identify chunk failures

---
 nx/blocks/media-library/indexing/worker/fetch.js       | 10 ++++++++--
 nx/blocks/media-library/indexing/worker/full.js        |  2 +-
 nx/blocks/media-library/indexing/worker/incremental.js |  2 +-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/nx/blocks/media-library/indexing/worker/fetch.js b/nx/blocks/media-library/indexing/worker/fetch.js
index 202b2917..a2c2da43 100644
--- a/nx/blocks/media-library/indexing/worker/fetch.js
+++ b/nx/blocks/media-library/indexing/worker/fetch.js
@@ -811,9 +811,15 @@ export async function saveIndexChunks(
 
     const responses = await Promise.all(batchPromises);
 
-    const failed = responses.filter((r) => !r.ok);
+    const failed = [];
+    responses.forEach((r, idx) => {
+      if (!r.ok) {
+        failed.push({ chunkNum: i + idx, status: r.status });
+      }
+    });
     if (failed.length > 0) {
-      throw new Error(`Batch ${Math.floor(i / batchSize)} failed: ${failed.length} chunks`);
+      const chunkNums = failed.map((f) => `${f.chunkNum} (${f.status})`).join(', ');
+      throw new Error(`Failed to save ${failed.length} chunk(s): ${chunkNums}`);
     }
 
     if (i + batchSize < chunksToSave.length) {
diff --git a/nx/blocks/media-library/indexing/worker/full.js b/nx/blocks/media-library/indexing/worker/full.js
index 3a3d5230..63372c8c 100644
--- a/nx/blocks/media-library/indexing/worker/full.js
+++ b/nx/blocks/media-library/indexing/worker/full.js
@@ -640,7 +640,7 @@ export async function buildFullIndex(
   const metaSaveMs = Date.now() - metaSaveStart;
 
   if (!metaResp.ok) {
-    throw new Error('Failed to save index metadata');
+    throw new Error(`Failed to save index metadata: HTTP ${metaResp.status}`);
   }
 
   perf.saveDurationMs = Date.now() - saveStart;
diff --git a/nx/blocks/media-library/indexing/worker/incremental.js b/nx/blocks/media-library/indexing/worker/incremental.js
index a089bbde..836343a6 100644
--- a/nx/blocks/media-library/indexing/worker/incremental.js
+++ b/nx/blocks/media-library/indexing/worker/incremental.js
@@ -567,7 +567,7 @@ export async function buildIncrementalIndex(
   }, metaPath, daOrigin, imsToken);
 
   if (!metaResp.ok) {
-    throw new Error('Failed to save index metadata');
+    throw new Error(`Failed to save index metadata: HTTP ${metaResp.status}`);
   }
 
   perf.saveDurationMs = Date.now() - saveStart;