In [5]:
# Install necessary libraries
#!pip install llama-index sentence-transformers
!pip install llama-index-embeddings-huggingface

import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Retrieval-only demo: embed the files of a local folder, build an in-memory
# vector index over them, and print the top matches for one query. No LLM is
# called; only the embedding model and the retriever are used.

# 1. Configure embedding model
# Assigning to Settings sets this model as the global default embedder.
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")
Settings.embed_model = embed_model

# 2. Data Ingestion
data_folder = r"C:\python\nanobrowser\chrome-extension"  # Replace with your folder path
# isdir rather than exists: a path pointing at a regular file would pass an
# existence check and only fail later inside the directory reader.
if not os.path.isdir(data_folder):
    print(f"Error: The data folder '{data_folder}' does not exist or is not a directory.")
else:
    print(f"Loading documents from {data_folder}...")
    # NOTE(review): the reader is called with default options, which (per the
    # LlamaIndex docs) do not descend into subfolders. If the relevant sources
    # live in nested directories, consider recursive=True -- confirm intent.
    documents = SimpleDirectoryReader(data_folder).load_data()

    # 3. Indexing
    print("Building vector index...")
    index = VectorStoreIndex.from_documents(documents)

    # 4. Simple Retrieval (without LLM generation)
    retriever = index.as_retriever(similarity_top_k=3)
    query = "How planner work?"
    retrieved_nodes = retriever.retrieve(query)

    # 5. Print retrieved text chunks
    print(f"\nTop {len(retrieved_nodes)} retrieved chunks for query: '{query}'")
    for i, node in enumerate(retrieved_nodes, start=1):
        # score is optional on retrieved nodes; formatting None with ':.4f'
        # would raise TypeError, so fall back to a placeholder.
        score_label = "n/a" if node.score is None else f"{node.score:.4f}"
        print(f"\n--- Result {i} (Score: {score_label}) ---")
        # Show at most the first 300 characters, marking truncation with "...".
        text = node.text
        print(text[:300] + "..." if len(text) > 300 else text)

Collecting llama-index-embeddings-huggingface
  Downloading llama_index_embeddings_huggingface-0.5.3-py3-none-any.whl.metadata (767 bytes)
Downloading llama_index_embeddings_huggingface-0.5.3-py3-none-any.whl (9.0 kB)
Installing collected packages: llama-index-embeddings-huggingface
Successfully installed llama-index-embeddings-huggingface-0.5.3



[notice] A new release of pip is available: 25.0.1 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Loading documents from C:\python\nanobrowser\chrome-extension...
Building vector index...

Top 3 retrieved chunks for query: 'How planner work?'

--- Result 1 (Score: -0.0319) ---
{
  "name": "chrome-extension",
  "version": "0.1.4",
  "description": "chrome extension - core settings",
  "type": "module",
  "scripts": {
    "clean:node_modules": "pnpx rimraf node_modules",
    "clean:turbo": "rimraf .turbo",
    "clean": "pnpm clean:turbo && pnpm clean:node_modules",
    "bui...

--- Result 2 (Score: -0.0378) ---
import { resolve } from 'node:path';
import { defineConfig, type PluginOption } from "vite";
import libAssetsPlugin from '@laynezh/vite-plugin-lib-assets';
import makeManifestPlugin from './utils/plugins/make-manifest-plugin';
import { watchPublicPlugin, watchRebuildPlugin } from '@extension/hmr';
i...

--- Result 3 (Score: -0.0769) ---
import fs from 'node:fs';
import deepmerge from 'deepmerge';

const packageJson = JSON.parse(fs.readFileSync('../package.json', 'utf8'));

con