# References

* Agentic RAG [arxiv-paper-curator](https://github.com/jamwithai/arxiv-paper-curator)

In [1]:
import os
import sys

import numpy as np

# Resolve where the course data lives. RUN_ENV selects the runtime: the default
# 'COLLAB' mounts Google Drive, any other value reads the path from DATA_DIR.
run_env = os.getenv('RUN_ENV', 'COLLAB')
if run_env == 'COLLAB':
  # Imported here because this module is only needed on the Colab branch.
  from google.colab import drive
  ROOT_DIR = '/content/drive'
  drive.mount(ROOT_DIR)
  print('Google drive connected')
  root_data_dir = os.path.join(ROOT_DIR, 'MyDrive', 'ml_course_data')
  lib_path = os.path.join(ROOT_DIR, 'MyDrive', 'src')
  # The project code must be uploaded to Drive so it can be imported below.
  if not os.path.isdir(lib_path):
    raise RuntimeError('Upload a `src` dir with code to %s' % lib_path)
  sys.path.append(lib_path)
else:
  root_data_dir = os.getenv('DATA_DIR', '/srv/data')

# isdir (not exists): a regular file at this path would otherwise pass the check
# and then fail inside os.listdir with a less helpful error.
if not os.path.isdir(root_data_dir):
  raise RuntimeError('Data dir does not exist: %s' % root_data_dir)

# Show at most five entries as a quick sanity check of the directory content.
print('Data dir content %s: %s' % (root_data_dir, ', '.join(os.listdir(root_data_dir)[:5])))

Data dir content /Users/adzhumurat/PycharmProjects/ai_product_engineer/data: client_segmentation.csv, messages.db, labeled_data_corpus.csv, chroma, content_description.csv


In [2]:
# Project-local helper from the `rag.connections` module that creates the ChromaDB client.
from rag.connections import get_chroma_client

# Keep the client in a notebook-level variable; a later cell uses it to open a collection.
client = get_chroma_client()
# Bare expression: as the last line of the cell it makes the notebook display the client's repr.
client

Connecting to ChromaDB service at http://localhost:8000


<chromadb.api.client.Client at 0x1076d4dd0>

In [20]:
import requests

# Base URL of the HTTP server that exposes the `/api/embeddings` endpoint.
host = 'http://0.0.0.0:11434'
# Search query, in Russian: "what is an antigradient".
query = 'что такое антиградиент'
timeout = 5  # passed to requests.post as the request timeout

# Embedding model to use. The alternative is kept as a comment so it can be
# switched back easily (it used to be assigned and immediately overwritten).
# model = 'granite4:350m'
model = 'qwen2.5:1.5b'

# Ask the server to embed the query text with the selected model.
url = host.rstrip("/") + "/api/embeddings"
response = requests.post(
    url,
    json={"model": model, "prompt": query},
    timeout=timeout,
)
response.raise_for_status()
data = response.json()

# The vector is expected under "embedding"; "embeddings" is accepted as a fallback key.
query_embedding = data.get("embedding") or data.get("embeddings")
if not query_embedding:
    # Fail loudly here instead of passing None on to the vector search.
    raise RuntimeError(
        'No embedding in response from %s (keys: %s)' % (url, ', '.join(sorted(data)))
    )
# NOTE(review): a batch-style "embeddings" payload is presumably a list of vectors;
# keep only the first one so downstream code always receives a flat vector.
if isinstance(query_embedding[0], (list, tuple)):
    query_embedding = query_embedding[0]
print(np.array(query_embedding).shape)

(1536,)


In [21]:
# Look up the nearest stored chunks for the query embedding in the 'docs1' collection.
collection = client.get_collection('docs1')

# Number of hits to return and the result fields to request for each hit.
limit = 10
include = ["documents", "metadatas", "distances"]

# Last expression of the cell, so the notebook displays the raw query result.
collection.query(
    include=include,
    n_results=limit,
    query_embeddings=[query_embedding],
)

{'ids': [['44910ac67cd260627cc10b7a1e98ffc0_5',
   '521cd333018c5db9ad1eaf97b0c7b5dd_57',
   'e2a544eb5aafdaa54e2eece40381c0ef_27',
   '521cd333018c5db9ad1eaf97b0c7b5dd_49',
   'e102ed911c56b270ee42a7663c35e4b8_1',
   '521cd333018c5db9ad1eaf97b0c7b5dd_39',
   '11ca17fdd8bb5b27217f9691effa4e03_5',
   '87e5ac00900a7395167b7338d840887e_6',
   '44afae845aa8ae52cd29d3ec7e50f745_14',
   'e2a544eb5aafdaa54e2eece40381c0ef_39']],
 'distances': [[5993.615,
   6101.614,
   6239.2065,
   6521.266,
   6534.8027,
   6565.432,
   6649.8496,
   6649.8496,
   6664.7915,
   6682.0903]],
 'embeddings': None,
 'metadatas': [[{'source_file_name': 'vol_04_deep_dive_09_trees_boosting.ipynb',
    'chunk_index': '5',
    'source': 'data/md_docs/vol_04_deep_dive_09_trees_boosting.md',
    'source_dir': '/Users/adzhumurat/PycharmProjects/ai_product_engineer/jupyter_notebooks'},
   {'chunk_index': '57',
    'source_file_name': 'vol_04_deep_dive_00_probability.ipynb',
    'source': 'data/md_docs/vol_04_deep_dive_0

In [25]:
# Print the character count of every document in the `md_docs` folder of the data dir.
docs_path = os.path.join(root_data_dir, 'md_docs')
for current_file in os.listdir(docs_path):
    file_path = os.path.join(docs_path, current_file)
    # os.listdir also returns sub-directories, which cannot be opened as text; skip them.
    if not os.path.isfile(file_path):
        continue
    # Explicit UTF-8 so decoding does not depend on the platform's default encoding
    # (the documents may contain non-ASCII text).
    with open(file_path, 'r', encoding='utf-8') as f:
        file_content = f.read()
    print(len(file_content))

25743
12328
50755
22446
13410
12348
23547
9653
45966
22105
19253
15822
26619
9263
13511
2108
32743
31300
25222
40373
6008
42048
35539
12521
8400
50919
2246
13204
15410
1065
14506
31357
20802
23344
35824
21435



# RAG: Classical vs Agentic


### Classical RAG: фиксированный pipeline
```
Query → Embedding → Vector Search → Top-K → LLM → Answer

Проблемы:
❌ Один проход — нет возможности исправить ошибки
❌ Всегда ищет, даже если не нужно
❌ Не может адаптировать стратегию
```

### Agentic RAG: Actor-Critic паттерн
```
Query → Agent (Actor) → Decision Loop
           ↓
    Нужен ли retrieval?
           ↓
    [Vector Search / Web / SQL / Direct Answer]
           ↓
    Critic оценивает качество
           ↓
    Хорошо? → Ответ
    Плохо? → Reformulate и повторить
```

**Actor** принимает решения:
- Какой инструмент использовать?
- Сколько документов нужно?
- Нужно ли переформулировать запрос?

**Critic** оценивает:
- Relevance score (насколько релевантны документы)
- Faithfulness (нет ли галлюцинаций)
- Completeness (достаточно ли информации)

**Сравнение:**

| Аспект | Classical RAG | Agentic RAG |
|--------|---------------|-------------|
| Архитектура | Линейный pipeline | Циклический с reasoning |
| Retrieval | Всегда выполняется | Только когда нужно |
| Tools | Только vector search | Multiple (web, SQL, code) |
| Latency | Предсказуемая | Переменная |
| Quality | Зависит от первого retrieval | Адаптивное улучшение |

📚 **Источники:**
- [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks (Lewis et al., 2020)](https://arxiv.org/abs/2005.11401) — оригинальная RAG статья
- [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629)
- [LangChain Documentation on Agents](https://python.langchain.com/docs/modules/agents/)
