In [1]:
# setup
import os
from dotenv import find_dotenv, load_dotenv
# Locate a .env file and load it before the llama_index imports below.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# OpenAI(...) clients later in the notebook - confirm.
_ = load_dotenv(find_dotenv())
# Index loading: FAISS-backed vector store plus the storage context helpers.
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import StorageContext
from llama_index.core import load_index_from_storage

# LLM client, LLM-based reranker, query wrapper and response synthesizer.
from llama_index.llms.openai import OpenAI
from llama_index.core.postprocessor.llm_rerank import LLMRerank
from llama_index.core import QueryBundle
from llama_index.core.response_synthesizers import CompactAndRefine

In [2]:
# Rebuild the persisted index from disk.
INDEX_DIR = 'storage'
# Restore the FAISS vector store first, then let the storage context pick up
# the remaining stores from the same directory.
vector_store = FaissVectorStore.from_persist_dir(INDEX_DIR)
storage_context = StorageContext.from_defaults(persist_dir=INDEX_DIR, vector_store=vector_store)
index = load_index_from_storage(storage_context)

Loading llama_index.core.storage.kvstore.simple_kvstore from storage/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from storage/index_store.json.


  from .autonotebook import tqdm as notebook_tqdm


# retriever

In [3]:
# retriever
retriever = index.as_retriever(similarity_top_k=8)
query = '什麼是深度學習的醍醐味?'
#query = '什麼是人工智慧的醍醐味?'
nodes = await retriever.aretrieve(query)
print(f"Retrieved {len(nodes)} nodes.")

Retrieved 8 nodes.


In [4]:
# Snapshot each retrieved node as a plain dict holding its id, text and score.
save_node_info = []
for node in nodes:
    save_node_info.append(
        {'id': node.id_, 'text': node.text, 'score': node.score}
    )

In [5]:
# ID of the last retrieved node. Indexing `nodes` directly avoids depending on
# a loop variable left behind by another cell.
nodes[-1].id_

'e6955f99-8070-4c73-aa72-c338f2c2c1c2'

In [6]:
# Fetch the stored node from the index's docstore using the ID of the last
# retrieved node. Indexing `nodes` directly avoids depending on a loop variable
# left behind by another cell.
index.docstore.get_node(nodes[-1].id_)

TextNode(id_='e6955f99-8070-4c73-aa72-c338f2c2c1c2', embedding=None, metadata={'file_path': '/home/poyuan/workspace/rag30/days/day23/data/source/lee/lee.txt', 'file_name': 'lee.txt', 'file_type': 'text/plain', 'file_size': 14112, 'creation_date': '2025-10-07', 'last_modified_date': '2025-09-19'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0b63edfd-2de7-45d8-993a-786b2b1967a6', node_type='4', metadata={'file_path': '/home/poyuan/workspace/rag30/days/day23/data/source/lee/lee.txt', 'file_name': 'lee.txt', 'file_type': 'text/plain', 'file_size': 14112, 'creation_date': '2025-10-07', 'last_modified_date': '2025-09-19'}, hash='93ea7cda1ebeec3bd19b0131a8b7a66cdc7f985ee30b3f792b0450cc5c724eab'), <NodeRelation

# rerank

## usage

In [7]:
# Wrap the raw query string, then rerank the retrieved nodes with an LLM and
# keep the 3 highest-ranked ones.
query_bundle = QueryBundle(query)
reranker = LLMRerank(llm=OpenAI(model="gpt-4o-mini"), top_n=3, choice_batch_size=8)
retrieved_nodes = reranker.postprocess_nodes(nodes, query_bundle)

In [11]:
# Score of the first node in the list returned by the reranker.
retrieved_nodes[0].score

9.0

## deep dive

# synthesis

In [8]:
# LLM client and the compact-and-refine synthesizer that turns nodes into an answer.
llm = OpenAI(model="gpt-4o-mini")
summarizer = CompactAndRefine(
    llm=llm,
    streaming=False,
    verbose=True,
)

In [9]:
# Baseline answer: synthesize straight from the retrieved nodes, without reranking.
response = summarizer.synthesize(query=query, nodes=nodes)
response.response

'深度學習的醍醐味在於模型訓練過程中的焦躁與不確定性，這種等待訓練結果的過程讓人體驗到調參的迷茫和挑戰。透過這樣的過程，學習者能夠理解模型訓練的複雜性，並在面對未來更大挑戰時做好心理準備。'