In [61]:
# Uncomment to install the two packages this notebook imports
# (llama_index and dotenv) into the current environment.
# !pip install llama_index
# !pip install python_dotenv

In [1]:
from IPython.display import Image

## Concepts

In [2]:
# Render the basic RAG diagram hosted on the LlamaIndex docs site.
Image(
    url='https://docs.llamaindex.ai/en/latest/_static/getting_started/basic_rag.png',
    width=500,
)

- https://docs.llamaindex.ai/en/latest/getting_started/concepts/
- https://docs.llamaindex.ai/en/latest/getting_started/starter_example/
- data => index (chunks => embedding vectors)
    - embedding model: 'text-embedding-ada-002'
        - POST https://api.openai.com/v1/embeddings
        - `from llama_index.embeddings.openai import OpenAIEmbedding`
    - query => index (embedding vector)
- retrieve: return the `similarity_top_k` most similar chunks (default: 2)
- settings
    - embed_model: 'text-embedding-ada-002'
    - llm: gpt-3.5-turbo

## Default Prompt Template

### qa_template

```
'Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: '
```

### refine_template

```
The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 
```

### simple_template

```
'{query_str}'
```

### summary_template

```
'Context information from multiple sources is below.
---------------------
{context_str}
---------------------
Given the information from multiple sources and not prior knowledge, answer the query.
Query: {query_str}
Answer: '
```

### system prompt

```
system: You are an expert Q&A system that is trusted around the world.
Always answer the query using the provided context information, and not prior knowledge.
Some rules to follow:
1. Never directly reference the given context in your answer.
2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.
```

## ResponseMode

- default: `ResponseMode.COMPACT` (`'compact'`)

In [48]:
from llama_index.core.response_synthesizers.type import ResponseMode

In [58]:
# Display a selection of response modes together with their string values.
(
    ResponseMode.REFINE,
    ResponseMode.COMPACT,
    ResponseMode.SIMPLE_SUMMARIZE,
    ResponseMode.TREE_SUMMARIZE,
    ResponseMode.GENERATION,
)

(<ResponseMode.REFINE: 'refine'>,
 <ResponseMode.COMPACT: 'compact'>,
 <ResponseMode.SIMPLE_SUMMARIZE: 'simple_summarize'>,
 <ResponseMode.TREE_SUMMARIZE: 'tree_summarize'>,
 <ResponseMode.GENERATION: 'generation'>)

## demo

In [3]:
from dotenv import load_dotenv
# Load environment variables from the .env file next to the notebook; the
# call's return value is displayed as the cell output.
# NOTE(review): presumably this is where the OpenAI API key lives — confirm.
load_dotenv(dotenv_path='./.env')

True

In [4]:
import os

# Point both the HTTP and HTTPS proxy variables at the same loopback proxy.
os.environ.update({
    'HTTP_PROXY': 'http://127.0.0.1:7890',
    'HTTPS_PROXY': 'http://127.0.0.1:7890',
})

In [5]:
# Optional: uncomment to send DEBUG-level log output to stdout while the
# cells below run.
# import logging
# import sys
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

### load documents

In [6]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Read every file found under the local "data" directory into documents.
documents = SimpleDirectoryReader(input_dir="data").load_data()

In [7]:
# Inspect the formatting attributes of the first loaded document.
# NOTE: `metadata_seperator` keeps the spelling used by the original cell,
# which is the attribute name that produced the output shown below.
(
    documents[0].text_template,
    documents[0].metadata_seperator,
    documents[0].metadata_template,
)

('{metadata_str}\n\n{content}', '\n', '{key}: {value}')

### to index & query

- chunk_size, chunk_overlap
    - chunk_size: 1024 (default)
    - chunk_overlap: 200 (default)
- split & merge
- nodes
- embeds to nodes
    - text-embedding-ada-002: 1536-dimensional vectors (1536 = 512 × 3)

In [35]:
# Express the 1536 embedding width as a multiple of 512.
1536 / 512

3.0

In [25]:
# Build the vector index from the loaded documents; show_progress enables
# the "Parsing nodes" and "Generating embeddings" progress bars.
index = VectorStoreIndex.from_documents(
    documents=documents,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/22 [00:00<?, ?it/s]

In [63]:
# Optional: look up one stored entry in the index's docstore by its ID.
# NOTE(review): the hard-coded ID presumably comes from an earlier run and
# will not exist after re-indexing — pick a key from index.docstore.docs.
# index.docstore.docs['9655ddd8-a3e7-46a5-b709-dca099020c81']

In [65]:
# Optional: look up the embedding stored for one ID in the vector store.
# NOTE(review): the hard-coded ID presumably comes from an earlier run —
# replace it with a key that exists in the current index.
# index.vector_store.data.embedding_dict['9655ddd8-a3e7-46a5-b709-dca099020c81']

In [13]:
# Create a query engine from the index using the library defaults
# (no arguments are overridden here).
query_engine = index.as_query_engine()

In [15]:
# Ask a question against the indexed essay and keep the full response
# object so its fields can be inspected in the following cells.
response = query_engine.query("What did the author do growing up?")

In [16]:
# Show the answer text carried by the response object.
response.response

'The author worked on writing short stories and programming, starting with early programming experiences on an IBM 1401 using Fortran in 9th grade. Later, the author transitioned to working with microcomputers, building simple games and a word processor on a TRS-80 in the early 1980s.'

In [17]:
# Show the metadata attached to the response: a dict of per-entry file
# details (path, name, type, size, dates).
# NOTE(review): the keys look like IDs of the retrieved nodes — confirm.
response.metadata

{'8aefc79e-2d08-4041-8f7e-294bf4f74b6a': {'file_path': '/home/whaow/workspaces/llm_aigc/tutorials/rag/data/paul_graham_essay.txt',
  'file_name': 'paul_graham_essay.txt',
  'file_type': 'text/plain',
  'file_size': 75042,
  'creation_date': '2024-06-15',
  'last_modified_date': '2024-06-15'},
 '4a01aa3a-d5ac-49f2-aed6-dbd0ad477bfe': {'file_path': '/home/whaow/workspaces/llm_aigc/tutorials/rag/data/paul_graham_essay.txt',
  'file_name': 'paul_graham_essay.txt',
  'file_type': 'text/plain',
  'file_size': 75042,
  'creation_date': '2024-06-15',
  'last_modified_date': '2024-06-15'}}

In [19]:
# Count the source nodes attached to the response.
# NOTE(review): 2 presumably reflects the default similarity_top_k — confirm.
len(response.source_nodes)

2

In [24]:
# Display the answer text again (same expression as the earlier answer cell).
response.response

'The author worked on writing short stories and programming, starting with early programming experiences on an IBM 1401 using Fortran in 9th grade. Later, the author transitioned to working with microcomputers, building simple games and a word processor on a TRS-80 in the early 1980s.'