# RAG 

#### Libraries

In [381]:
# processing:
import os
import re
import pandas as pd
from dotenv import load_dotenv

# neo4j:
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector

# llm:
from langchain.chat_models.gigachat import GigaChat
from langchain_community.embeddings import GigaChatEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# retrieval:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.memory import ConversationBufferMemory

#### .env

In [382]:
# Load key/value pairs from a local .env file into the process environment
# so the os.getenv(...) lookups in the next cell can see them.
load_dotenv()

True

In [383]:
# Connection settings are taken from the process environment; every value is
# None when the corresponding variable is not set.

# neo4j:
NEO4J_URI = os.environ.get("NEO4J_URI")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE")

# llm:
LLM_SCOPE = os.environ.get("SCOPE")
LLM_AUTH = os.environ.get("AUTH_DATA")

# telegram:
BOT_TOKEN = os.environ.get("BOT_TOKEN")

#### Reference

In [384]:
# Reference spreadsheet; later cells read its 'letter' and 'answer ' columns
# to compare the chain's output with a known answer.
reference = pd.read_excel('../docs/reference.xlsx')

## Setting Up LLM | GigaChat


[GigaChat](https://developers.sber.ru/docs/ru/gigachat/overview)

In [385]:
# Chat model shared by the prompt helpers and the retrieval chain below.
# NOTE(review): LLM_SCOPE is read from the environment in this file but is not
# passed to GigaChat here — confirm whether `scope` should be set.
llm = GigaChat(
    credentials=LLM_AUTH,
    temperature=0.3,
    n=1,
    model="GigaChat-Plus",  # 32k context window
    repetition_penalty=1.0,
    # NOTE(review): TLS certificate verification is disabled — confirm this is
    # intended outside of local experiments.
    verify_ssl_certs=False,
)

In [386]:
# Output parser placed after the model in `model | parser` chains.
parser = StrOutputParser()

In [387]:
# Embedding model handed to the Neo4j vector store for similarity search.
embeddings = GigaChatEmbeddings(
    credentials=LLM_AUTH,
    # NOTE(review): TLS certificate verification is disabled — confirm this is
    # intended outside of local experiments.
    verify_ssl_certs=False,
)

## Prompt Engineering

#### Prompt Template

In [388]:
# Answer prompt (Russian) for the retrieval chain. It tells the model to cite
# clause/article/section numbers, to reply "Информация не найдена" when the
# documents do not contain the answer, and to explain how the cited rules
# apply. Placeholders: {question} and {context}.
template = """
            Задача: Анализировать заданный запрос и предоставлять детализированные ответы, опираясь на документы, правила и требования.

            Инструкции:

            1. При ответе на вопросы, особенно те, которые связаны с правилами или нормативными документами, активно ссылайтесь на номера пунктов, статей и разделов документов.
            2. Если требуемая информация отсутствует или вы не можете обнаружить необходимые пункты в документах, используйте фразу "Информация не найдена".
            3. Ваши ответы должны быть максимально точными и содержать не только ссылки на документы, но и объяснения их применения к данному запросу.

            Вопрос: {question}
            Контекст: {context}
            
            Ответ: Предоставьте ваш ответ, опираясь на указанные выше указания.
            """

prompt = ChatPromptTemplate.from_template(template)

#### Step Back Prompting

In [389]:
def step_back_prompt(model, parser, text):
    """Ask the model for a "step-back" abstraction of *text*.

    Wraps *text* in a Russian instruction prompt that asks for high-level
    concepts and core principles, phrased as expanded questions, then runs
    the prompt through ``model | parser``.

    Args:
        model: Chat model; must support ``model | parser`` composition.
        parser: Output parser placed after the model in the chain.
        text: Source text; inserted verbatim between double quotes.

    Returns:
        Whatever ``chain.invoke`` returns for the rendered prompt.
    """
    chain = model | parser

    # Plain f-string: `text` is substituted here, so braces inside it are not
    # treated as template variables later on.
    template = f"""
                Задача: Анализировать представленные данные и вывести из них высокоуровневые концепции и основные принципы.

                Инструкции:
                Прежде чем приступить к абстракции, важно:
                1. Определить и выделить ключевые детали и специфику представленного материала.
                2. Анализировать связи между деталями для понимания более глубоких закономерностей и взаимосвязей.
                3. Структурировать полученные данные, выделяя общие элементы и паттерны.
                4. Формулировать высокоуровневые концепции и принципы, опираясь на проанализированные данные.

                Текст: \"{text}\"

                Проанализируйте и сформулируйте общие концепции и основные принципы на основе представленных деталей в виде развернутых вопросов.
                """

    model_response = chain.invoke(template)

    return model_response

#### Question Extraction

In [390]:
def extract_question(model, parser, text):
    """Ask the model to break a business letter into RAG-ready parts.

    Wraps *text* in a Russian instruction prompt that requests the questions
    asked, the key parameters mentioned and the main topics, then runs the
    prompt through ``model | parser``.

    Args:
        model: Chat model; must support ``model | parser`` composition.
        parser: Output parser placed after the model in the chain.
        text: Letter text; inserted verbatim between double quotes.

    Returns:
        Whatever ``chain.invoke`` returns for the rendered prompt.
    """
    chain = model | parser

    # Plain f-string: `text` is substituted here, so braces inside it are not
    # treated as template variables later on.
    template = f"""
                            Мне нужно твоё содействие в анализе следующего делового письма.
                            Извлеки из него всю важную информацию для системы RAG. 
                            Нужны вопросы, ключевые параметры, и основные темы обсуждения. Вот текст письма:

                            Текст: \"{text}\"

                            Разбери письмо на следующие компоненты:

                            1. Все поставленные вопросы.
                            2. Перечень ключевых параметров, упомянутых в тексте.
                            3. Основные темы, которые обсуждаются.

                            Используй структурированный подход, чтобы я мог легко использовать эти данные для запросов в системе RAG.
                            Без дополнительных комментариев.
                            """

    model_response = chain.invoke(template)

    return model_response

## Setting Up Database | Neo4j

[Neo4j](https://workspace-preview.neo4j.io)

In [391]:
# Vector store bound to the already-populated Neo4j index named "vector";
# queries are embedded with `embeddings`.
neo4j_vector = Neo4jVector.from_existing_index(
    embeddings,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    index_name="vector",
    # search_type = 'hybrid'
)

## LLM Retrieval Chain

[GigaChain](https://github.com/ai-forever/gigachain)

In [419]:
# Retrieval QA chain: the 5 nearest chunks from the Neo4j vector store are
# placed into `prompt` under the {context} variable and sent to `llm`.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm,
    chain_type="stuff",  # "stuff", "map_rerank", "refine"
    retriever=neo4j_vector.as_retriever(search_kwargs={"k": 5}),
    return_source_documents=False,
    reduce_k_below_max_tokens=False,
    max_tokens_limit=32000,
    chain_type_kwargs={
        "verbose": False,
        "prompt": prompt,  # step_back_prompt
        "document_variable_name": "context",
        # NOTE(review): the memory is stored under 'history', but the prompt
        # template only declares {question} and {context} — confirm the
        # conversation history is actually meant to reach the model.
        "memory": ConversationBufferMemory(
            memory_key='history',
            input_key='question',
        ),
    },
)

## Test

##### Reference Letters + Answers

In [416]:
# Row index into `reference`.
# NOTE(review): the original note reads "from the reference.xlsx num + 2" —
# presumably spreadsheet row = index + 2 (header row, 1-based rows); confirm.
letter = 5

# Convert the cell to str *before* calling .replace: a non-string cell (for
# example an empty one that pandas loads as NaN) has no .replace method and
# would raise AttributeError. This also matches how `refer` is built.
inquiry = str(reference['letter'][letter]).replace('\n', ' ')
refer = str(reference['answer '][letter]).replace('\n', ' ')  # column name ends with a space

# Two alternative rewrites of the inquiry to use as the retrieval question.
abstraction = step_back_prompt(llm, parser, inquiry)
question = extract_question(llm, parser, inquiry)

##### Question Chain

In [417]:
# Show the raw inquiry, the reference answer and the extracted question, then
# run the retrieval chain on the extracted question. The trailing ["answer"]
# lookup is the cell's last expression, so the notebook displays it.
print(f'Inquiry: {inquiry}\n')
print(f'Reference: {refer}\n')

print(f'Question: {question}\n')

chain.invoke(
            {"question": question},
            return_only_outputs = True
            )["answer"]


Inquiry: Добрый день! При определении НМЦ закупки услуг по поставке и монтажу здания (не капитальное строительство) какой метод определения НМЦ должен быть? 

Reference: –í –ø—É–Ω–∫—Ç–µ 4.9.1. –ü–æ–ª–æ–∂–µ–Ω–∏—è –æ –∑–∞–∫—É–ø–∫–∞—Ö –ø—Ä–µ–¥—É—Å–º–æ—Ç—Ä–µ–Ω–æ, —á—Ç–æ "–ü—Ä–æ–µ–∫—Ç–Ω–æ-—Å–º–µ—Ç–Ω—ã–π –º–µ—Ç–æ–¥ –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –¥–ª—è –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è –Ω–∞—á–∞–ª—å–Ω–æ–π (–º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ–π) —Ü–µ–Ω—ã –¥–æ–≥–æ–≤–æ—Ä–∞ (–ø—Ä–µ–¥–º–µ—Ç–∞ –∑–∞–∫—É–ø–∫–∏) –Ω–∞ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–æ, —Ä–µ–∫–æ–Ω—Å—Ç—Ä—É–∫—Ü–∏—é –∏ –∫–∞–ø–∏—Ç–∞–ª—å–Ω—ã–π —Ä–µ–º–æ–Ω—Ç –æ–±—ä–µ–∫—Ç–æ–≤, –∞ —Ç–∞–∫–∂–µ –º–æ–∂–µ—Ç –±—ã—Ç—å –ø—Ä–∏–º–µ–Ω–µ–Ω –ø—Ä–∏ –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–∏ –Ω–∞—á–∞–ª—å–Ω–æ–π (–º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ–π) —Ü–µ–Ω—ã –¥–æ–≥–æ–≤–æ—Ä–∞ (–ø—Ä–µ–¥–º–µ—Ç–∞ –∑–∞–∫—É–ø–∫–∏) –Ω–∞ —Ç–µ–∫—É—â–∏–π —Ä–µ–º–æ–Ω—Ç –∑–¥–∞–Ω–∏–π, —Å–æ–æ—Ä—É–∂–µ–Ω–∏–π, —Å—Ç—Ä–æ–µ–Ω–∏–π, –ø–æ–º–µ—â–µ–Ω–∏

'–î–ª—è –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è –Ω–∞—á–∞–ª—å–Ω–æ–π (–º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ–π) —Ü–µ–Ω—ã –∑–∞–∫—É–ø–∫–∏ —É—Å–ª—É–≥ –ø–æ –ø–æ—Å—Ç–∞–≤–∫–µ –∏ –º–æ–Ω—Ç–∞–∂—É –∑–¥–∞–Ω–∏—è (–Ω–µ –∫–∞–ø–∏—Ç–∞–ª—å–Ω–æ–µ —Å—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–æ) —Ä–µ–∫–æ–º–µ–Ω–¥—É–µ—Ç—Å—è –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –º–µ—Ç–æ–¥ —Å–æ–ø–æ—Å—Ç–∞–≤–∏–º—ã—Ö —Ä—ã–Ω–æ—á–Ω—ã—Ö —Ü–µ–Ω (–∞–Ω–∞–ª–∏–∑ —Ä—ã–Ω–∫–∞). –î–ª—è —ç—Ç–æ–≥–æ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –Ω–µ –º–µ–Ω–µ–µ —Ç—Ä–µ—Ö —Ü–µ–Ω —Ç–æ–≤–∞—Ä–∞, –ø—Ä–µ–¥–ª–∞–≥–∞–µ–º—ã—Ö —Ä–∞–∑–ª–∏—á–Ω—ã–º–∏ –ø–æ—Å—Ç–∞–≤—â–∏–∫–∞–º–∏. –ù–∞—á–∞–ª—å–Ω–∞—è (–º–∞–∫—Å–∏–º–∞–ª—å–Ω–∞—è) —Ü–µ–Ω–∞ –æ–ø—Ä–µ–¥–µ–ª—è–µ—Ç—Å—è –ø–æ —Ñ–æ—Ä–º—É–ª–µ: –ù–ú–¶=ùë£ùëõ*ùëñ=1ùëõ‚àë–¶ùëñ, –≥–¥–µ: –ù–ú–¶ - –Ω–∞—á–∞–ª—å–Ω–∞—è (–º–∞–∫—Å–∏–º–∞–ª—å–Ω–∞—è) —Ü–µ–Ω–∞ –¥–æ–≥–æ–≤–æ—Ä–∞ (–ø—Ä–µ–¥–º–µ—Ç–∞ –∑–∞–∫—É–ø–∫–∏), –æ–ø—Ä–µ–¥–µ–ª—è–µ–º–∞—è –º–µ—Ç–æ–¥–æ–º —Å–æ–ø–æ—Å—Ç–∞–≤–∏–º—ã—Ö —Ä—ã–Ω–æ—á–Ω—ã—Ö —Ü–µ–Ω (–∞–Ω–∞–ª–∏–∑ —Ä—ã–Ω–∫–∞);ùë£ - –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ (–æ–±—ä–µ–º) –∑–∞–∫—É–ø–∞–µ–º–æ–≥–æ —Ç–æ–≤–∞—Ä–∞ (—Ä

In [418]:
# Inspect retrieval for the extracted question: fetch the 5 nearest chunks
# with their similarity scores and print each one between separator lines.
docs_with_score = neo4j_vector.similarity_search_with_score(question, k = 5)

for doc, score in docs_with_score:
    print("-" * 100)
    print("Score: ", score)
    print(doc.page_content)
    print("-" * 100)

----------------------------------------------------------------------------------------------------
Score:  0.944155752658844
–∏—Å—Ç–µ—Ä—Å—Ç–≤–∞ —ç–∫–æ–Ω–æ–º–∏—á–µ—Å–∫–æ–≥–æ  —Ä–∞–∑–≤–∏—Ç–∏—è –†–æ—Å—Å–∏–π—Å–∫–æ–π –§–µ–¥–µ—Ä–∞—Ü–∏–∏ (–¥–ª—è –ø—Ä–æ–¥—É–∫—Ü–∏–∏, –∫–æ—Ç–æ—Ä–∞—è –º–æ–∂–µ—Ç –±—ã—Ç—å –æ—Ç–Ω–µ—Å–µ–Ω–∞ –∫ –ø–æ—Ç—Ä–µ–±–∏—Ç–µ–ª—å—Å–∫–∏–º —Ç–æ–≤–∞—Ä–∞–º, –≤ –∫–∞—á–µ—Å—Ç–≤–µ–ò–¶–üùëñùëñ‚àí1–æ—Ç—Ä –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è –∏–Ω–¥–µ–∫—Å –ø–æ—Ç—Ä–µ–±–∏—Ç–µ–ª—å—Å–∫–∏—Ö —Ü–µ–Ω);  –° - –∫–æ—ç—Ñ—Ñ–∏—Ü–∏–µ–Ω—Ç —Å–¥–µ—Ä–∂–∏–≤–∞–Ω–∏—è —Ç–µ–º–ø–æ–≤ —Ä–æ—Å—Ç–∞ —Ü–µ–Ω, –æ–ø—Ä–µ–¥–µ–ª—è–µ–º—ã–π –≤ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏–∏ —Å –ª–æ–∫–∞–ª—å–Ω—ã–º–∏ –Ω–æ—Ä–º–∞—Ç–∏–≤–Ω—ã–º–∏ –∞–∫—Ç–∞–º–∏ –ü–ê–û ¬´–ö–æ–º–ø–∞–Ω–∏—è 1¬ª (–µ—Å–ª–∏ –ø—Ä–∏–º–µ–Ω–∏–º–æ). 
4.5.12 –í —Ü–µ–ª—è—Ö –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏—è –Ω–∞—á–∞–ª—å–Ω–æ–π (–º–∞–∫—Å–∏–º–∞–ª—å–Ω–æ–π) —Ü–µ–Ω—ã –¥–æ–≥–æ–≤–æ—Ä–∞ (–ø—Ä–µ–¥–º–µ—Ç–∞  –∑–∞–∫—É–ø–∫–∏) –º–µ—Ç–æ–¥–æ–º —Å–æ–ø–æ—Å—Ç–∞–≤–∏–º—ã—Ö —Ä—ã–Ω–æ—á–Ω—ã—Ö —Ü–µ–Ω (–∞–Ω–∞–ª–∏–∑ —Ä—ã–Ω–∫–∞) —Ä–µ–∫–æ–º–µ–Ω–

##### Step Back Chain

In [396]:
# Same experiment, but the chain is queried with the step-back abstraction
# instead of the extracted question. The full result dict (answer + sources)
# is the cell's last expression, so the notebook displays it.
print(f'Inquiry: {inquiry}\n')
print(f'Reference: {refer}\n')

print(f'Abstraction: {abstraction}\n')

chain.invoke(
            {"question": abstraction},
            return_only_outputs = True,
            )

Inquiry: –£–≤–∞–∂–∞–µ–º—ã–µ —É—á–∞—Å—Ç–Ω–∏–∫–∏ –∫–æ–º–∏—Å—Å–∏–∏, —á—å–∏ —É–º–Ω—ã–µ –∏ —Ç–≤–æ—Ä—á–µ—Å–∫–∏–µ —Ä–∞–∑–º—ã—à–ª–µ–Ω–∏—è –ø–æ–∑–≤–æ–ª—è—é—Ç –≤–∑–≥–ª—è–Ω—É—Ç—å –Ω–∞ –∫–∞–∂–¥—ã–π –∞—Å–ø–µ–∫—Ç –∑–∞–∫—É–ø–æ—á–Ω–æ–≥–æ –ø—Ä–æ—Ü–µ—Å—Å–∞ –≤ –Ω–æ–≤–æ–º —Å–≤–µ—Ç–µ,  –ù–∞–ø—Ä–∞–≤–ª—è—é –≤–∞–º –¥–∞–Ω–Ω—ã–π –¥–æ–∫—É–º–µ–Ω—Ç, –≤ –∫–æ—Ç–æ—Ä–æ–º –ø—Ä–µ–¥–æ—Å—Ç–∞–≤–ª–µ–Ω–æ –ø–æ–¥—Ä–æ–±–Ω–µ–π—à–µ–µ –∏–∑–ª–æ–∂–µ–Ω–∏–µ —Ç–µ–∫—É—â–µ–≥–æ —Å—Ç–∞—Ç—É—Å–∞ –∫–æ–Ω–∫—É—Ä—Å–Ω–æ–π –ø—Ä–æ—Ü–µ–¥—É—Ä—ã, —á—Ç–æ —è–≤–ª—è–µ—Ç—Å—è –æ—Ç—Ä–∞–∂–µ–Ω–∏–µ–º –Ω–∞—à–µ–π –ø—Ä–µ–¥–∞–Ω–Ω–æ—Å—Ç–∏ –ø–æ–ª–Ω–æ–º—É –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–æ–Ω–Ω–æ–º—É –¥–æ–≤–µ—Ä–∏—é –∏ –≤–∞–∂–Ω–æ—Å—Ç–∏ –≤–∞—à–µ–≥–æ —É—á–∞—Å—Ç–∏—è –≤ –¥–∞–Ω–Ω–æ–º –≤–æ–ø—Ä–æ—Å–µ. –í–∞–∂–Ω–æ—Å—Ç—å –∫–æ—Ç–æ—Ä–æ–≥–æ –Ω–µ –ø–æ–¥–¥–∞–µ—Ç—Å—è –Ω–µ–¥–æ–æ—Ü–µ–Ω–∫–µ, —Ç–∞–∫ –∫–∞–∫ –æ–Ω –∞–∫—Ü–µ–Ω—Ç–∏—Ä—É–µ—Ç –≤–Ω–∏–º–∞–Ω–∏–µ –Ω–∞ —Ç—â–∞—Ç–µ–ª—å–Ω–æ–º —Ä–∞—Å—Å–º–æ—Ç—Ä–µ–Ω–∏–∏ —Ç–æ–≥–æ —É–Ω–∏–∫–∞–ª—å–Ω–æ–≥–æ –º–æ–º–µ–Ω—Ç–∞, –∫–æ–≥–¥–∞ –ª–∏—à—å –æ–¥–∏–Ω –ø–æ—Ç–µ–Ω—Ü–∏–∞–ª—å–Ω—ã–π —

Giga generation stopped with reason: blacklist


{'answer': 'Что-то в вашем вопросе меня смущает. Может, поговорим на другую тему?',
 'sources': ''}

In [397]:
# Inspect retrieval for the step-back abstraction: fetch the 5 nearest chunks
# with their similarity scores and print each one between separator lines.
docs_with_score = neo4j_vector.similarity_search_with_score(abstraction, k = 5)

for doc, score in docs_with_score:
    print("-" * 100)
    print("Score: ", score)
    print(doc.page_content)
    print("-" * 100)

----------------------------------------------------------------------------------------------------
Score:  0.9488827586174011
–æ—Å—Ç—å —É—á–∞—Å—Ç–Ω–∏–∫–∞ –∑–∞–∫—É–ø–∫–∏ —É–¥–æ–≤–ª–µ—Ç–≤–æ—Ä–∏—Ç—å –ø–æ—Ç—Ä–µ–±–Ω–æ—Å—Ç–∏ –ó–∞–∫–∞–∑—á–∏–∫–∞ —Å —É—á–µ—Ç–æ–º –æ–ø—ã—Ç–∞, –∫–≤–∞–ª–∏—Ñ–∏–∫–∞—Ü–∏–∏ –∏ –ø—Ä–æ—á–µ–µ, –∞ —Ç–∞–∫–∂–µ –≤ —Å–ª—É—á–∞—è—Ö, –∫–æ–≥–¥–∞ –∏–º–µ—é—Ç—Å—è –ø—Ä–µ–¥—É—Å–º–æ—Ç—Ä–µ–Ω–Ω—ã–µ –Ω–∞—Å—Ç–æ—è—â–∏–º –ü–æ–ª–æ–∂–µ–Ω–∏–µ–º –æ—Å–Ω–æ–≤–∞–Ω–∏—è –¥–ª—è –≤—ã–±–æ—Ä–∞ –∏–Ω—ã—Ö —Å–ø–æ—Å–æ–±–æ–≤ –∑–∞–∫—É–ø–∫–∏, –ø—Ä–∏ —Å–æ–±–ª—é–¥–µ–Ω–∏–∏ –æ–¥–Ω–æ–≥–æ –∏–∑ —Å–ª–µ–¥—É—é—â–∏—Ö —É—Å–ª–æ–≤–∏–π: 
6.7.5.1 Сжатые сроки для проведения закупки не позволяют провести конкурс, аукцион. 
6.7.5.2 Сложность товаров (работ, услуг), являющихся предметом закупки, не позволяет провести аукцион, запрос котировок. 
6.7.5.3 –ö–æ–Ω–∫—É—Ä—Å, –∞—É–∫—Ü–

In [398]:
# Bare expression: lets the notebook display the raw (Document, score) pairs.
docs_with_score

[(Document(page_content='–æ—Å—Ç—å —É—á–∞—Å—Ç–Ω–∏–∫–∞ –∑–∞–∫—É–ø–∫–∏ —É–¥–æ–≤–ª–µ—Ç–≤–æ—Ä–∏—Ç—å –ø–æ—Ç—Ä–µ–±–Ω–æ—Å—Ç–∏ –ó–∞–∫–∞–∑—á–∏–∫–∞ —Å —É—á–µ—Ç–æ–º –æ–ø—ã—Ç–∞, –∫–≤–∞–ª–∏—Ñ–∏–∫–∞—Ü–∏–∏ –∏ –ø—Ä–æ—á–µ–µ, –∞ —Ç–∞–∫–∂–µ –≤ —Å–ª—É—á–∞—è—Ö, –∫–æ–≥–¥–∞ –∏–º–µ—é—Ç—Å—è –ø—Ä–µ–¥—É—Å–º–æ—Ç—Ä–µ–Ω–Ω—ã–µ –Ω–∞—Å—Ç–æ—è—â–∏–º –ü–æ–ª–æ–∂–µ–Ω–∏–µ–º –æ—Å–Ω–æ–≤–∞–Ω–∏—è –¥–ª—è –≤—ã–±–æ—Ä–∞ –∏–Ω—ã—Ö —Å–ø–æ—Å–æ–±–æ–≤ –∑–∞–∫—É–ø–∫–∏, –ø—Ä–∏ —Å–æ–±–ª—é–¥–µ–Ω–∏–∏ –æ–¥–Ω–æ–≥–æ –∏–∑ —Å–ª–µ–¥—É—é—â–∏—Ö —É—Å–ª–æ–≤–∏–π: \n6.7.5.1 –°–∂–∞—Ç—ã–µ —Å—Ä–æ–∫–∏ –¥–ª—è –ø—Ä–æ–≤–µ–¥–µ–Ω–∏—è –∑–∞–∫—É–ø–∫–∏ –Ω–µ –ø–æ–∑–≤–æ–ª—è—é—Ç –ø—Ä–æ–≤–µ—Å—Ç–∏ –∫–æ–Ω–∫—É—Ä—Å, –∞—É–∫—Ü–∏–æ–Ω. \n6.7.5.2 –°–ª–æ–∂–Ω–æ—Å—Ç—å —Ç–æ–≤–∞—Ä–æ–≤ (—Ä–∞–±–æ—Ç, —É—Å–ª—É–≥), —è–≤–ª—è—é—â–∏—Ö—Å—è –ø—Ä–µ–¥–º–µ—Ç–æ–º –∑–∞–∫—É–ø–∫–∏, –Ω–µ –ø–æ–∑–≤–æ–ª—è–µ—Ç –ø—Ä–æ–≤–µ—Å—Ç–∏ –∞—É–∫—Ü–∏–æ–Ω, –∑–∞–ø—Ä–æ—Å –∫–æ—Ç–∏—Ä–æ–≤–æ–∫. \n6.7.5.3 –ö–æ–Ω–∫—É—Ä—Å, –∞—É–∫—Ü–∏–æ–Ω, –∑–∞–ø—Ä–æ—Å –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏–π, –∑–∞–ø—Ä–æ—Å –∫–æ—Ç–∏—Ä–æ–≤–æ–∫ –ø—Ä–∏–∑–Ω–∞–Ω—ã –Ω–µ—Å–

## Basic Queries | Neo4j

[Cypher](https://neo4j.com/docs/cypher-cheat-sheet/5/auradb-enterprise)

In [399]:
# Direct graph connection used for the ad-hoc Cypher queries below.
graph = Neo4jGraph(url = NEO4J_URI, username = NEO4J_USERNAME, password = NEO4J_PASSWORD, database = NEO4J_DATABASE)

In [400]:
# Print the schema description exposed by the graph wrapper.
print(graph.schema)

Node properties are the following:
Chunk {embedding: LIST, id: STRING, text: STRING, source: STRING, title: STRING}
Relationship properties are the following:

The relationships are the following:



In [401]:
# List the vector indexes defined in the database.
cypher = """
  SHOW VECTOR INDEXES
  """
graph.query(cypher)

[{'id': 2,
  'name': 'vector',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Chunk'],
  'properties': ['embedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 4, 15, 9, 57, 52, 42000000, tzinfo=<UTC>),
  'readCount': 204}]

In [402]:
# Count every node in the database, regardless of label.
cypher = """
  MATCH (n)
  RETURN count(n)
  """
graph.query(cypher)

[{'count(n)': 1087}]

In [403]:
# Fetch the text of the Chunk node(s) whose title is exactly "1.2".
cypher = """
    MATCH (n:Chunk {title: "1.2"})
    RETURN n.text AS text
    """
graph.query(cypher)

[{'text': '1.2. Термины и определения'}]

In [404]:
# Stringify the 3 nearest chunks for the raw inquiry so they can be scanned
# with a single regular expression.
highest_scored_content = str(neo4j_vector.similarity_search(inquiry, k=3))

# First dotted section number (e.g. "6.7.5.1") found anywhere in that text.
# NOTE(review): the search runs over the repr of the whole result list, so the
# hit is simply the first number that appears in it — confirm that is enough.
match = re.search(r'\d+(\.\d+)+', highest_scored_content)

if not match:
    print("No matching number found")
else:
    highest_scored_document_number = match.group(0)
    print("Highest scored document number:", highest_scored_document_number)

    # The pattern above only admits digits and dots, so the value interpolated
    # into the query text cannot carry quotes or Cypher syntax.
    cypher_query = f"""
                    MATCH (n)
                    WHERE n.title STARTS WITH '{highest_scored_document_number}'
                    RETURN n.text
                    """

Highest scored document number: 6.7.5.1


In [405]:
# Run the query built in the previous cell. `cypher_query` is only assigned
# there when a section number was found.
graph.query(cypher_query)

[]