In [1]:
from langchain.embeddings import OpenAIEmbeddings
from pino_inferior.core import OPENAI_API_KEY
from pino_inferior.models import aengine
from pino_inferior.memory import Memory, SequentialSplitter, MarkdownHeaderTextSplitter
from pino_inferior.core import VECTOR_DB, VECTOR_DB_PARAMS, MEMORY_PARAMS

In [2]:
# Embedding model handed to the vector store as its embedding function.
sentence_encoder = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=OPENAI_API_KEY)

# Retrieval memory: the project's engine plus a vector store built with
# `sentence_encoder`; all remaining settings come from the project-level
# VECTOR_DB_PARAMS / MEMORY_PARAMS mappings.
memory = Memory(
    engine=aengine,
    vector_db=VECTOR_DB(embedding_function=sentence_encoder, **VECTOR_DB_PARAMS),
    **MEMORY_PARAMS,
)

In [3]:
# Smoke-test retrieval with the query "Losses of the Russian army".
# The literal was stored as mojibake (UTF-8 bytes decoded as Mac Roman);
# it is restored to the intended Cyrillic text so the real query is embedded.
test_response = memory.retrieve("Потери российской армии")
# Each result is a pair; only the first element (the document) is printed.
# NOTE(review): the ignored second element is presumably a relevance score - confirm.
for i, (doc, _) in enumerate(test_response):
    print(f"{i} ) {doc.meta} {doc.text}")

0 ) {'Header1': 'Минтруда раскрыло масштабы возможных потерь среди российских военных в Украине', 'ParagraphHash': '59751ec69b076182632f7bed06d087df'} В прошлом году масштабы печати удостоверений были значительно меньше. Летом 2022 года Минтруд заказывал печать 82 840 бланков удостоверений ветеранов боевых действий и 5 777 удостоверений для членов семьи погибшего, сумма контракта была 9,8 млн рублей.  
1 ) {'Header1': 'Минтруда раскрыло масштабы возможных потерь среди российских военных в Украине', 'ParagraphHash': '03518b821551a03a0e32056277ca84e6'} В документации указано, что 600 тысяч отпечатанных удостове

In [4]:
# Query: "The Ministry of Labour ordered certificates".
# The literal was stored as mojibake (UTF-8 bytes decoded as Mac Roman);
# it is restored to the intended Cyrillic text so the real query is embedded.
test_response = memory.retrieve("Минтруд заказал удостоверений")
# Print only the text of each retrieved document; the second tuple element is ignored.
for i, (doc, _) in enumerate(test_response):
    print(f"{i} ) {doc.text}")

0 ) «С меня спустили штаны, оставляя в лежачем положении, и, оставив в трусах, стали пихать неизвестный предмет в заднюю часть попы. Осознав, что это уже крайнее, что я могу выдержать, я начал отвечать на их вопросы и после получения всех нужных им ответов, меня усадили со связанными ногами и застегнутыми в наручниках руками и начали подсовывать документы на стол, на котором лежал телефон армейский (способный вырабатывать ток) для их подписывания, с угрозой продолжения пыток в случае моего отказа», — рассказывал Кудряшов.  
1 ) В августе Кудряшова

In [5]:
# Query combining both topics: "Army. The Ministry of Labour ordered certificates".
# The literal was stored as mojibake (UTF-8 bytes decoded as Mac Roman);
# it is restored to the intended Cyrillic text so the real query is embedded.
test_response = memory.retrieve("Армии. Минтруд заказал удостоверений")
# Print only the text of each retrieved document; the second tuple element is ignored.
for i, (doc, _) in enumerate(test_response):
    print(f"{i} ) {doc.text}")

0 ) Издание отмечает, что протокол на Доровских составили «буквально через день» после торжественного вручения ей негосударственной медали «За военно-патриотическую работу». Как пишет 1rnd, «полицейские были немного обескуражены» тем, что медалью главную редакторку наградил их бывший начальник — генерал МВД в отставке Виталий Шевченко.  
1 ) На главную редакторку ростовского издания 1rnd Елену Доровских составили протокол о «дискредитации» армии (статья 20.3.3 КоАП) из-за осенней новости о листовках против мобилизации, расклеенных возле здания 

In [6]:
# Set the memory's top_k_paragraphs to 50 before repeating the
# "The Ministry of Labour ordered certificates" query.
# NOTE(review): presumably this widens the paragraph-level candidate pool - confirm in Memory.
memory.top_k_paragraphs = 50
# The literal was stored as mojibake (UTF-8 bytes decoded as Mac Roman);
# it is restored to the intended Cyrillic text so the real query is embedded.
test_response = memory.retrieve("Минтруд заказал удостоверений")
# Print only the text of each retrieved document; the second tuple element is ignored.
for i, (doc, _) in enumerate(test_response):
    print(f"{i} ) {doc.text}")

0 ) «С меня спустили штаны, оставляя в лежачем положении, и, оставив в трусах, стали пихать неизвестный предмет в заднюю часть попы. Осознав, что это уже крайнее, что я могу выдержать, я начал отвечать на их вопросы и после получения всех нужных им ответов, меня усадили со связанными ногами и застегнутыми в наручниках руками и начали подсовывать документы на стол, на котором лежал телефон армейский (способный вырабатывать ток) для их подписывания, с угрозой продолжения пыток в случае моего отказа», — рассказывал Кудряшов.  
1 ) В августе Кудряшова

In [7]:
# Set both retrieval limits on the memory object before re-running the
# "Losses of the Russian army" query. top_k_paragraphs is assigned again here
# so the cell gives the same result when run on its own.
# NOTE(review): presumably these are paragraph- and sentence-level candidate counts - confirm in Memory.
memory.top_k_paragraphs = 50
memory.top_k_sentences = 200
# The literal was stored as mojibake (UTF-8 bytes decoded as Mac Roman);
# it is restored to the intended Cyrillic text so the real query is embedded.
test_response = memory.retrieve("Потери российской армии")
# Print metadata and text of each retrieved document; the second tuple element is ignored.
for i, (doc, _) in enumerate(test_response):
    print(f"{i} ) {doc.meta} {doc.text}")

0 ) {'Header1': 'Минтруда раскрыло масштабы возможных потерь среди российских военных в Украине', 'ParagraphHash': '9cabd3d9f8130325565bf4cc62954e66'} Министерство труда и соцзащиты РФ заказало почти миллион удостоверений для ветеранов боевых действий и родственников погибших ветеранов войн и инвалидов. Такой тендер «Вёрстка» обнаружила на портале госзакупок. Цифры могут косвенно свидетельствовать о примерных масштабах участвующих в боях и потерях российской группировки.  
1 ) {'Header1': 'Минтруда раскрыло масштабы возможных потерь среди российских военных в Украине', 'Para