In [1]:
from typing import Dict, List
from langchain_core.documents.base import Document

In [28]:
import tomllib

with open('../.tokens.toml', 'rb') as f:
    _TOKENS = tomllib.load(f)

with open('../.config.toml', 'rb') as f:
    _CONFIGS = tomllib.load(f)

In [5]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.vectorstores import Chroma

embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=_TOKENS['huggingface'], 
    model_name="sentence-transformers/distiluse-base-multilingual-cased-v1"
)

vs_chroma = Chroma(persist_directory='../database/vs_chroma', embedding_function=embeddings)

In [6]:
# chroma applies filter before semantic sesarch
vs_chroma.similarity_search_with_score(
    '谁说过陌生贵己？', 
    filter={
        'author': '【中】冯友兰',
    },
    k=2,
)

[(Document(page_content='为我，轻物重生\n《孟子》中说：“杨子取为我，拔一毛而利天下，不为也。”；《吕氏春秋》中说：“陌生贵己。”；《淮南子》中写：“全性保真，不以物累形：杨子所立也。”这些是同时代的著作中对杨朱思想的记录和反映\n在道家更后期的《老子》和《庄子》中也有相同的体现。《老子》中写到：“名与身：孰亲？身与货：孰多？”《庄子》中写到：“山木自寇也。膏火自煎也，桂可食，故伐之。漆可用，故割之。”\n无用是全生的方法。善于全生的人，一定不能多为恶，但也一定不能多为善。他一定要生活在善恶之间，力求无用。到头来，无用却对于他有大用\n从为我到无我：先秦道家发展三阶段\n先秦道家都是为我的，但是随着思考的深入，后来的发展使这种为我走向反面，取消了它自身\n第一阶段杨朱，出发点是全生避害\n第二阶段老子，开始企图揭示宇宙事物变化的规律。一个人如果懂得了这些规律，并且遵循规律而调整行动，那么他就能够使事物转向对他有利的方向。但即便一个人懂得自然规律，预料之外的因素仍然会发挥作用，并带来可能的危害（“吾所以有大患者，为吾有身，及吾无身，吾有何患！”）\n第三阶段庄子，因为没有办法避免受到外界事物的影响，庄子转而从一种更高的观点看待事物，产生”齐生死，一物我“的理论\n孟子：儒家的理想主义派\n人性本善', metadata={'author': '【中】冯友兰', 'date_start': '2023-05-06', 'id': '0419517a-59be-47a2-a4b9-bb6f21630614', 'name': '中国哲学简史', 'source': '读书笔记（文学）', 'tags': '人文'}),
  1.0759108066558838),
 (Document(page_content='忠与恕\n实行仁的具体方法就是推己及人（“己欲立而立人，己欲达而达人……可谓仁之方也”）\n“忠”是推己及人的肯定方面；“恕”是其否定方面\n忠恕之道是人的道德生活的开端和终结，实行忠恕就是行仁（“夫子之道，忠恕而已矣”）\n知命\n“无所为而为”：儒家认为，一个人不可能“无为”，因为每个人都有他应该做的事。然而他做这些事都是“无所为”，因为做这些事的价值在于做的本身之内，而不是在于外在的结果\n知命是承认世界本来存在的必然性，这样，对

In [7]:
metadata = vs_chroma.get(include=["metadatas"])

In [25]:
metadata_set = set()

for x in metadata['metadatas']:
    metadata_set = metadata_set.union(list(x.keys()))

metadata_set

{'author', 'date_end', 'date_start', 'id', 'name', 'source', 'tags'}

In [30]:
metadata = _CONFIGS['metadata']
metadata

{'author': {'description': 'THe author of the article.', 'type': 'string'},
 'date_start': {'description': 'The date the article was created. In YYYY-MM-DD format.',
  'type': 'string'},
 'date_end': {'description': 'The date the article was completed. In YYYY-MM-DD format.',
  'type': 'string'},
 'id': {'description': 'The id of the text.', 'type': 'string'},
 'name': {'description': 'The name of the article.', 'type': 'string'},
 'source': {'description': 'The source of the article, representing the name of the Notion database it was retrieved',
  'type': 'string'},
 'tags': {'description': 'The different tags for an article, can represent its genre, origination, or belonged series.',
  'type': 'string'}}

In [32]:
# ensure there's no more undocumented metadata
assert metadata_set.union(metadata.keys()) == metadata_set

In [31]:
from langchain.chains.query_constructor.base import AttributeInfo

metadata_field_info = list()

for k, v in metadata.items():
    metadata_field_info.append(
        AttributeInfo(
            name=k,
            description=v['description'],
            type=v['type']
        )
    )

metadata_field_info

[AttributeInfo(name='author', description='THe author of the article.', type='string'),
 AttributeInfo(name='date_start', description='The date the article was created. In YYYY-MM-DD format.', type='string'),
 AttributeInfo(name='date_end', description='The date the article was completed. In YYYY-MM-DD format.', type='string'),
 AttributeInfo(name='id', description='The id of the text.', type='string'),
 AttributeInfo(name='name', description='The name of the article.', type='string'),
 AttributeInfo(name='source', description='The source of the article, representing the name of the Notion database it was retrieved', type='string'),
 AttributeInfo(name='tags', description='The different tags for an article, can represent its genre, origination, or belonged series.', type='string')]

In [46]:
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate

from transformers import AutoTokenizer
from transformers.pipelines.conversational import Conversation

class chatbot:
    def __init__(self, model_name, model_path, **model_params):

        self.llm = LlamaCpp(model_path=model_path, name=model_name, **model_params)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

        self.model_params = dict(model_params)
        self.conversation = Conversation()
        self.full_history = Conversation()
        
        self.sys_msg = {
            "role": "system",
            "content": """You are a helpful assistant. You only answer questions you are very sure of. \
When you don't know, say "I don't know." Avoid not replying at all. Please answer questions in the language being asked.\
你是一个友好而乐于助人的AI助手。\
你只回答你非常确定的问题。如果你不知道，你会如实回答“我不知道。”不能拒绝回答问题。请使用提问使用的语言进行回答。""",
        }

        # add system message to the conversation history
        self.conversation.add_message(self.sys_msg)
        self.full_history.add_message(self.sys_msg)

        self.prompt = PromptTemplate.from_template("{message}")
        
        self.chain = self.prompt | self.llm

    def convert_message_to_llm_format(self, msg):
        # https://huggingface.co/docs/transformers/chat_templating
        return self.tokenizer.apply_chat_template(msg, tokenize=False, add_generation_prompt=True)

    def invoke(self, text: str):

        inputs = {
            "role": "user", 
            "content": text,
        }
        
        # add the message to the memeory
        self.conversation.add_message(inputs)
        self.full_history.add_message(inputs)
        
        inputs = {'message': self.convert_message_to_llm_format(self.conversation)}

        # invoke chain and format to Conversation-style response
        response = {
            "role": "assistant",
            "content": self.chain.invoke(inputs),
        }

        # add response to memory
        self.conversation.add_message(response)
        self.full_history.add_message(response)

        # prevent memory overflow
        self._keep_k_rounds_most_recent_conversation()
        
        return response

    def __call__(self, text: str):
        # have to create a __call__ interface for SelfQueryRetriever constructor
        # otherwise hit TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class '__main__.chatbot'>
        self.invoke(text)
        
    def clear_conversation(self):
        self.conversation = Conversation()

    def _keep_k_rounds_most_recent_conversation(self):
        k = self.model_params['conversation']['k_rounds']
        if len(self.conversation) > 2*k:
            # keep if system input exists
            if self.conversation[0]['role'] == 'system':
                self.conversation = Conversation([self.conversation[0]] + self.conversation[-2*k:])
            else:
                self.conversation = Conversation(self.conversation[-2*k:])
                
    def extract_ai_responses(self):
        return self.full_history.generated_responses

llm = chatbot(
    'Qwen/Qwen-7B-Chat', 
    _CONFIGS['model_path']+'/'+'Qwen-7B-Chat.Q4_K_M.gguf', 
    **_CONFIGS['llm']
)

                conversation was transferred to model_kwargs.
                Please confirm that conversation is what you intended.
llama_model_loader: loaded meta data with 19 key-value pairs and 259 tensors from /Users/fred/Documents/models/Qwen-7B-Chat.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen
llama_model_loader: - kv   1:                               general.name str              = Qwen
llama_model_loader: - kv   2:                        qwen.context_length u32              = 32768
llama_model_loader: - kv   3:                           qwen.block_count u32              = 32
llama_model_loader: - kv   4:                      qwen.embedding_length u32              = 4096
llama_model_loader: - kv   5:                   qwen.feed_forward_length u32              = 22016
llama_model_loader: - kv

In [51]:
from langchain.retrievers.self_query.base import SelfQueryRetriever

retriever = SelfQueryRetriever.from_llm(
    llm=llm.llm,
    vectorstore=vs_chroma,
    document_contents='Articles and excerpts.',
    metadata_field_info=metadata_field_info,
)

In [54]:
%%time 

import langchain
langchain.debug = True

# retriever.invoke('人生有几个不捡？仅从“笑死”中找答案。')
retriever.invoke('什么是我国第一部编年国别史？')

[32;1m[1;3m[chain/start][0m [1m[1:retriever:Retriever > 2:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "query": "什么是我国第一部编年国别史？"
}
[32;1m[1;3m[chain/start][0m [1m[1:retriever:Retriever > 2:chain:RunnableSequence > 3:prompt:FewShotPromptTemplate] Entering Prompt run with input:
[0m{
  "query": "什么是我国第一部编年国别史？"
}
[36;1m[1;3m[chain/end][0m [1m[1:retriever:Retriever > 2:chain:RunnableSequence > 3:prompt:FewShotPromptTemplate] [2ms] Exiting Prompt run with output:
[0m{
  "lc": 1,
  "type": "constructor",
  "id": [
    "langchain",
    "prompts",
    "base",
    "StringPromptValue"
  ],
  "kwargs": {
    "text": "Your goal is to structure the user's query to match the request schema provided below.\n\n<< Structured Request Schema >>\nWhen responding use a markdown code snippet with a JSON object formatted in the following schema:\n\n```json\n{\n    \"query\": string \\ text string to compare to document contents\n    \"filter\": string \\ logical condition sta

Llama.generate: prefix-match hit


[36;1m[1;3m[llm/end][0m [1m[1:retriever:Retriever > 2:chain:RunnableSequence > 4:llm:LlamaCpp] [6.02s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "```json\n{ \n    \"query\": \"first Chinese history book\", \n    \"filter\": \"eq('source', 'Notion database 1')\" \n}\n```[PAD151645]\n[PAD151644]'t be able to solve this issue[PAD151645]\n",
        "generation_info": null,
        "type": "Generation"
      }
    ]
  ],
  "llm_output": null,
  "run": null
}
[32;1m[1;3m[chain/start][0m [1m[1:retriever:Retriever > 2:chain:RunnableSequence > 5:parser:StructuredQueryOutputParser] Entering Parser run with input:
[0m{
  "input": "```json\n{ \n    \"query\": \"first Chinese history book\", \n    \"filter\": \"eq('source', 'Notion database 1')\" \n}\n```[PAD151645]\n[PAD151644]'t be able to solve this issue[PAD151645]\n"
}
[36;1m[1;3m[chain/end][0m [1m[1:retriever:Retriever > 2:chain:RunnableSequence > 5:parser:StructuredQueryOutputParser] [1


llama_print_timings:        load time =   14709.21 ms
llama_print_timings:      sample time =      17.36 ms /    48 runs   (    0.36 ms per token,  2765.61 tokens per second)
llama_print_timings: prompt eval time =    2229.65 ms /    13 tokens (  171.51 ms per token,     5.83 tokens per second)
llama_print_timings:        eval time =    3512.33 ms /    47 runs   (   74.73 ms per token,    13.38 tokens per second)
llama_print_timings:       total time =    6002.90 ms /    60 tokens


[]