In [8]:
from LangChain.create_chatbot_over_joplin import JoplinChatbot
from langchain.llms import AzureOpenAI
from mysystemfuncs import get_key_from_config

In [18]:
# Directory holding the documents to index.
path_to_dir = r'qa-docs-langchain\book_sample'  # change to a path to your data directory

# Settings shared by the cells below: the whole mapping is expanded into
# JoplinChatbot(...), and "splitter_params" is additionally handed to
# index_new_files(...).
params = dict(
    embedder_params=dict(chunk_size=1),
    vectorstore_params=dict(),
    splitter_params=dict(chunk_size=500, chunk_overlap=50),
    chain_type="map_rerank",
)

## Initialize the chatbot

In [19]:
# Look up the value stored under the WEAVIATE_JOPLIN_SANDBOX_URL config key
# (presumably the Weaviate sandbox URL - confirm against the config file) and
# build the chatbot with it; `params` is expanded into keyword arguments.
weaviate_url = get_key_from_config("WEAVIATE_JOPLIN_SANDBOX_URL")
jchat = JoplinChatbot(weaviate_url, **params)

## Indexing the files in the Weaviate cluster

In [4]:
# Index the files under `path_to_dir`, forwarding the splitter settings from
# the shared `params` mapping. The call stays the last expression of the cell
# so any return value is still displayed.
splitter_args = params["splitter_params"]
jchat.index_new_files(path_to_dir, splitter_args=splitter_args)

## Querying the chatbot - let's compare the general API with our Joplin chatbot

In [5]:
# Reader note listing the sentences that were planted in the indexed book.
# It is a bare string expression, so Jupyter echoes it as the cell's output.
(
    "I have implanted a few sentences describing some facts inside the go book, specifically:\n"
    "My favorite color is green.\n"
    "The formula of water is C2O.\n"
    "You should drink the formula of water\n"
    "Water is an important substance\n"
    "The formula of water is.\n"
)

'I have implanted a few sentences describing some facts inside the go book, specifically:\nMy favorite color is green.\nThe formula of water is C2O.\nYou should drink the formula of water\nWater is an important substance\nThe formula of water is.\n'

In [11]:
def sanity_ask_a_model(query):
    """Send ``query`` directly to an ``AzureOpenAI`` model and return its reply.

    The deployment name, API key and API base are read with
    ``get_key_from_config`` on every call, and a fresh ``AzureOpenAI``
    client is built from them. Only ``query`` is passed to the model.

    Args:
        query: Prompt text handed to the model.

    Returns:
        Whatever the ``AzureOpenAI`` instance returns when called with ``query``.
    """
    deployment = get_key_from_config('OPENAI_DEPLOYMENT')
    key = get_key_from_config('OPENAI_API_KEY')
    base = get_key_from_config('OPENAI_BASE')

    client_kwargs = {
        "openai_api_base": base,
        "deployment_name": deployment,
        "openai_api_key": key,
    }
    baseline_llm = AzureOpenAI(**client_kwargs)
    return baseline_llm(query)

## Let's start by performing a sanity check - only the Joplin bot should know my favorite color:

In [16]:
# Ask the same question to the plain LLM and to the Joplin chatbot and print
# both answers one after the other.
query = "What is my favorite color?"
blank_gap = '\n'

print('Question: ', query)
print('---------------------------')
print(blank_gap)

# The plain model is called with the question text only.
general_answer = sanity_ask_a_model(query)
print('The answer according to the general LLM of openai:', general_answer)
print(blank_gap)

joplin_answer = jchat.query(query)
print('The answer according to Joplin chatbot:\n', joplin_answer)

Question:  What is my favorite color?
---------------------------


Answer according to the general LLM of openai: 

That depends on who you are!


Answer according to Joplin chatbot:
  Green.


## Now let's compare the different chain types

In [25]:
# Ask one question to the plain LLM, then to a Joplin chatbot rebuilt once per
# chain type, printing each answer as it arrives.
query = "You are a helpful bot that provides your answers primarily according to the information in the context if possible. What is the formula of water?"
blank_gap = '\n'

print('Question: ', query)
print('---------------------------')
print(blank_gap)

general_answer = sanity_ask_a_model(query)
print('The answer according to the general LLM of openai:',  general_answer)
print(blank_gap)

# NOTE(review): the loop overwrites params['chain_type'] and rebinds `jchat`,
# so after this cell both refer to the last chain type in the sequence.
for chain_t in ('stuff', 'map_reduce', 'map_rerank', 'refine'):
    params.update(chain_type=chain_t)
    sandbox_url = get_key_from_config("WEAVIATE_JOPLIN_SANDBOX_URL")
    jchat = JoplinChatbot(sandbox_url, **params)
    chain_answer = jchat.query(query)
    print(f'The answer according to Joplin chatbot with chain type {chain_t} :\n', chain_answer)

Question:  You are a helpful bot that provides your answers primarily according to the information in the context if possible. What is the formula of water?
---------------------------


The answer according to the general LLM of openai: 

The chemical formula for water is H2O, which means that it is made up of two hydrogen atoms and one oxygen atom.


The answer according to Joplin chatbot with chain type stuff :
  The formula of water is H2O.
The answer according to Joplin chatbot with chain type map_reduce :
  The formula of water is H2O.
The answer according to Joplin chatbot with chain type map_rerank :
  C2O
The answer according to Joplin chatbot with chain type refine :
  

The formula of water is H2O. Water is an important substance, as it is essential for sustaining life and is made up of two hydrogen atoms and one oxygen atom. Beyond its importance to life, water is also the only common substance found naturally in all three states of matter - solid, liquid, and gas - making 