In [1]:
from langchain.vectorstores import FAISS

from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
import os

In [2]:
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-07-01-preview"
os.environ["OPENAI_API_BASE"] = ""
os.environ["OPENAI_API_KEY"] = ""

In [3]:
embedding_model = OpenAIEmbeddings(chunk_size=10)

In [4]:
recipe_1 = TextLoader('test1.json').load()
text_splitter_1 = CharacterTextSplitter(chunk_overlap=100)
recipe_1_content = text_splitter_1.split_documents(recipe_1)

In [5]:
recipe_2 = TextLoader('test2.json').load()
text_splitter_2 = CharacterTextSplitter(chunk_overlap=100)
recipe_2_content = text_splitter_2.split_documents(recipe_2)

In [6]:
def embed_index(doc_list, embed_fn, index_store):
  """Function takes in existing vector_store, 
  new doc_list and embedding function that is 
  initialized on appropriate model. Local or online. 
  New embedding is merged with the existing index. If no 
  index given a new one is created"""
  #check whether the doc_list is documents, or text
  try:
    faiss_db = FAISS.from_documents(doc_list, 
                              embed_fn)  
  except Exception as e:
    faiss_db = FAISS.from_texts(doc_list, 
                              embed_fn)
  
  if os.path.exists(index_store):
    local_db = FAISS.load_local(index_store,embed_fn)
    #merging the new embedding with the existing index store
    local_db.merge_from(faiss_db)
    print("Merge completed")
    local_db.save_local(index_store)
    print("Updated index saved")
  else:
    faiss_db.save_local(folder_path=index_store)
    print("New store created...")


In [7]:
embed_index(doc_list=recipe_1_content,
            embed_fn=embedding_model,
            index_store='new_index')

New store created...


In [8]:
embed_index(doc_list=recipe_2_content,
            embed_fn=embedding_model,
            index_store='new_index')

Merge completed
Updated index saved


In [9]:
from langchain.chat_models import AzureChatOpenAI
vector_index = FAISS.load_local("new_index", OpenAIEmbeddings())
retriever = vector_index.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [10]:
llm = AzureChatOpenAI(
    temperature=0,
    deployment_name="gpt-4",
)

In [11]:
CONDENSE_QUESTION_PROMPT = """Comparison results:
Instructions: Please provide a comparison of the two recipes presented as sets of Workato steps. Identify and list the dissimilarities under the “Differences” section and any similarities under the “No Differences” section. Also, include a detailed logic, mentioning the relevant steps by referring to their identifiers. If you are unsure of the answer, do not attempt to make one up.

Chat History:
{chat_history}

QUERY: {question}"""

In [12]:
CONDENSEprompt = PromptTemplate.from_template(CONDENSE_QUESTION_PROMPT)

In [13]:
qa = ConversationalRetrievalChain.from_llm(llm=llm,
                                            retriever=retriever,
                                            condense_question_prompt=CONDENSEprompt,
                                            return_source_documents=True,
                                            verbose=False)

In [14]:
def ask_question_with_context(qa, question, chat_history):
    query = "A comparison of the two recipes presented as sets of Workato steps"
    result = qa({"question": question, "chat_history": chat_history})
    print("answer:", result["answer"])
    chat_history = [(query, result["answer"])]
    return chat_history

In [15]:
chat_history = []
ask_question_with_context(qa, "Please provide a comparison of the two recipes A and B", chat_history)

answer: Recipe A and Recipe B are very similar in structure and function. They both involve steps to search rows in a database, declare and update variables, call a recipe, send an email, log messages, and stop the process if an error occurs. 

However, there are a few differences:

1. The limit for the "search_rows_sql" action in Recipe A is set to "50000", while in Recipe B it is set to "10000". This means that Recipe A will retrieve more rows from the database in one go compared to Recipe B.

2. The recipes are named differently. The first one is named "recipe A" and the second one is named "recipe B". 

3. The input for the "call_recipe" action in Recipe A uses the user_text variable from step 6, while in Recipe B it uses the user_text variable from step 5. 

These differences may affect the performance and results of the recipes.


[('A comparison of the two recipes presented as sets of Workato steps',
  'Recipe A and Recipe B are very similar in structure and function. They both involve steps to search rows in a database, declare and update variables, call a recipe, send an email, log messages, and stop the process if an error occurs. \n\nHowever, there are a few differences:\n\n1. The limit for the "search_rows_sql" action in Recipe A is set to "50000", while in Recipe B it is set to "10000". This means that Recipe A will retrieve more rows from the database in one go compared to Recipe B.\n\n2. The recipes are named differently. The first one is named "recipe A" and the second one is named "recipe B". \n\n3. The input for the "call_recipe" action in Recipe A uses the user_text variable from step 6, while in Recipe B it uses the user_text variable from step 5. \n\nThese differences may affect the performance and results of the recipes.')]