In [3]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain, ConversationChain, LLMChain
from langchain.document_loaders import JSONLoader
from langchain.document_loaders import GoogleDriveLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryBufferMemory, ConversationBufferMemory
# from config import *
import os
import sys
import requests
import json
sys.path.append('./chat')

# SALES_GOAL = """provide clear and concise answers to potential customer about his queries about the company, but ultimately convince him to signup."""

# Prompt text shared by every QA chain in this notebook. It exposes three
# placeholders that the prompt objects built from it must supply:
#   {context}      - retrieved document text
#   {chat_history} - the previous conversation
#   {human_input}  - the latest user message
# NOTE(review): the wording contains typos ("acustomer", "respond to only
# questions to questions"). They are left untouched here because this text is
# sent to the model at runtime; fix deliberately and re-check the answers.
SALES_TEMPLATE = """As acustomer service support, \
    respond to only questions to questions related to the Employment Situation report. If "Content Context" is empty, say that you don't know. \
    
    Below is a query from a user and
    some relevant contexts. Answer the question given the information in those
    contexts. 

    `Content Context`: ```{context}```

    Conversation Handling:

    Previous Chat: `{chat_history}`

    Last Customer Message: `{human_input}`

"""

# Chat-style prompt built from the same template. QAChain and the script cells
# below build their own PromptTemplate from SALES_TEMPLATE instead, so this
# object is not referenced anywhere else in the visible notebook.
PROMPT_TEMPLATE = ChatPromptTemplate.from_template(SALES_TEMPLATE)

class QAChain:
    """Retrieval-augmented question answering over the Employment Situation report.

    On construction the report markdown is loaded, split into chunks, embedded
    into a Chroma vector store and wired into two entry points:

    * ``get_response_over_docs``  - retrieve documents, then call the "stuff"
      QA chain directly.
    * ``get_response_over_docs2`` - run retrieval and QA through
      ``self.rag_chain`` (a ``SequentialChain``).

    ``TextLoader``, ``TransformChain`` and ``SequentialChain`` are imported in a
    later notebook cell; that cell must have been executed before this class is
    instantiated.
    """

    # Files used for Google Drive OAuth (client secrets and cached user token).
    CREDENTIALS_PATH = ".credentials/credentials.json"
    TOKEN_PATH = ".credentials/token.json"
    # Knowledge-base document that is always indexed.
    REPORT_PATH = "NeMo-Guardrails/examples/grounding_rail/kb/report.md"

    def __init__(self, folder_id=None):
        """Build the vector store and the QA / RAG chains.

        Args:
            folder_id: Optional Google Drive folder id. When given, the Google
                docs in that folder are loaded and indexed together with the
                report; when ``None`` only the report is indexed.
        """
        self.folder_id = folder_id
        if folder_id is not None:
            # Load docs from Google Drive (sets self.docs_google).
            self.load_docs_from_google()
        else:
            self.docs_google = []
        # Docs entered manually on studio are currently disabled; see
        # load_docs_from_json.

        # Concatenate all docs. Previously the Google docs were loaded and then
        # silently dropped because only the report was assigned here.
        report_docs = TextLoader(self.REPORT_PATH).load()
        self.docs = list(self.docs_google) + report_docs

        # Create Chroma vectorstore (sets self.retriever).
        self.create_vectorstore()

        # Define qa chain parameters
        self.llm = ChatOpenAI(temperature=0)
        self.memory = ConversationSummaryBufferMemory(
            llm=self.llm, memory_key="chat_history", input_key="human_input"
        )
        self.prompt = PromptTemplate(
            input_variables=["chat_history", "human_input", "context"],
            template=SALES_TEMPLATE,
        )
        self.qa_chain = load_qa_chain(
            self.llm,
            chain_type="stuff",
            memory=self.memory,
            prompt=self.prompt,
        )

        # Step 1 of the sequential pipeline: turn {"question"} into the inputs
        # the QA chain expects ("human_input" and "input_documents").
        self.retrieval_chain = TransformChain(
            input_variables=["question"],
            output_variables=["human_input", "input_documents"],
            transform=self.retrieval_transform,
        )

        self.rag_chain = SequentialChain(
            chains=[self.retrieval_chain, self.qa_chain],
            input_variables=["question"],  # we need to name differently to output "query"
            output_variables=["output_text"],
        )

    def retrieval_transform(self, inputs: dict) -> dict:
        """Retrieve documents for ``inputs["question"]`` using this instance's retriever.

        The method previously lacked ``self`` (so the bound method could not be
        called with the inputs dict) and queried a module-level ``retriever``.

        Returns:
            A dict with ``"human_input"`` (the question, unchanged) and
            ``"input_documents"`` (the retrieved documents).
        """
        question = inputs["question"]
        docs = self.retriever.get_relevant_documents(query=question)
        return {
            "human_input": question,
            "input_documents": docs,
        }

    def get_response_over_docs(self, query):
        """Answer ``query`` by retrieving documents and calling the QA chain directly.

        Returns:
            The chain's ``"output_text"`` value.
        """
        relevant_docs = self.retriever.get_relevant_documents(
            query
        )
        return self.qa_chain(
            {"input_documents": relevant_docs,
             "human_input": query},
            return_only_outputs=True)['output_text']

    def get_response_over_docs2(self, query):
        """Answer ``query`` through this instance's sequential RAG chain.

        Uses ``self.rag_chain``; the previous code called a module-level
        ``rag_chain`` and therefore bypassed the chain built in ``__init__``.

        Returns:
            The chain's ``"output_text"`` value.
        """
        out = self.rag_chain({"question": query})
        return out["output_text"]

    def load_docs_from_json(self):
        """Load docs from manual input on studio.

        NOTE(review): reads ``self.sessionId``, which nothing in this class
        assigns, so calling this method as-is raises ``AttributeError``. It is
        not called from ``__init__``.
        """
        # Document loading
        self.loader_json = JSONLoader(
            file_path=f'./docs/{self.sessionId}.json',
            jq_schema='.documents[].description')

        self.docs_json = self.loader_json.load()

    def load_docs_from_google(self, _allow_refresh=True):
        """Load the Google docs in ``self.folder_id`` into ``self.docs_google``.

        If loading fails with an error mentioning "token expired", the OAuth
        token is refreshed and the load is retried exactly once. The previous
        version retried recursively with no bound.

        Args:
            _allow_refresh: Internal flag; ``False`` on the retry so that a
                second "token expired" error is raised to the caller.

        Raises:
            Exception: "Failed to refresh the token." when the refresh fails;
                otherwise whatever the loader raised.
        """
        try:
            # Document loading
            self.loader_google = GoogleDriveLoader(
                folder_id=self.folder_id,
                # Optional: configure whether to recursively fetch files from subfolders. Defaults to False.
                recursive=False,
                credentials_path=self.CREDENTIALS_PATH,
                token_path=self.TOKEN_PATH,
            )
            self.docs_google = self.loader_google.load()
            return
        except Exception as e:
            # Check if the error relates to token expiration (adapt based on actual exception message)
            if not _allow_refresh or 'token expired' not in str(e).lower():
                raise
            load_error = e

        if not self.refresh_token():
            raise Exception("Failed to refresh the token.") from load_error
        # Retry loading the documents once with the refreshed token.
        self.load_docs_from_google(_allow_refresh=False)

    def create_vectorstore(self):
        """Split ``self.docs``, embed the chunks into Chroma and set ``self.retriever``.

        The retriever is configured with ``search_kwargs={"k": 1}``.
        """
        self.text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
        self.documents = self.text_splitter.split_documents(self.docs)

        self.embeddings = OpenAIEmbeddings()
        self.vectorstore = Chroma.from_documents(self.documents, self.embeddings)
        self.retriever = self.vectorstore.as_retriever(search_kwargs={"k":1})

    def refresh_token(self):
        """Exchange the stored refresh token for a new access token.

        Reads the client id/secret from ``CREDENTIALS_PATH`` and the refresh
        token from ``TOKEN_PATH``, posts them to Google's token endpoint and,
        on HTTP 200, writes the new access token back under the ``"token"`` key
        of ``TOKEN_PATH``.

        Returns:
            ``True`` when the token file was updated, ``False`` otherwise.
        """
        # Load values from credentials.json
        with open(self.CREDENTIALS_PATH, 'r') as cred_file:
            cred_data = json.load(cred_file)

        # Load refresh_token from token.json
        with open(self.TOKEN_PATH, 'r') as token_file:
            token_data = json.load(token_file)

        # Constructing the refresh request with the necessary parameters
        params = {
            'client_id': cred_data['installed']['client_id'],
            'client_secret': cred_data['installed']['client_secret'],
            'refresh_token': token_data['refresh_token'],
            'grant_type': 'refresh_token'
        }

        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.post(
            "https://oauth2.googleapis.com/token", data=params, timeout=30
        )

        if response.status_code != 200:
            try:
                details = response.json()
            except ValueError:
                # Error bodies are not guaranteed to be JSON.
                details = response.text
            print("Error refreshing token:", details)
            return False

        # Update the access token in the data already read above (no second
        # read of token.json is needed) and save it back.
        token_data['token'] = response.json()['access_token']
        with open(self.TOKEN_PATH, 'w') as token_file:
            json.dump(token_data, token_file)

        return True


In [5]:
import time
import base64
import os
import json
import openai
from concurrent.futures import ThreadPoolExecutor

# import requests

# from chat import QAChain

from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

from langchain.chains import TransformChain, SequentialChain

# Read the API key from the environment rather than embedding it in the
# notebook (the previous `OPENAI_API_KEY = ` line was a redacted literal and a
# syntax error). LangChain's OpenAI wrappers read the same environment variable.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this cell."
    )
openai.api_key = OPENAI_API_KEY

# Initializes the building blocks of a QA chain using the jobs report.
# It uses OpenAI embeddings.
loader = TextLoader(
        "NeMo-Guardrails/examples/grounding_rail/kb/report.md",
    )

docs = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
documents = text_splitter.split_documents(docs)

# ChatOpenAI, ConversationSummaryBufferMemory, PromptTemplate and
# SALES_TEMPLATE come from the first notebook cell.
llm = ChatOpenAI()

memory = ConversationSummaryBufferMemory(llm=llm, memory_key="chat_history", input_key="human_input")
prompt = PromptTemplate(
    input_variables=["chat_history",
                     "human_input",
                     "context" ], template=SALES_TEMPLATE)

embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)
# k=1: only the single best-matching chunk is retrieved per query.
retriever = vectorstore.as_retriever(search_kwargs={"k":1})

def retrieval_transform(inputs: dict) -> dict:
    """Map a ``{"question": ...}`` dict to the inputs expected by the QA chain.

    Looks up documents for the question with the module-level ``retriever`` and
    returns them under ``"input_documents"``, with the question itself passed
    through under ``"human_input"``.
    """
    question = inputs["question"]
    matches = retriever.get_relevant_documents(query=question)
    return {"human_input": question, "input_documents": matches}

# Step 1: turn {"question"} into the QA chain inputs via retrieval_transform.
retrieval_chain = TransformChain(
                    input_variables=["question"],
                    output_variables=["human_input", "input_documents"],
                    transform=retrieval_transform
                )

# Step 2: "stuff" QA chain that answers from the retrieved documents.
qa_chain = load_qa_chain(
            llm,
            chain_type="stuff",
            memory=memory,
            prompt=prompt
        )

rag_chain = SequentialChain(
    chains=[retrieval_chain, qa_chain],
    input_variables=["question"],  # we need to name differently to output "query"
    output_variables=["output_text"]
)

# Latency benchmark: average seconds per call over N_RUNS identical questions.
# The run count is named once so the loop and the divisor cannot drift apart,
# and perf_counter() is used because it is a monotonic clock meant for
# measuring short intervals.
N_RUNS = 10
t = 0

for _ in range(N_RUNS):
    start = time.perf_counter()

    out = rag_chain({"question": "Where is Germany"})
    print(out["output_text"])

    t += time.perf_counter() - start

print(t / N_RUNS)

# query = "Summarize"

# relevant_docs = retriever.get_relevant_documents(
#             query
#         )

# response = qa_chain({"input_documents": relevant_docs, 
#              "human_input": query}, 
#             return_only_outputs=True)['output_text']

Created a chunk of size 528, which is longer than the specified 500


I'm sorry, but the query you provided is not related to the Employment Situation report. If you have any questions or concerns regarding the report, I'll be happy to assist you.
I'm sorry, but the query you provided is not related to the Employment Situation report. If you have any questions or concerns regarding the report, I'll be happy to assist you.
AI: I'm sorry, but the query you provided is not related to the Employment Situation report. If you have any questions or concerns regarding the report, I'll be happy to assist you.
AI: I'm sorry, but the query you provided is not related to the Employment Situation report. If you have any questions or concerns regarding the report, I'll be happy to assist you.
AI: I'm sorry, but the query you provided is not related to the Employment Situation report. If you have any questions or concerns regarding the report, I'll be happy to assist you.
AI: I'm sorry, but the query you provided is not related to the Employment Situation report. If yo

In [65]:
query = "What is Germany"

# Use a dedicated name for the QAChain instance: `qa_chain` is already the
# LangChain QA chain created in an earlier cell, and rebinding it to a different
# kind of object makes the notebook depend on execution order.
report_qa = QAChain()

# Latency benchmark: average seconds per call over N_RUNS identical questions.
# perf_counter() is a monotonic clock meant for measuring short intervals.
N_RUNS = 10
t = 0

for _ in range(N_RUNS):
    start = time.perf_counter()

    response = report_qa.get_response_over_docs2(query)

    print(response)

    t += time.perf_counter() - start

print(t / N_RUNS)


Created a chunk of size 528, which is longer than the specified 500


AI: Germany is a country located in Europe.
Germany is a country located in Europe.
Germany is a country located in Europe.
Germany is a country located in Europe.
Germany is a country located in Europe.
Germany is a country located in Europe.
Germany is a country located in Europe.
Germany is a country located in Europe.
Germany is a country located in Europe.
Germany is a country located in Europe.
1.0837165117263794


In [6]:
query = "What is Employment situation in milky way"

# Use a dedicated name for the QAChain instance so the `qa_chain` chain object
# from the earlier cell is not rebound to a different kind of object.
report_qa = QAChain()

# The previous version also ran `retriever.get_relevant_documents(query)` here
# and never used the result; get_response_over_docs does its own retrieval, so
# that extra call has been dropped.

# Latency benchmark: average seconds per call over N_RUNS identical questions.
# perf_counter() is a monotonic clock meant for measuring short intervals.
N_RUNS = 10
t = 0

for _ in range(N_RUNS):
    start = time.perf_counter()

    response = report_qa.get_response_over_docs(query)

    print(response)

    t += time.perf_counter() - start

print(t / N_RUNS)


Created a chunk of size 528, which is longer than the specified 500


I'm sorry, but I don't have any information about the employment situation in the Milky Way. My knowledge is limited to the Employment Situation report for the United States.
AI: I'm sorry, but I don't have any information about the employment situation in the Milky Way. My knowledge is limited to the Employment Situation report for the United States.
AI: I'm sorry, but I don't have any information about the employment situation in the Milky Way. My knowledge is limited to the Employment Situation report for the United States.
AI: I'm sorry, but I don't have any information about the employment situation in the Milky Way. My knowledge is limited to the Employment Situation report for the United States.
AI: I'm sorry, but I don't have any information about the employment situation in the Milky Way. My knowledge is limited to the Employment Situation report for the United States.
AI: I'm sorry, but I don't have any information about the employment situation in the Milky Way. My knowledge

In [96]:
from langchain.agents.initialize import initialize_agent
from langchain.tools import Tool

# NOTE(review): `return_sources` is passed through as_retriever's extra kwargs;
# confirm the retriever actually honors it.
retriever = vectorstore.as_retriever(search_kwargs={"k":1}, return_sources=False)

tools = [
    Tool(
        name="retrieval",
        # Pass the agent-supplied tool input `q` to the retriever. The previous
        # lambda ignored `q` and always searched with the global `query`, so the
        # tool returned the same documents whatever the agent asked for.
        func=lambda q: str(retriever.get_relevant_documents(q)),
        description="useful only for when you want to answer questions about the release of the Employment Situation report for April",
        return_direct=False,
    ),
]

query = "What is the unemployment rate?"

llm = ChatOpenAI(temperature=0)
memory = ConversationBufferMemory(memory_key="chat_history")
# NOTE(review): this prompt is built but not passed to initialize_agent below,
# so the agent runs with its own default prompt.
prompt = PromptTemplate(
    input_variables=["chat_history",
                     "human_input",
                     "context" ], template=SALES_TEMPLATE
    )
agent_executor2 = initialize_agent(
    tools, llm, agent="conversational-react-description", memory=memory, handle_parsing_errors=True
)

# Time a single agent run.
start = time.time()
print(agent_executor2.run(input=query))
print(time.time() - start)

Agent stopped due to iteration limit or time limit.
10.21777892112732


In [93]:
# Leftover interactive introspection: in IPython, `??` shows the source/help for `Tool`.
# NOTE(review): consider removing this cell before sharing the notebook.
Tool??

In [67]:
from langchain.agents import Tool
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent

from llama_index import VectorStoreIndex, WikipediaReader
# Use a dedicated name so the report chunks held in the global `documents`
# (built in an earlier cell) are not replaced by unrelated Wikipedia pages.
wiki_documents = WikipediaReader().load_data(pages=["Covid-19"])
index = VectorStoreIndex.from_documents(documents=wiki_documents)

# Build the query engine once; the previous lambda called
# index.as_query_engine() again on every tool invocation.
query_engine = index.as_query_engine()

tools = [
    Tool(
        name="LlamaIndex",
        func=lambda q: str(query_engine.query(q)),
        description="useful for when you want to answer questions about covid",
        return_direct=True,
    ),
]

memory = ConversationBufferMemory(memory_key="chat_history")
llm = ChatOpenAI(temperature=0)
agent_executor = initialize_agent(
    tools, llm, agent="conversational-react-description", memory=memory, handle_parsing_errors=True
)

# Time a single agent run on a question unrelated to the tool's topic.
start = time.time()
print(agent_executor.run(input="where is france"))
print(time.time() - start)

France is a country located in Western Europe. It is bordered by Belgium, Luxembourg, Germany, Switzerland, Italy, Spain, Andorra, and Monaco. The capital city of France is Paris, which is known for its iconic landmarks such as the Eiffel Tower and the Louvre Museum. France is also famous for its cuisine, fashion, art, and culture.
2.5997068881988525
