In [1]:
from llm import llm_davinci
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
import os
import uuid
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import UnstructuredPowerPointLoader
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.document_loaders import UnstructuredWordDocumentLoader
from langchain.vectorstores import Chroma
from llm import llm_davinci

  from tqdm.autonotebook import tqdm


In [2]:
class DB:
    def __init__(self, persist_directory):
        # Create persist directory
        self.persist_directory = persist_directory
        os.makedirs(self.persist_directory, exist_ok=True)

        # Create Deep Lake vector store with the embedding key
        model_name = "sentence-transformers/all-mpnet-base-v2"

        self.embedding = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl", model_kwargs={"device": "cuda"})
        self.db = Chroma(persist_directory= self.persist_directory,
                              embedding_function=self.embedding)

        # Create QA chain
        self.llm = llm_davinci# should be change here later
        self.retriever = self.db.as_retriever()
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm, chain_type="stuff", retriever=self.retriever, return_source_documents=True)
        self.doc_id_map = {}

    def upload(self, file_path, file_type):
        if file_type == "txt":
            loader = TextLoader(file_path)
        elif file_type == "pdf":
            loader = PyPDFLoader(file_path)
        elif file_type == "docx":
            loader = UnstructuredWordDocumentLoader(file_path)
        elif file_type == "csv":
            loader = CSVLoader(file_path)
        elif file_type == "pptx":
            loader = UnstructuredPowerPointLoader(file_path)
        else:
            print("File type not supported")
            return
        # may be more here later
        parent_id = str(uuid.uuid4())
        # child_ids = []

        for doc in loader.load():
            child_ids = self.db.add_documents([doc])
            doc.metadata["id"] = child_ids 
            
            self.doc_id_map[parent_id] = {
                "ids": child_ids,
                "filename": os.path.basename(file_path)
            }

    def delete(self, id):
        if id in self.doc_id_map:
            info = self.doc_id_map[id] 
            child_ids = info["ids"]
        else: 
            child_ids = [id]
            
        del self.doc_id_map[id]
        self.db.delete(child_ids)

    def search(self, query):
        result = self.qa_chain(query)
        print("****************")
        print(result["result"])
        
        print("****************")
        print("\nSources:")
        for source in result["source_documents"]:
            print(source.metadata["source"])

    def list_documents(self):
            for parent_id, info in self.doc_id_map.items():
                print("Parent:")
                print(info["filename"])
                print(parent_id)
                
                print("Children:")
                for child_id in info["ids"]:
                    print(f"  {child_id}") 


In [3]:
db = DB("testing_db_dir")

load INSTRUCTOR_Transformer
max_seq_length  512


In [5]:
db.upload("./example_data/InnoLab_visit_developer_kids.pptx","pptx")

In [6]:
db.list_documents()

Parent:
InnoLab_visit_developer_kids.pptx
8fc47137-fae4-45c7-9bb1-bd25c3024d70
Children:


In [7]:
db.search("For C language, what is the advantage and usage of it?")

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


****************
 C language is simpler and more efficient than other languages. It is used for developing operating systems, computer graphics, and games.
****************

Sources:
./example_data/InnoLab_visit_developer_kids.pptx


In [8]:
db.delete("8fc47137-fae4-45c7-9bb1-bd25c3024d70")

In [9]:
db.list_documents()

In [None]:
db.search("What computer language are mentioned?")

In [None]:
db.search("What are the computer language")

In [None]:
db.search("What is ai")

In [None]:
db.search("What is the capital of India?")

In [None]:
from langchain.memory import MongoDBChatMessageHistory

In [None]:
connection_string = "mongodb+srv://projectvpn39:kDir8fgavrwmXhUN@cluster0.bdqojht.mongodb.net/?retryWrites=true&w=majority"

In [None]:
message_history = MongoDBChatMessageHistory(

    connection_string=connection_string, session_id="team-testing"

)

In [None]:
message_history.messages

In [None]:
message_history

In [None]:
print(memory)

In [None]:
memory.chat_memory.add_user_message("hi!")

In [None]:
print(memory.to_dict())

In [None]:
print(memory.to_dict()["messages"])

In [None]:
from pymongo import MongoClient

# Replace the placeholder values with your actual MongoDB connection details
client = MongoClient("mongodb+srv://projectvpn39:kDir8fgavrwmXhUN@cluster0.bdqojht.mongodb.net/?retryWrites=true&w=majority")
db = client["database"]
collection = db["collection"]

In [None]:
# Create an instance of the ConversationBufferMemory class
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [None]:
for i in memory:
    print(i)

In [None]:
for i in memory.chat_memory:
    print(i)

In [None]:
# Convert the memory object to a dictionary
memory_dict = {
    "chat_memory": memory.chat_memory.messages,
    "output_key": memory.output_key,
    "input_key": memory.input_key,
    "return_messages": memory.return_messages,
    "human_prefix": memory.human_prefix,
    "ai_prefix": memory.ai_prefix,
    "memory_key": memory.memory_key,
}

In [None]:
# Insert the memory object into the MongoDB collection
collection.insert_one(memory_dict)

In [None]:
from langchain.agents import load_tools
from langchain.chains import LLMMathChain
from langchain.agents import AgentType
from langchain.agents import initialize_agent
from langchain.agents import Tool

from power_automate import send_email
from llm import llm_azure_gpt35

from llm import internaL_db

In [None]:
llm_math_chain = LLMMathChain(llm=llm_azure_gpt35)
toolss = load_tools(["wikipedia"], llm=llm_azure_gpt35)
tools = [
    Tool(
        name="Calculator",
        func=llm_math_chain.run,
        description="useful for when you need to answer questions about maths, but not anything else",
        return_direct=True,  # help you to correct the prompt
    ),
    Tool(
        name="Internal Database",
        description="useful for when you need to answer questions about alphabet company annual report",
        func=internaL_db.run,
    ),
    Tool(
        name="Send Email",
        func=send_email,
        description=""""
        Useful for sending the email, please just send what is the intention of the email, and who is the target to send
        """
    ),
]


tools.append(toolss[0])
toolls = load_tools(["google-search"])
# tools[0].description = "This tool allows you to search the web using the Google Search API. Useful for when you need to answer questions about current events"
tools.append(toolls[0])

In [None]:
def generate_response(data, memory):
    # {'model': {'id': 'Vicuna', 'name': 'Vicuna', 'maxLength': 96000, 'tokenLimit': 32768}, 'systemPrompt': "You are ChatGPT, a large language model trained by OpenAI. Follow the user's instructions carefully. Respond using markdown.", 'temperature': 0.7, 'key': '', 'messages': [{'role': 'user', 'content': 'hello'}, {'role': 'assistant', 'content': 'Hello! How can I assist you today?'}, {'role': 'user', 'content': 'hello'}]}
    # print(data)
    agent_chain = initialize_agent(
        tools, llm_azure_gpt35, agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory=memory)
    response = agent_chain.run(input=data)
    # print(prompt)
    print(response)

In [None]:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    memory_key="chat_history", return_messages=True)

In [None]:
data = "HI"

In [None]:
generate_response(data, memory)

In [None]:
memory

In [None]:
for i in memory:
    print(i)
    

In [None]:
for i in memory.chat_memory:
    print(i)

In [None]:
for i in memory.chat_memory.messages:
    print(i)
    

In [None]:
from langchain.memory import ConversationBufferMemory

def mongo_to_conversation(mongo_history):
  conversation_memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
  
  # Set MongoDB chat history 
  conversation_memory.chat_memory.messages = mongo_history.messages

#   # Initialize other fields
#   conversation_memory.session_id = session_id

  return conversation_memory

In [None]:

from langchain.memory import MongoDBChatMessageHistory
connection_string = "mongodb+srv://projectvpn39:kDir8fgavrwmXhUN@cluster0.bdqojht.mongodb.net/?retryWrites=true&w=majority"

In [None]:
message_history = MongoDBChatMessageHistory(

    connection_string=connection_string, database_name="langchain", collection_name="chat_history", session_id="team-testing"

)

In [None]:
message_history.messages

In [None]:
c = mongo_to_conversation(message_history)

In [None]:
c.chat_memory.messages


In [1]:
from langchain.memory import ConversationSummaryBufferMemory



In [2]:
from llm import llm_azure_gpt35

  from tqdm.autonotebook import tqdm


In [4]:
y = ConversationSummaryBufferMemory(llm=llm_azure_gpt35,max_token_limit=100)

In [5]:
for i in y:
    print(i)

('human_prefix', 'Human')
('ai_prefix', 'AI')
('llm', AzureChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-35-turbo', temperature=0.0, model_kwargs={}, openai_api_key='f96c629ba6874b2aaa7569acfea2162c', openai_api_base='https://pocsc.openai.azure.com/', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=True, n=1, max_tokens=None, tiktoken_model_name=None, deployment_name='gpt35', openai_api_type='azure', openai_api_version='2023-03-15-preview'))
('prompt', PromptTemplate(input_variables=['summary', 'new_lines'], output_parser=None, partial_variables={}, template='Progressively summarize the lines of conversation provided, adding onto the previous summary returning a new summary.\n\nEXAMPLE\nCurrent summary:\nThe human asks what the AI thinks of artificial intelligence. The AI thinks artificial intelligence is 

In [6]:
y.chat_memory.messages

[]

In [None]:
from langchain.agents import load_tools
from langchain.chains import LLMMathChain
from langchain.agents import AgentType
from langchain.agents import initialize_agent
from langchain.agents import Tool

from power_automate import send_email
from llm import llm_azure_gpt35

from llm import internaL_db

In [None]:
llm_math_chain = LLMMathChain(llm=llm_azure_gpt35)
toolss = load_tools(["wikipedia"], llm=llm_azure_gpt35)
tools = [
    Tool(
        name="Calculator",
        func=llm_math_chain.run,
        description="useful for when you need to answer questions about maths, but not anything else",
        return_direct=True,  # help you to correct the prompt
    ),
    Tool(
        name="Internal Database",
        description="useful for when you need to answer questions about alphabet company annual report",
        func=internaL_db.run,
    ),
    Tool(
        name="Send Email",
        func=send_email,
        description=""""
        Useful for sending the email, please just send what is the intention of the email, and who is the target to send
        """
    ),
]


tools.append(toolss[0])
toolls = load_tools(["google-search"])
# tools[0].description = "This tool allows you to search the web using the Google Search API. Useful for when you need to answer questions about current events"
tools.append(toolls[0])

In [None]:
def generate_response(data, memory,mogodb):
    # {'model': {'id': 'Vicuna', 'name': 'Vicuna', 'maxLength': 96000, 'tokenLimit': 32768}, 'systemPrompt': "You are ChatGPT, a large language model trained by OpenAI. Follow the user's instructions carefully. Respond using markdown.", 'temperature': 0.7, 'key': '', 'messages': [{'role': 'user', 'content': 'hello'}, {'role': 'assistant', 'content': 'Hello! How can I assist you today?'}, {'role': 'user', 'content': 'hello'}]}
    # print(data)
    mogodb.add_user_message(data)
    agent_chain = initialize_agent(
        tools, llm_azure_gpt35, agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory=memory)
    response = agent_chain.run(input=data)
    mogodb.add_ai_message(response)
    # print(prompt)
    print(response)

In [None]:
data = "What is the weather like today?"

In [None]:
generate_response(data, c ,message_history)

In [None]:
for i in c:
    print(i)

In [None]:
for i in c.chat_memory.messages:
    print(i)

In [None]:
for i in memory.chat_memory.messages:
    print(i)

In [None]:
for i in c:
    print(i)

In [None]:
print(message_history)

In [None]:
message_history.session_id