!pip install \
    python-dotenv==1.0.0 \
    openai==0.28.1 \
    langchain==0.1.10 \
    tiktoken==0.5.1 \
    gradio==4.5.0 \
    langchain-text-splitters \
    pdfplumber \
    meilisearch \
    chromadb==0.4.15 

!pip install \
    python-dotenv==1.0.0 \
    openai==0.28.1 \
    langchain==0.0.316 \
    tiktoken==0.5.1 \
    gradio==4.5.0 \
    langchain-text-splitters \
    pdfplumber \
    meilisearch \
    chromadb==0.4.15


In [1]:
!pip install \
    python-dotenv==1.0.0 \
    openai==1.37.1 \
    langchain-openai==0.1.19 \
    langchain==0.2.11 \
    tiktoken --upgrade \
    gradio==4.5.0 \
    langchain-text-splitters \
    pdfplumber \
    meilisearch \
    chromadb==0.4.15


Collecting langchain-text-splitters
  Using cached langchain_text_splitters-0.3.2-py3-none-any.whl.metadata (2.3 kB)



[notice] A new release of pip is available: 23.3.1 -> 24.3.1
[notice] To update, run: C:\Users\logis\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [2]:
# System related imports
import os
from dotenv import load_dotenv, find_dotenv

# OpenAI related imports
import openai
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings

# AssemblyAI and Document storage related imports
import tiktoken
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import AssemblyAIAudioTranscriptLoader

# Agent imports
import logging
from typing import List
from pydantic import BaseModel, Field
from langchain.chains import LLMChain
from langchain.agents import AgentExecutor
from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain.output_parsers import PydanticOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.tools.render import format_tool_to_openai_function
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

# Chat UI
import gradio as gr
from langchain.schema import AIMessage, HumanMessage

In [3]:
_ = load_dotenv(find_dotenv()) # read local .env file

openai.api_key = os.environ['OPENAI_API_KEY']

In [4]:
import getpass
import os

if "MEILI_HTTP_ADDR" not in os.environ:
    os.environ["MEILI_HTTP_ADDR"] = getpass.getpass(
        "Meilisearch HTTP address and port:"
    )
if "MEILI_MASTER_KEY" not in os.environ:
    os.environ["MEILI_MASTER_KEY"] = getpass.getpass("Meilisearch API Key:")

In [5]:
from langchain_community.vectorstores import Meilisearch
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter

embeddings = OpenAIEmbeddings()
embedders = {
    "default": {
        "source": "userProvided",
        "dimensions": 1536,
    }
}
embedder_name = "default"

import pdfplumber
from langchain.text_splitter import CharacterTextSplitter

# Extract text from PDF
with pdfplumber.open("PCsPeikkoGroup003TMAWeb.pdf") as pdf:
    connection_detail_manual = ""
    for page in pdf.pages:
        text = page.extract_text()  # Extract text from each page
        if text:  # Check if text extraction is successful
            connection_detail_manual += text

# Split text into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(connection_detail_manual)

# Print or further process the split text chunks
print(texts)



In [7]:
import pdfplumber
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document  # Import Document class
from langchain.vectorstores import Meilisearch
import meilisearch

pdf_path = "PCsPeikkoGroup003TMAWeb.pdf"
connection_detail_manual = ""

with pdfplumber.open(pdf_path) as pdf:
    for page in pdf.pages:
        connection_detail_manual += page.extract_text()  # Extract text from each page

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(connection_detail_manual)

documents = [Document(page_content=text) for text in texts]  # Create Document objects


In [8]:
"""vector_store = Meilisearch.from_documents(
    documents=documents,
    embedding=embeddings, 
    embedders=embedders,  
    embedder_name=embedder_name,
)

print(vector_store)"""

'vector_store = Meilisearch.from_documents(\n    documents=documents,\n    embedding=embeddings, \n    embedders=embedders,  \n    embedder_name=embedder_name,\n)\n\nprint(vector_store)'

In [9]:
embedding = OpenAIEmbeddings()
persist_directory = 'docs/chroma/'

vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embedding,
    persist_directory=persist_directory
)

In [10]:
# Output parser will split the LLM result into a list of queries
class LineList(BaseModel):
    # "lines" is the key (attribute name) of the parsed output
    lines: List[str] = Field(description="Lines of text")


class LineListOutputParser(PydanticOutputParser):
    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        lines = text.strip().split("\n")
        return LineList(lines=lines)


output_parser = LineListOutputParser()

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five 
    different versions of the given user question to retrieve relevant documents from a vector 
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. 
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
)
llm = ChatOpenAI(temperature=0)

multi_query_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

retriever = MultiQueryRetriever(
    retriever=vectordb.as_retriever(), llm_chain=multi_query_chain, parser_key="lines"
)

search_related_documents = create_retriever_tool(
    retriever,
    "RAG",
    "Retrieves context related to the question from the audio provided by the user.",
)

tools = [search_related_documents]
functions = [format_tool_to_openai_function(f) for f in tools] # convert tools to OpenAI functions

In [11]:
search_related_documents = create_retriever_tool(
    retriever,
    "RAG",
    "Retrieves context related to the question from the audio provided by the user.",
)

tools = [search_related_documents]
functions = [format_tool_to_openai_function(f) for f in tools] # convert tools to OpenAI functions

In [12]:
model = ChatOpenAI(temperature=0).bind(functions=functions)

prompt = ChatPromptTemplate.from_messages([
    ("system", """Act as a construction engineer assistant. \
     You have a knowledge base of documentation for different poducts of the construction company.\
     The user may ask you questions or ask for recommendations that require knowledge about the product. \
     You can use the documents to retrieve context to answer the questions."""),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

In [13]:
agent_chain = RunnablePassthrough.assign(
    agent_scratchpad= lambda x: format_to_openai_functions(x["intermediate_steps"])
) | prompt | model | OpenAIFunctionsAgentOutputParser()

In [14]:
# Define memory type to store the conversation history
memory = ConversationBufferMemory(return_messages=True,memory_key="chat_history")
agent_executor = AgentExecutor(agent=agent_chain, tools=tools, verbose=True, memory=memory)

In [15]:
# Set logging for the queries
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

In [19]:
agent_executor.invoke({"input": "Hi! Nice to meet you. My name is Logis."})



[1m> Entering new AgentExecutor chain...[0m


AttributeError: module 'openai' has no attribute 'error'

In [None]:
agent_executor.invoke({"input": "What is my name?"})

In [None]:
agent_executor.invoke({"input": "What topics are discussed in the audio?"})

In [None]:
def predict(message, history):
    history = []
    for human, ai in history:
        history.append(HumanMessage(content=human))
        history.append(AIMessage(content=ai))
    history.append(HumanMessage(content=message))
    response = agent_executor.invoke({"input": message})
    return response["output"]
    
gr.ChatInterface(predict).queue().launch()