In [1]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [2]:
# 1. Load every PDF found in the current directory.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the order of pages/chunks (and therefore the index contents)
# reproducible from one run or machine to the next.
all_docs = []

for pdf_name in sorted(os.listdir(".")):
    if pdf_name.endswith(".pdf"):
        all_docs.extend(PyPDFLoader(pdf_name).load())

# 2. Split the loaded pages into overlapping chunks
#    (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs_split = text_splitter.split_documents(all_docs)

# Fail early with an explicit message when there is nothing to index (no
# "*.pdf" file in the directory, or no chunk produced from them) instead of
# handing an empty list to the vector store.
if not docs_split:
    raise ValueError(
        "No text chunks were produced from the PDF files in the current "
        "directory; nothing to index."
    )

# 3. Create the embedding model.
# NOTE(review): no credentials are passed here; presumably the OpenAI API key
# is picked up from the environment — confirm before running.
embeddings = OpenAIEmbeddings()

# 4. Build the FAISS vector store from the chunks and the embedding model.
db = FAISS.from_documents(docs_split, embeddings)

# 5. Expose the vector store as a retriever for semantic search.
retriever = db.as_retriever()

In [3]:
# Preview only the first few chunks: echoing the whole list floods the
# notebook output with the text of every chunk, including company contact
# details (phone numbers, e-mail addresses).
docs_split[:3]

[Document(page_content="PDF généré le 03 août 2022 à 11:42 1\nRESTAURANT DE L'HOTEL DU SAUVAGE\n38 GRAND PLACE\n59670 CASSEL\nFrance\n +33 328424088\nrestaurantlesauvage@wanadoo.fr\nIdentifiant Kompass : FR0107518\nNuméro d’enregistrement (Siret, Siren…) :\n319774170 00025\nNuméro de TVA intracommunautaire :\nFR88 319774170\nCode NAF 2008 : 5610A - Restauration traditionnelle\nCode NACE 2008 : 5610 - Restaurants et services de restauration mobile\nCode ISIC 4 : 5610 - Activités de restaurants et de services de restauration mobiles\nConventions collectives : 1979 - OPCO entreprises et salariés des services à forte intensité de main-d'œuvre - Convention collective\nnationale des hôtels, cafés, restaurants (HCR)\nInformations générales\nForme juridique : Société à responsabilité limitée (sans autre\nindication)\nAnnée de fondation : 1980\nCapital actions émis : 7622 EUR\nChiffre d’affaires brut (2013) : 368212 EUR\nTranche du CA brut (2013) : Moins de 500 000 EUR\nType d’établissement : S

In [4]:
# Echo the retriever to inspect its configuration (vector store, search type
# and search kwargs).
retriever

VectorStoreRetriever(vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x7fc3f40eeeb0>, search_type='similarity', search_kwargs={})

In [5]:
from langchain.tools import Tool
from langchain.chains import RetrievalQA
from langchain.agents import initialize_agent, load_tools, AgentType
from langchain.chat_models import ChatOpenAI

In [6]:
# Chat model shared by the PDF question-answering chain and the Wikipedia
# tool loader below.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.2)

# Question answering over the indexed PDFs, backed by `retriever`.
qa_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

# Expose the PDF QA chain to the agent as a named tool.
pdf_tool = Tool(
    name="EntreprisePDF",
    description="Use this tool to answer questions about the company based on the PDF documents.",
    func=qa_pdf.run,
)


def _canned_expert_tool(name, reply, description):
    """Build a placeholder Tool that ignores its input and always returns `reply`.

    NOTE: these "expert" tools perform no lookup or reasoning; whatever the
    question is, the agent receives the same fixed sentence back.
    """
    return Tool(name=name, func=lambda q: reply, description=description)


# Placeholder domain "experts" (business, finance, HR).
business_tool = _canned_expert_tool(
    "BusinessExpert",
    "I provide insights about business strategy, management, and operations.",
    "Useful for answering questions about business strategy, management, and operations.",
)
finance_tool = _canned_expert_tool(
    "FinanceExpert",
    "I provide insights about financial analysis, accounting, and corporate finance.",
    "Useful for answering questions about financial analysis, accounting, and investments.",
)
hr_tool = _canned_expert_tool(
    "HRExpert",
    "I provide insights about human resources, recruitment, and employee management.",
    "Useful for answering questions about HR, recruitment, and company culture.",
)

# Final tool list: the built-in Wikipedia tool first (the only external tool
# loaded here), followed by the custom tools in this fixed order.
tools = load_tools(["wikipedia"], llm=llm) + [
    pdf_tool,
    business_tool,
    finance_tool,
    hr_tool,
]

In [7]:
# Multi-tool agent: a zero-shot ReAct-description agent that can pick among
# all entries of `tools`, driven by the shared `llm`. Verbose mode is off.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
)

# Test:

### Information from the PDF documents

In [8]:
# PDF-backed check: ask (in French) for the phone number of the company RHODES.
# NOTE(review): verbose is off, so which tool the agent used is not visible —
# presumably EntreprisePDF; confirm by enabling verbose output.
query = "Quel est le numéro de téléphone de l'entreprise RHODES?"
agent.run(query)

'The phone number for the company RHODES is +33 140510738.'

In [9]:
# PDF-backed check: ask (in French) for the issued share capital of the
# company EIREAN.
query = "Quelle est la capitale actions émis de l'entreprise EIREAN?"
agent.run(query)

'The capital actions émis of the company EIREAN is 8000 EUR.'

### Information from the web and other sources

In [11]:
# General-knowledge check on a topic unrelated to the company PDFs.
# NOTE(review): presumably answered through the Wikipedia tool — verbose is
# off, so the tool actually used is not shown; confirm if it matters.
query = "Who is Mr Beast?"
agent.run(query)

'Mr Beast, whose real name is James Stephen Donaldson, is an American YouTuber, media personality, and businessman known for his philanthropic efforts and elaborate challenges on YouTube. He is the most subscribed channel on YouTube and has over 419 million subscribers. He is also the founder of MrBeast Burger and has been involved in various philanthropic initiatives.'

In [12]:
# General-knowledge check: ask the agent for background information about
# Microsoft.
query = "Give me some informations about Microsoft"
agent.run(query)

'Microsoft Corporation is an American multinational corporation and technology conglomerate headquartered in Redmond, Washington. It was founded in 1975 and has since become influential in the rise of personal computers through software like Windows.'