# QA over Documents with Azure Cognitive Search as Vector Database

In [1]:
import os
import openai
from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import RetrievalQA
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.agents import initialize_agent, Tool, AgentType


# Run python-dotenv's loader (verbose mode) before reading any settings, so
# values it provides are visible in the process environment below.
load_dotenv(verbose=True)

# Azure OpenAI connection settings, read from the environment.
# Each lookup yields None when the variable is not set.
OPENAI_API_TYPE = os.environ.get("OPENAI_API_TYPE")
OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_BASE = os.environ.get("OPENAI_API_BASE")

In [2]:
# Embedding client pointed at the 'text-embedding-ada-002' deployment.
embeddings = OpenAIEmbeddings(deployment="text-embedding-ada-002")

# Azure Cognitive Search vector store over the "mackenzie" index.
# The endpoint and key come from the environment (None when unset), and
# query embedding is delegated to the embedding client defined above.
vector_store = AzureSearch(
    index_name="mackenzie",
    embedding_function=embeddings.embed_query,
    azure_search_endpoint=os.environ.get("VECTOR_STORE_ADDRESS"),
    azure_search_key=os.environ.get("VECTOR_STORE_KEY"),
)

In [3]:
# Ingestion cell: load PDFs, split them into chunks, and push the chunks to
# the vector store.
# NOTE(review): every run of this cell calls add_documents again; whether that
# duplicates entries in the index is not visible from here — confirm before
# re-running.

# Loader over the relative "pdfs" path.
loader = PyPDFDirectoryLoader("pdfs")

documents = loader.load()
# chunk_size=1000 with no overlap between consecutive chunks
# (presumably measured in characters, given the splitter class — TODO confirm).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# The trailing ';' keeps the notebook from echoing the call's return value.
vector_store.add_documents(documents=docs);

In [6]:
# Chat model bound to the 'gpt-35-turbo-16k' deployment; all connection
# details are the OPENAI_API_* values read from the environment earlier.
llm = AzureChatOpenAI(
    deployment_name="gpt-35-turbo-16k",
    openai_api_type=OPENAI_API_TYPE,
    openai_api_version=OPENAI_API_VERSION,
    openai_api_base=OPENAI_API_BASE,
    openai_api_key=OPENAI_API_KEY,
)

# Retrieval QA chain of type "stuff" that pulls context through the vector
# store's retriever; source documents are not included in the result.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_store.as_retriever(),
    chain_type="stuff",
    return_source_documents=False,
)

In [7]:
# Question to send through the retrieval QA chain.
query = "What does a product manager do?"
# Call the chain with the question under the "query" key and keep the
# returned value for display in the next cell.
result = qa({"query": query})

In [8]:
# Bare expression as the cell's last line, so the notebook displays `result`.
result

{'query': 'What does a product manager do?',
 'result': "A product manager is responsible for overseeing the development and management of a company's products. They work closely with cross-functional teams such as engineering, marketing, and sales to ensure that products align with the company's goals and meet customer needs. Some specific responsibilities of a product manager include:\n\n1. Developing product strategies and plans\n2. Conducting market research and identifying customer needs\n3. Creating and managing product roadmaps\n4. Collaborating with internal teams to ensure successful product execution\n5. Analyzing product performance and customer feedback to identify areas for improvement\n6. Overseeing product lifecycle management processes\n7. Developing pricing strategies and cost models\n8. Managing product budgets\n9. Leading and guiding the product management team."}

# OpenAI Functions Agent

In [3]:
from pydantic import BaseModel, Field
from langchain.tools.base import BaseTool
from typing import Type

# Argument schema for FakeUserTool (wired in through its `args_schema`).
# Both fields are required (`...` default). The description strings are part
# of the schema itself, so they must be well-formed.
class FakeUserInput(BaseModel):
    """Input for FakeUserTool."""

    # Fixed the unbalanced quoting in the description: it previously read
    # "'male' or ''female".
    gender: str = Field(..., description="Fake user gender. Available genders: 'male' or 'female'")
    # Field name kept as `naturality` for compatibility with FakeUserTool._run,
    # which forwards it to the API as the `nat` query parameter.
    naturality: str = Field(..., description="Country ISO 3166 code")

# Upper bound, in seconds, on how long a single randomuser.me request may
# block. Without it a stalled connection would hang the agent indefinitely.
_FAKE_USER_REQUEST_TIMEOUT_SECONDS = 10


class FakeUserTool(BaseTool):
    """Tool that creates fake user data."""

    name: str = "fake_user_tool"
    args_schema: Type[BaseModel] = FakeUserInput
    description: str = "Tool to generate fake user data considering a gender and a nationality."

    def _run(self, gender: str, naturality: str) -> str:
        """Fetch one fake user from the randomuser.me API.

        Args:
            gender: Forwarded as the ``gender`` query parameter.
            naturality: Forwarded as the ``nat`` query parameter.

        Returns:
            The raw response body on success. On any failure (missing
            ``requests`` package, network error, timeout, HTTP error status)
            a string of the form ``"Error: <message>"`` is returned instead
            of raising, so the caller always receives text.
        """
        try:
            # Imported lazily and inside the try block so that a missing
            # package is reported as an "Error: ..." string, not an exception.
            import requests

            response = requests.get(
                'https://randomuser.me/api/1.4/',
                params={'gender': gender, 'nat': naturality},
                timeout=_FAKE_USER_REQUEST_TIMEOUT_SECONDS,
            )
            # Turn 4xx/5xx responses into exceptions so an error page is not
            # handed back as if it were generated user data.
            response.raise_for_status()
            return response.text
        except Exception as e:
            return "Error: " + str(e)

In [4]:
# Tool list handed to the agent below: a single FakeUserTool instance.
tools = [FakeUserTool()]

In [7]:
# Build an OPENAI_FUNCTIONS-type agent from the tool list and the chat model,
# with verbose output enabled.
agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)

In [8]:
# Natural-language request; the recorded trace shows the agent invoking
# fake_user_tool with gender 'male' and naturality 'BR'.
agent('Create fake data for a male that was born in brazil')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `fake_user_tool` with `{'gender': 'male', 'naturality': 'BR'}`


[0m[36;1m[1;3m{"results":[{"gender":"male","name":{"title":"Mr","first":"Prudêncio","last":"da Cruz"},"location":{"street":{"number":7876,"name":"Rua Amazonas "},"city":"Macaé","state":"Tocantins","country":"Brazil","postcode":65740,"coordinates":{"latitude":"-10.0286","longitude":"101.3643"},"timezone":{"offset":"+6:00","description":"Almaty, Dhaka, Colombo"}},"email":"prudencio.dacruz@example.com","login":{"uuid":"3cc18f90-d07b-4c01-ac28-24eca49b1c8a","username":"silverdog597","password":"bigfoot","salt":"cYcEUUah","md5":"32292b0c0546c2f6253a93f976e6b37d","sha1":"7788d37b01b985480a7f5dfba240ca2c14a8faea","sha256":"83dd81f5bdf37627f78e6024bccee6a188bafbe3623e264b0d4e03a8a22fd448"},"dob":{"date":"1955-04-12T22:21:53.104Z","age":68},"registered":{"date":"2008-02-29T05:18:02.572Z","age":15},"phone":"(76) 2671-7564","cell":"(60) 2719-1902","id":{"name

{'input': 'Create fake data for a male that was born in brazil',
 'output': 'Here is the fake data for a male born in Brazil:\n\n- Name: Mr Prudêncio da Cruz\n- Gender: Male\n- Date of Birth: April 12, 1955 (68 years old)\n- Nationality: Brazilian\n- Email: prudencio.dacruz@example.com\n- Username: silverdog597\n- Password: bigfoot\n- Address: Rua Amazonas 7876, Macaé, Tocantins, Brazil, 65740\n- Phone: (76) 2671-7564\n- Cell: (60) 2719-1902\n- Picture: ![Picture](https://randomuser.me/api/portraits/men/93.jpg)'}

# OpenAI Functions Multi Tool Agent

In [14]:
# Wrap the retrieval QA chain's `run` method as a tool the agent can pick.
qa_tool = Tool(
    func=qa.run,
    name="role-library-tool",
    description="Useful when a user wants answers about the company employee handbook",
)

# Tool list for the multi-tool agent: a fresh FakeUserTool plus the QA tool.
tools = [FakeUserTool(), qa_tool]

In [15]:
# Rebuild the agent so it uses the current two-entry `tools` list
# (FakeUserTool and qa_tool); this rebinds the `agent` name.
agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)

In [16]:
# Natural-language request; the recorded trace shows the agent choosing
# fake_user_tool with gender 'female' and naturality 'FR'.
agent('Create fake data for a female that was born in france')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `fake_user_tool` with `{'gender': 'female', 'naturality': 'FR'}`


[0m[36;1m[1;3m{"results":[{"gender":"female","name":{"title":"Mrs","first":"Emy","last":"Philippe"},"location":{"street":{"number":1945,"name":"Avenue du Fort-Caire"},"city":"Brest","state":"Alpes-de-Haute-Provence","country":"France","postcode":74064,"coordinates":{"latitude":"53.4326","longitude":"-53.8271"},"timezone":{"offset":"-7:00","description":"Mountain Time (US & Canada)"}},"email":"emy.philippe@example.com","login":{"uuid":"c833130b-28e0-4b43-9854-7b0aa4195ec2","username":"goldenfish535","password":"kappa","salt":"jF5dTpmu","md5":"c31e4939d9061cf9687f205f0284a041","sha1":"798a7f3a5efe67e47ad0e49ebf010961eb640666","sha256":"c3badea07d5a4c7c747855644d7625a25b3eddeabe5bcfca5cd8ae746a091546"},"dob":{"date":"1984-07-12T00:29:21.500Z","age":39},"registered":{"date":"2003-05-14T23:51:50.827Z","age":20},"phone":"04-20-63-87-12","cell":"06-65-6

{'input': 'Create fake data for a female that was born in france',
 'output': 'Here is the generated fake user data:\n\n- Name: Mrs. Emy Philippe\n- Location: 1945 Avenue du Fort-Caire, Brest, Alpes-de-Haute-Provence, France\n- Postcode: 74064\n- Email: emy.philippe@example.com\n- Username: goldenfish535\n- Date of Birth: 1984-07-12\n- Registered since: 2003-05-14\n- Phone: 04-20-63-87-12\n- Cell: 06-65-67-82-12\n- ID: INSEE 2840629313726 16\n\n![Emy Philippe](https://randomuser.me/api/portraits/women/64.jpg)'}

In [None]:
# Same question that was sent to the QA chain earlier, now routed through the
# multi-tool agent. This cell has no execution count or recorded output.
agent('What does a product manager do?')