# Setup Environment
The following examples are all based on OpenAI APIs/models in Azure.

In [1]:
pip install openai

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install langchain

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Azure OpenAI connection settings, exported as environment variables for this kernel.
# Replace every <...> placeholder with your own value before running the cell.
# NOTE: `%env VAR=value` echoes the assignment in the cell output, so clear this
# cell's output before sharing or committing the notebook once a real key is filled in.

# Select the Azure flavour of the OpenAI API.
%env OPENAI_API_TYPE=azure
# The API version you want to use.
%env OPENAI_API_VERSION=<API Version>
# The base URL for your Azure OpenAI resource. You can find this in the Azure portal under your Azure OpenAI resource.
%env OPENAI_API_BASE=<Your endpoint>
# The API key for your Azure OpenAI resource. You can find this in the Azure portal under your Azure OpenAI resource.
%env OPENAI_API_KEY=<Your key>

# Teach GPT to use tools

In [8]:
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import LLMRequestsChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Chat model shared by the search tool and the agent below.
# deployment_name must be the name of your own GPT model deployment.
llm = AzureChatOpenAI(
    deployment_name="GPT4",
)

def query_google(question):
    """Look up `question` on Google and let the LLM extract the answer.

    Builds a Google search URL for the question, hands it to an
    ``LLMRequestsChain`` and asks the module-level ``llm`` to pull the answer
    out of the returned page text using the extraction prompt below.

    Args:
        question: The search query, as plain text.

    Returns:
        The value returned by ``requests_chain.run``; per the prompt this is
        expected to be the extracted answer or "not found".
    """
    # Local import keeps the cell self-contained without touching the import block.
    from urllib.parse import quote_plus

    template = """Between >>> and <<< are the raw search result text from google.
    Extract the answer to the question '{query}' or say "not found" if the information is not contained.
    Use the format
    Extracted:<answer or "not found">
    >>> {requests_result} <<<
    Extracted:"""

    PROMPT = PromptTemplate(
        input_variables=["query", "requests_result"],
        template=template,
    )

    inputs = {
        "query": question,
        # quote_plus turns spaces into "+" (as before) and additionally
        # percent-encodes characters such as "&", "#", "?", "+" and non-ASCII
        # text, which would otherwise truncate or corrupt the query string.
        "url": "https://www.google.com/search?q=" + quote_plus(question),
    }
    requests_chain = LLMRequestsChain(llm_chain=LLMChain(llm=llm, prompt=PROMPT), output_key="query_info")
    res = requests_chain.run(inputs)
    # Echo the extracted answer so it is visible alongside the agent trace.
    print("res=", res)
    return res

# The Google lookup is the only tool offered to the agent; the description is
# the text the agent reads when deciding whether to call it.
tools = [
    Tool(
        name="Get current info",
        func=query_google,
        description=(
            "invoke it for when you need to answer questions about current info. \n"
            "        And the input should be a search query."
        ),
    ),
]

# Build the agent with at most three iterations and verbose tracing enabled,
# then run it on a sample question and print its final answer.
agent = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",
    max_iterations=3,
    verbose=True,
)
print(agent.run("What is appropriate to wear in Beijing today?"))




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find out the current weather in Beijing to determine appropriate clothing.
Action: Get current info
Action Input: current weather in Beijing[0mres= 26°C, Mostly sunny

Observation: [36;1m[1;3m26°C, Mostly sunny[0m
Thought:[32;1m[1;3mI now know the weather in Beijing today.
Final Answer: It is appropriate to wear light, comfortable clothing such as a t-shirt and shorts or pants, along with comfortable shoes, and perhaps a light jacket or cardigan for cooler indoor environments, in Beijing today.[0m

[1m> Finished chain.[0m
It is appropriate to wear light, comfortable clothing such as a t-shirt and shorts or pants, along with comfortable shoes, and perhaps a light jacket or cardigan for cooler indoor environments, in Beijing today.


In [9]:
# Embed the content and store it in a vector DB
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import AzureOpenAI
from langchain.vectorstores import Chroma
from langchain.document_loaders import DirectoryLoader


# Load the markdown source file as LangChain documents.
loader = UnstructuredMarkdownLoader("maxlang.md")
documents = loader.load()

# Split the documents into chunks with a target size of 100 and no overlap.
# The splitter can still emit larger chunks (see the warnings in this cell's output).
text_splitter = CharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=0,
)
split_docs = text_splitter.split_documents(documents)

# deployment is the name of your embedding model deployment.
# NOTE(review): chunk_size=1 presumably limits each embedding request to one text - confirm.
embeddings = OpenAIEmbeddings(
    deployment="embedding",
    chunk_size=1,
)

# Embed every chunk and index it in a Chroma store; no persistence is configured,
# so the index only lives as long as the kernel.
docsearch = Chroma.from_documents(split_docs, embeddings)


Created a chunk of size 131, which is longer than the specified 100
Created a chunk of size 165, which is longer than the specified 100
Created a chunk of size 155, which is longer than the specified 100
Created a chunk of size 158, which is longer than the specified 100
Created a chunk of size 197, which is longer than the specified 100
Created a chunk of size 143, which is longer than the specified 100
Created a chunk of size 103, which is longer than the specified 100
Created a chunk of size 164, which is longer than the specified 100
Created a chunk of size 172, which is longer than the specified 100
Created a chunk of size 102, which is longer than the specified 100
Created a chunk of size 175, which is longer than the specified 100
Using embedded DuckDB without persistence: data will be transient


In [12]:
# Query the vectors 
from langchain.chains import RetrievalQA
from langchain.llms import AzureOpenAI

# Completion model used to answer questions over the retrieved chunks.
# deployment_name is the Azure deployment to call; passing it as `engine` only
# worked by being shunted into model_kwargs with a warning.
# NOTE(review): this rebinds the global `llm` that query_google() reads at call
# time, so re-running the agent cell after this one would use this model instead
# of the chat deployment - consider a dedicated name if no later cell needs `llm`.
llm = AzureOpenAI(temperature=0, deployment_name="txt")

# map_reduce question answering over the Chroma retriever, also returning the
# source documents that were used.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_reduce",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
)

# The chain is called with a dict instead of qa.run(): with
# return_source_documents=True it produces more than one output key, which
# run() does not support.
result = qa({"query": "how to extract the attribute value from json string"})
print(result["result"])

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


 The syntax for extracting the attribute value from a JSON string depends on the type of the value. If the value is a primitive object, you can use jpath("skills.coding.[1]", json). If the value is a collection or a hashtable, you can use jpath("skills",json)["coding"][2].
