In [None]:
import os

from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its entries into the process environment, so the
# os.environ / os.getenv lookups in later cells can read the Azure settings.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

### LangChain LLM Wrappers

Standard LLM wrappers for LangChain.

In [None]:
from langchain.llms import AzureOpenAI
# Completion-style LLM wrapper. The model to call is forwarded as the `engine`
# entry of model_kwargs (presumably the Azure deployment name - confirm).
llm = AzureOpenAI(model_kwargs=dict(engine='text-davinci-003'))

# Smoke test: as the cell's last expression, the model's reply is displayed.
llm("Hello. I'm Daron!")


Switching to the Chat Model in LangChain

In [None]:
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.chat_models import AzureChatOpenAI

In [None]:
# Chat model client; every connection setting is read from environment variables.
chat = AzureChatOpenAI(
    openai_api_base=os.getenv("OPENAI_API_BASE"),
    openai_api_version=os.getenv("OPENAI_CHAT_API_VERSION"),
    deployment_name=os.getenv("OPENAI_CHAT_DEPLOYMENT_NAME"),
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    openai_api_type=os.getenv("OPENAI_API_TYPE"),
)

# The system message sets the persona; the human message carries the question.
system_message = SystemMessage(content="You are an expert cloud architect in love with Azure")
human_message = HumanMessage(content="Tell me what NoSQL Service is the best for a global deployment?")
messages = [system_message, human_message]

# Send the conversation and print the text of the reply.
response = chat(messages)
print(response.content, end="\n\n")

### Prompt Templates

In [None]:
from langchain import PromptTemplate

# Single-slot prompt: {concept} is substituted when the template is formatted.
template = (
    "You are an expert cloud architect in love with Azure. "
    "Explain to me the benefits of {concept}."
)
prompt = PromptTemplate(input_variables=["concept"], template=template)

In [None]:
# Fill the template for one concept, then send the resulting text to the completion model.
functions_prompt = prompt.format(concept="Azure Functions")
llm(functions_prompt)

### Chain

In [None]:
from langchain.chains import LLMChain                 

# Second-stage prompt: asks for the text placed in {newconcept} to be made easy to understand.
second_template = (
    "You are an expert educator and trainer. "
    "Help the below content making it easy to understand for anyone. {newconcept}"
)
second_prompt = PromptTemplate(input_variables=["newconcept"], template=second_template)

# One single-prompt chain per stage, both backed by the same completion model.
explainerChain = LLMChain(prompt=prompt, llm=llm)
educatorChain = LLMChain(prompt=second_prompt, llm=llm)

from langchain.chains import SimpleSequentialChain

# Explainer first, educator second; verbose=True is passed so the chain logs as it runs.
learningChain = SimpleSequentialChain(
    verbose=True,
    chains=[explainerChain, educatorChain],
)

output = learningChain.run("Azure Functions")
print(output)

### Embeddings 

Reading local text file.

In [None]:
import os

# The source text file is looked up in the current working directory.
file_name = "azure-functions-June-2023-Updates.txt"
current_directory = os.getcwd()
file_path = os.path.join(current_directory, file_name)

# Read the whole file as UTF-8; errors="replace" keeps undecodable bytes from raising.
with open(file_path, mode="r", encoding="utf-8", errors="replace") as file:
    contentUpdate = file.read()

Splitting text into chunks.

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for chunks of size 500 with no overlap between neighbours.
splitter_settings = dict(chunk_size=500, chunk_overlap=0)
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# The splitter takes a list of texts; here it is the single document read above.
texts = text_splitter.create_documents([contentUpdate])

# Peek at the first chunk as a sanity check.
first_chunk = texts[0]
print(first_chunk.page_content)

Creating embeddings.

In [None]:
import openai 
import uuid

from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader

def generate_embeddings(text, engine="text-embedding-ada-002"):
    """Return the embedding vector for ``text``.

    Args:
        text: The text to embed; passed as ``input`` to ``openai.Embedding.create``.
        engine: Name of the embedding engine passed to the API. Defaults to
            "text-embedding-ada-002", the value that used to be hard-coded, so
            existing single-argument calls behave exactly as before.

    Returns:
        The ``embedding`` entry of the first item in the response's ``data`` list.
    """
    response = openai.Embedding.create(input=text, engine=engine)
    return response['data'][0]['embedding']

# Build one record per chunk: a random id, the chunk's text, and that text's embedding.
embedding_collection = [
    {
        'id': str(uuid.uuid4()),  # random UUID used as the record id
        'content': chunk.page_content,  # the text of the chunk
        'contentVector': generate_embeddings(chunk.page_content),  # embedding of that text
    }
    for chunk in texts
]

# Show the first record as a sanity check.
print(embedding_collection[0])

### VectorStores

Installing the dev (pre-release) package for Azure Cognitive Search, as the vector search feature is still in preview.

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install azure-search-documents==11.4.0a20230509004 --index-url https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/ -vvv --no-deps

Creating the index.

In [None]:
import os
from azure.core.credentials import AzureKeyCredential 
from azure.search.documents.indexes import SearchIndexClient 
from azure.search.documents.indexes.models import (  
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SearchableField,
    SimpleField,  
    SearchIndex,  
    SearchField,  
    VectorSearch,  
    VectorSearchAlgorithmConfiguration,  
)   

# Endpoint and admin key of the Azure Cognitive Search service come from the environment.
service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
credential = AzureKeyCredential(os.environ.get("AZURE_SEARCH_ADMIN_KEY"))

index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)

# Index schema: a string key, the searchable chunk text, and the embedding vector.
id_field = SimpleField(name="id", type=SearchFieldDataType.String, key=True)
content_field = SearchableField(
    name="content",
    type=SearchFieldDataType.String,
    searchable=True,
    retrievable=True,
)
# text-embedding-ada-002 produces 1,536-dimensional vectors, so each document stores 1,536 floats.
content_vector_field = SearchField(
    name="contentVector",
    type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
    searchable=True,
    dimensions=1536,
    vector_search_configuration="my-vector-config",
)
fields = [id_field, content_field, content_vector_field]

# HNSW (Hierarchical Navigable Small World) is the only supported algorithm for now.
hnsw_parameters = {
    "m": 4,  # number of bi-directional links created for every new element during construction
    "efConstruction": 400,  # maximum number of nodes to visit while constructing the graph
    "efSearch": 1000,  # maximum number of nodes to visit during a search
    "metric": "cosine",
}
hnsw_configuration = VectorSearchAlgorithmConfiguration(
    name="my-vector-config",  # must match the name referenced by the contentVector field
    kind="hnsw",
    hnsw_parameters=hnsw_parameters,
)
vector_search = VectorSearch(algorithm_configurations=[hnsw_configuration])

# Create the index, or update it if one with this name already exists.
index = SearchIndex(name="vectorindex", fields=fields, vector_search=vector_search)
result = index_client.create_or_update_index(index)
print(f' {result.name} created')

Uploading the embeddings to Azure Cognitive Search

In [None]:
import uuid

from azure.search.documents import SearchClient 

# Client bound to the "vectorindex" index created in the previous step.
search_client = SearchClient(endpoint=service_endpoint, index_name='vectorindex', credential=credential)

# upload_documents returns one indexing result per document. Check them explicitly so a
# document that failed to index does not go unnoticed before the search step.
result = search_client.upload_documents(embedding_collection)

failed_keys = [item.key for item in result if not item.succeeded]
if failed_keys:
    raise RuntimeError(
        f"{len(failed_keys)} of {len(result)} documents failed to upload: {failed_keys}"
    )

Performing a vector similarity search

In [None]:
# Question that is embedded for the vector similarity search.
query = (
    "Can you provide the timestamp for the most recent information you have on Azure Functions? "
    "Please specify the date and time up to your last update."
)

In [None]:
from azure.search.documents.models import Vector 
from itertools import tee
  
# Client bound to the "vectorindex" index, authenticated with the admin key from the environment.
search_client = SearchClient(service_endpoint, "vectorindex", AzureKeyCredential(os.getenv("AZURE_SEARCH_ADMIN_KEY")))

# Pure vector query: the search text is empty and the embedded question is matched
# against the contentVector field, asking for the 3 nearest documents.
results = search_client.search(
    search_text="",
    vector=Vector(value=generate_embeddings(query), k=3, fields="contentVector"),
    select=["id", "content"],
)

# The result iterator is consumed exactly once here, so no itertools.tee copy is needed.
# Each hit is printed and also collected as a text block.
result_blocks = []
for result in results:
    print(f"Score: {result['@search.score']}")
    print(f"Content: {result['content']}")
    result_blocks.append(f"Score: {result['@search.score']}\nContent: {result['content']}\n\n")

# Concatenated text of all hits (empty string when there are none); later passed as the
# context of the RAG prompt.
top_results = "".join(result_blocks)

### Retrieval Augmented Generation (RAG)

Standard GPT Output

In [None]:
# Baseline without retrieved context: reuse `query` (the question embedded for the vector
# search) instead of repeating the literal, so the two cannot drift apart.
llm(query)

Augmented GPT Output

In [None]:
from langchain import PromptTemplate

# Prompt with two slots: {context} receives the retrieved text, {question} the user question.
rag_template = '''
###
CONTEXT:
"""
{context}
"""
###
RULES:
You are a helpful agent that answers questions.
Learn from the CONTEXT and answer the following question.
###
{question}
'''
rag_prompt = PromptTemplate(
    template=rag_template,
    input_variables=["context", "question"],
)

# Reuse `query` (the question that was embedded for the vector search) rather than a second
# copy of the literal, so retrieval and generation always use the same question.
# `top_results` holds the text of the search hits.
llm(rag_prompt.format(question=query, context=top_results))