In [None]:
#pip install azure-search-documents
#pip install azure-identity


In [None]:
import os

from langchain.embeddings import OpenAIEmbeddings, AzureOpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch

In [None]:
# The following environment variables are expected to be set before this cell runs:
#   AZURE_OPENAI_API_KEY
#   AZURE_OPENAI_ENDPOINT
#   AZURE_OPENAI_API_VERSION
#   AZURE_AISEARCH_ADDRESS
#   AZURE_AISEARCH_API_KEY
# Only the two AZURE_AISEARCH_* variables are read in this cell; the
# AZURE_OPENAI_* ones are presumably picked up by the OpenAI client itself
# (TODO confirm). The API version is additionally pinned below.


def _require_env(name):
    """Return the value of environment variable ``name`` with trailing whitespace removed.

    Raises:
        RuntimeError: if the variable is unset or contains only whitespace,
            so a missing setting is reported by name instead of surfacing as
            an AttributeError on ``None``.
    """
    value = os.getenv(name)
    if value is None or not value.strip():
        raise RuntimeError(
            f"Environment variable {name!r} is not set; "
            "export it before running this notebook."
        )
    # Trailing whitespace/newlines are dropped, as values pasted into shell
    # profiles or .env files may carry them.
    return value.rstrip()


vector_store_address = _require_env('AZURE_AISEARCH_ADDRESS')
vector_store_password = _require_env('AZURE_AISEARCH_API_KEY')

azure_openai_deployment = "support-embedding"
# NOTE(review): `model` is not referenced in the visible cells — confirm it is still needed.
model                   = "text-embedding-ada-002"
index_name              = "langchain-search-index"
azure_openai_version    = "2023-05-15"

The index in Azure AI Search must be created with the following required fields:

 name | type | key | retrievable | searchable
  --- | --- | --- | --- | ---
  id | Edm.String | true | true | false
  content | Edm.String | false | false | false
  metadata | Edm.String | false | false | false 
  content_vector | Collection(Edm.Single) | false | false | true
  


In [None]:
from azure.search.documents.indexes.models import (
    SearchableField,
    SearchField,
    SearchFieldDataType,
    SimpleField,
)

# Embedding client bound to the Azure OpenAI deployment and API version
# configured earlier in the notebook.
embeddings = AzureOpenAIEmbeddings(
    openai_api_version=azure_openai_version,
    azure_deployment=azure_openai_deployment,
)

# Embed a throwaway string once so the vector field is sized to whatever
# length the deployed embedding model actually returns.
embedding_dimensions = len(embeddings.embed_query("Text"))

string_type = SearchFieldDataType.String
vector_type = SearchFieldDataType.Collection(SearchFieldDataType.Single)

# Document key.
id_field = SimpleField(name="id", type=string_type, key=True, filterable=True)

# Raw chunk text.
content_field = SearchableField(name="content", type=string_type, searchable=True)

# Embedding of the chunk text.
content_vector_field = SearchField(
    name="content_vector",
    type=vector_type,
    searchable=True,
    vector_search_dimensions=embedding_dimensions,
    # NOTE(review): newer azure-search-documents releases appear to have
    # renamed this keyword (to vector_search_profile_name) — confirm against
    # the installed SDK version.
    vector_search_configuration="default",
)

# Document metadata stored as a string.
metadata_field = SearchableField(name="metadata", type=string_type, searchable=True)

# Additional field to store the title
title_field = SearchableField(name="title", type=string_type, searchable=True)

# Additional field for filtering on document source
source_field = SimpleField(name="source", type=string_type, filterable=True)

fields = [
    id_field,
    content_field,
    content_vector_field,
    metadata_field,
    title_field,
    source_field,
]

# NOTE(review): this rebinds index_name, replacing any value assigned earlier
# in the notebook — confirm which index is the intended target.
index_name: str = "langchain-vector-demo-custom"

vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
    fields=fields,
)

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Source file and chunking parameters for this ingestion step.
cv_path = "../CV_en.txt"
chunk_size, chunk_overlap = 1000, 0

# Read the source file as UTF-8 text.
loader = TextLoader(cv_path, encoding="utf-8")
documents = loader.load()

# Break the loaded document(s) into chunks with no overlap between them.
text_splitter = CharacterTextSplitter(
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)
docs = text_splitter.split_documents(documents)

# Upload the chunks to the vector store; the call's return value is the cell output.
# NOTE(review): this runs on every execution of the cell — presumably re-running
# uploads the same chunks again; confirm whether that creates duplicates.
vector_store.add_documents(documents=docs)

## Perform a Vector Similarity search 
> [langchain docs](https://python.langchain.com/docs/integrations/vectorstores/azuresearch#perform-a-vector-similarity-search)

In [None]:
query = "what is the education of candidate"

# Perform a similarity search and keep the top 3 hits.
docs = vector_store.similarity_search(
    query=query,
    k=3,
    search_type="similarity",
)

# Show the best match. Guard against an empty result list so the cell reports
# the situation instead of failing with IndexError on docs[0] (presumably
# possible when the index is empty or freshly uploaded chunks are not yet
# searchable — TODO confirm).
if docs:
    print(docs[0].page_content)
else:
    print(f"No documents were returned for query: {query!r}")