# Azure Cognitive Search LangChain Vector Code Sample
This code demonstrates how to use Azure Cognitive Search with OpenAI and the Azure Cognitive Search LangChain Vector Store.
To run the code, install the following packages. Use the latest pre-release version of the Azure Search SDK: `pip install azure-search-documents --pre`.

In [None]:
! pip install azure-search-documents --pre 
! pip install openai
! pip install python-dotenv
! pip install langchain

## Import required libraries and environment variables

In [15]:
# Import required libraries  
import openai
import os  
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch
from azure.search.documents.indexes.models import (
    SemanticSettings,
    SemanticConfiguration,
    PrioritizedFields,
    SemanticField
)
import pandas as pd

# Load environment variables from a .env file so the os.getenv() lookups in the
# following cells can read the OpenAI and Azure Cognitive Search settings.
load_dotenv()


True

## Configure OpenAI Settings

In [16]:
# Configure the OpenAI client from environment variables.
# For Azure OpenAI, set OPENAI_API_TYPE (e.g. "azure") together with
# OPENAI_API_BASE and OPENAI_API_VERSION in the .env file.
#
# NOTE: this must be an assignment (`=`). Written with `:` the line is only an
# annotation and leaves openai.api_type untouched. When the variable is unset,
# the library's current value is kept instead of being overwritten with None.
openai.api_type = os.getenv("OPENAI_API_TYPE", openai.api_type)
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")

# Names of the chat/completion model and of the embedding model (or deployment).
completion_model: str = os.getenv("OPENAI_MODEL")
embedding_model: str = os.getenv("OPENAI_MODEL_EMBED")

## Configure Vector Store Settings

In [17]:
# Azure Cognitive Search connection settings, taken from the environment.
# Each value is None when the corresponding variable is not set.
vector_store_address: str = os.environ.get("AZURE_SEARCH_ENDPOINT")
vector_store_key: str = os.environ.get("AZURE_SEARCH_ADMIN_KEY")
index_name: str = os.environ.get("AZURE_SEARCH_INDEX")

## Create index
Create the index in Azure Cognitive Search service:

In [18]:
# Embedding client; the same environment value is passed as both the
# deployment name and the model name.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
    deployment=embedding_model,
    model=embedding_model,
    chunk_size=1,
    openai_api_key=openai.api_key,
    openai_api_base=openai.api_base,
    openai_api_type="azure",
)

# A single semantic configuration: its name is registered in the settings,
# marked as the default, and handed to the vector store.
semantic_config_name = 'centriqe-config'
semantic_fields = PrioritizedFields(
    title_field=SemanticField(field_name='content'),
    prioritized_content_fields=[SemanticField(field_name='content')],
    prioritized_keywords_fields=[SemanticField(field_name='metadata')],
)
semantic_settings = SemanticSettings(
    default_configuration=semantic_config_name,
    configurations=[
        SemanticConfiguration(
            name=semantic_config_name,
            prioritized_fields=semantic_fields,
        ),
    ],
)

# Vector store bound to the configured search endpoint and index; queries are
# embedded with embeddings.embed_query.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_key,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
    semantic_configuration_name=semantic_config_name,
    semantic_settings=semantic_settings,
)

In [19]:
# Load the product catalog into a DataFrame and show it.
# Catalog columns: ProductID, ProductName, ProductBrand, Gender, Price (INR),
# Description, PrimaryColor, ImageURL
catalog_csv = '../data/products_catalog.csv'
df = pd.read_csv(catalog_csv)

print(df)

     ProductID                                        ProductName  \
0     10017413  DKNY Unisex Black & Grey Printed Medium Trolle...   
1     10016283  EthnoVogue Women Beige & Grey Made to Measure ...   
2     10009781  SPYKAR Women Pink Alexa Super Skinny Fit High-...   
3     10015921  Raymond Men Blue Self-Design Single-Breasted B...   
4     10017833  Parx Men Brown & Off-White Slim Fit Printed Ca...   
...        ...                                                ...   
1724      OOEO  Modi Jacket Mens Orange Terry Wool Textured Re...   
1725      JGRD  Greenfibre Mens Olive Camo 100% Cotton Slim Fi...   
1726      OOGD  JadeBlue Mens Fawn 100% Cotton Slim Fit Solid ...   
1727      OOGE  JadeBlue Mens Olive Green 100% Cotton Slim Fit...   
1728      OOGH  JadeBlue Mens Wine 100% Cotton Slim Fit Solid ...   

     ProductBrand  Gender  Price (INR)  \
0            DKNY  Unisex        11745   
1      EthnoVogue   Women         5810   
2          SPYKAR   Women          899   
3  

## Insert text and embeddings into vector store
Convert each product catalog row into a LangChain `Document` and load the documents into the index

In [20]:
from langchain.schema import Document

# Substrings removed from every description, applied in this order
# (presumably text-encoding artifacts and bullet/quote characters - confirm).
_UNWANTED_SUBSTRINGS = ('â€', 'Â', '©', '*', '•', '“', '”')


def _strip_unwanted(text):
    """Return *text* with every entry of _UNWANTED_SUBSTRINGS removed."""
    for unwanted in _UNWANTED_SUBSTRINGS:
        text = text.replace(unwanted, '')
    return text


# One Document per catalog row: the searchable text is "<brand> <cleaned description>",
# and the remaining columns travel along as metadata.
docs = [
    Document(
        page_content=row['ProductBrand'] + " " + _strip_unwanted(row['Description']),
        metadata={
            "id": row['ProductID'],
            "name": row['ProductName'],
            "brand": row['ProductBrand'],
            "gender": row['Gender'],
            "price": row['Price (INR)'],
            "color": row['PrimaryColor'],
            "image": row['ImageURL'],
        },
    )
    for _, row in df.iterrows()
]

# Upload all documents; as the last expression, the call's result is shown as the cell output.
vector_store.add_documents(documents=docs)

['OGQyZjE3OGUtYzJmZC00NmFiLTljYmEtZWIzOGRjN2UzZmY1',
 'MjZmZDA4MGMtOWJmYS00OGZhLTk5ZDktZWRlNTIxZThlZDAz',
 'YWM0ZWE0YzItZjkxZi00OTFjLTkzMjUtODUyNDllYjQ1MGU0',
 'MjdhYzI4NjktY2QxZC00YzRhLTliNjMtZGIzNTEzZmUxMGRj',
 'Zjk3OTBiMzItYzhhMi00YmFjLWE5NTMtYzkzMjc3ODZjZTVl',
 'NDMzNWUyYWUtZmY1Yi00ZWE0LThjOGItZjc4Y2ZmMzFiMDc2',
 'YmFmNmI0MTktMjg3My00NzVlLTg3MDItZTc3N2FkMWY4YjMx',
 'OWIwYmJiMDEtNTVlMS00NTVjLTljYzctZWVlNjM4MDBiOWI4',
 'OGU0MTg0NDUtNDVjNi00YTNiLTkyMDYtYjBlOWFlMzhhMjE1',
 'Nzc4NzgzYjUtODUyOS00NjhmLTg1NDEtODBjNDEyYmIxZTE3',
 'NzY1ZTdkMWQtMDczOC00ZDc3LTg4YmItYTE3MDM2Mzg1Zjk5',
 'Njk2OTI0MDMtMGY2MC00YzUwLTg3YjktYzAxOWQxYTRkZTlk',
 'YzI0ZTUwOTktZDhmMy00ZjdlLTlkZGQtNmYwNjAwYzE1OTZi',
 'M2RlODJhMzAtMzM1MC00MzhhLWJhNGQtOWM1ZWI0MjAwZTc2',
 'MzNkN2IwNWUtOWE1Yi00ZWEwLWJhNDktOWM5YjQwNzk0ZmEz',
 'M2ViZmZjZmEtOWY2Ni00ODUwLWIzZGUtMTc5ZDk4ZGEzMjg2',
 'ZjA4MTg2ZDctNmRlMS00ZGU4LWI0NmEtOGQyZDZlNzVjYzZh',
 'ZTIxOTMwYzgtZGNmZS00YjE2LWFjYTUtNWQxZWYyZDRiYzBk',
 'MGM4ZDMyOTUtZDYyMi00NjRmLWJiYzMtODcwNGQzMjc1

In [None]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

# Description of the metadata attached to each indexed Document, for use by a
# self-querying retriever. Every `name` must match a metadata key written when
# the documents were built ("name", "brand", "gender", "price", "color", "image").
metadata_field_info = [
    AttributeInfo(
        name="name",
        description="The name of the product",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="brand",
        description="The brand of the product",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="gender",
        description="The gender the product is for",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="price",
        description="The price of the product",
        type="integer",
    ),
    AttributeInfo(
        name="color",
        description="The color of the product",
        type="string or list[string]",
    ),
    AttributeInfo(
        # The image URL is stored under the metadata key "image", not "image_url".
        name="image",
        description="The url of an image of the product",
        type="string",
    ),
]

# Short description of what each Document's page_content holds.
document_content_description = "Brief product description of clothing"

In [None]:
from urllib.parse import urlparse

from langchain.chat_models import AzureChatOpenAI
from langchain.retrievers import AzureCognitiveSearchRetriever

# Setup LLM and QA chain
#llm = AzureChatOpenAI(
#    deployment_name=completion_model,
#    temperature=0,
#    openai_api_base=openai.api_base,
#    openai_api_version=openai.api_version,
#)

#retriever = SelfQueryRetriever.from_llm(
#    llm,
#    vector_store,
#    document_content_description,
#    metadata_field_info,
#    verbose=True
#)

# Define retriever; k determines the number of documents to return
#retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5, "fetch_k": 4})


def _service_name_from_endpoint(endpoint):
    """Return the search service name, i.e. the first label of the endpoint host.

    Accepts the endpoint with or without a URL scheme, e.g.
    "https://my-service.search.windows.net" -> "my-service".

    Raises:
        ValueError: if *endpoint* is empty or None (AZURE_SEARCH_ENDPOINT not set).
    """
    if not endpoint:
        raise ValueError("AZURE_SEARCH_ENDPOINT is not set")
    # Without a scheme, urlparse() reports no hostname; fall back to the raw value.
    host = urlparse(endpoint).hostname or endpoint
    return host.split(".")[0]


# The service name comes from the configured endpoint rather than a hard-coded
# value, so that it always refers to the same service as the API key and index.
retriever = AzureCognitiveSearchRetriever(
    service_name=_service_name_from_endpoint(vector_store_address),
    api_key=vector_store_key,
    index_name=index_name,
    content_key="content",
    top_k=10,
)

In [None]:
# Ask the retriever for documents relevant to a free-text query; only the query
# text is supplied, with no additional options.
retriever.get_relevant_documents("What are some pepe jeans")

## Perform a vector similarity search

In [None]:
# Vector similarity search: request up to k=10 matches for the query text,
# then print the text of each match on its own line.
docs = vector_store.similarity_search(query="red pants", k=10, search_type="similarity")

results = [match.page_content for match in docs]
print("\n".join(results))

## Perform a hybrid search

In [None]:
# Hybrid search: request up to k=3 matches for the query text,
# then print the text of each match on its own line.
# NOTE(review): the query text does not look related to the product catalog
# loaded above - possibly carried over from another sample; confirm.
docs = vector_store.hybrid_search(query="What is challenger sales model", k=3, search_type="hybrid")

results = [match.page_content for match in docs]
print("\n".join(results))

## Perform a Hybrid Search with Semantic re-ranking (powered by Bing)

In [None]:
# Hybrid search with semantic re-ranking; every result is a (document, score) pair.
docs_and_scores = vector_store.semantic_hybrid_search_with_score(
    query="What is challenger sales model",
    k=3,
)

# Report each hit: optional semantic answer, content, score, and caption.
for doc, score in docs_and_scores:
    print("-" * 80)

    # Semantic answer, when one is attached: prefer the highlighted form,
    # otherwise fall back to the plain text.
    answers = doc.metadata['answers']
    if answers:
        print(f"Semantic Answer: {answers.get('highlights') or answers['text']}")
        print(f"Semantic Answer Score: {score}")

    print("Content:", doc.page_content)
    captions = doc.metadata['captions']
    print(f"Score: {score}")

    # Caption, with the same highlighted-over-plain preference.
    if not captions:
        print("Caption not available")
        continue
    print(f"Caption: {captions.get('highlights') or captions['text']}")
