### **Load environment variables from the `.env` file**

In [1]:
import html
import os

import faiss
import pandas as pd
from dotenv import load_dotenv
from IPython.display import display, HTML
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import AzureOpenAIEmbeddings
from openai import AzureOpenAI

In [2]:
# Read the .env file so its entries become visible through the process environment.
load_dotenv()

# Connection settings for the Azure OpenAI resource; each is None when the
# corresponding environment variable is not set.
AISTUDIO_AZURE_OPENAI_ENDPOINT = os.environ.get("AISTUDIO_AZURE_OPENAI_ENDPOINT")
AISTUDIO_AZURE_OPENAI_KEY = os.environ.get("AISTUDIO_AZURE_OPENAI_KEY")

# The embedding deployment and the embedding model are both set to the same literal here.
AISTUDIO_AZURE_OPENAI_EMBEDDING_DEPLOYMENT = "text-embedding-3-large"
AISTUDIO_AZURE_OPENAI_EMBEDDING_MODELNAME = "text-embedding-3-large"

In [3]:
# Smoke-test the embeddings endpoint with the raw Azure OpenAI client.
# `AzureOpenAI` is already imported in the notebook's first cell, so it is not
# re-imported here.

client = AzureOpenAI(
    api_version="2024-02-01",
    azure_endpoint=AISTUDIO_AZURE_OPENAI_ENDPOINT,
    api_key=AISTUDIO_AZURE_OPENAI_KEY,
)

# With the Azure client, `model` selects the deployment to call, so pass the
# deployment constant rather than the underlying model name.
response = client.embeddings.create(
    input=["first phrase", "second phrase", "third phrase"],
    model=AISTUDIO_AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
)

# Print each vector's length plus its first two and last two components,
# enough to confirm a real embedding came back without dumping every value.
for item in response.data:
    length = len(item.embedding)
    print(
        f"data[{item.index}]: length={length}, "
        f"[{item.embedding[0]}, {item.embedding[1]}, "
        f"..., {item.embedding[-2]}, {item.embedding[-1]}]"
    )

# Token accounting reported by the service for this request.
print(response.usage)

data[0]: length=3072, [0.022330209612846375, -0.002088305074721575, ..., -0.014379994943737984, 0.006100048776715994]
data[1]: length=3072, [0.011640272103250027, 0.005252661183476448, ..., -0.028720801696181297, -0.0025770869106054306]
data[2]: length=3072, [0.016326788812875748, -0.0018455119570717216, ..., -0.005349587649106979, 0.006049444433301687]
Usage(prompt_tokens=6, total_tokens=6)


In [4]:
# Create a vector store holding one sample text and query it end to end.
# `InMemoryVectorStore` and `AzureOpenAIEmbeddings` are imported in the
# notebook's first cell.

embeddings = AzureOpenAIEmbeddings(
    model=AISTUDIO_AZURE_OPENAI_EMBEDDING_MODELNAME,
    # Name the deployment explicitly instead of relying on it matching the model name.
    azure_deployment=AISTUDIO_AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
    azure_endpoint=AISTUDIO_AZURE_OPENAI_ENDPOINT,
    api_key=AISTUDIO_AZURE_OPENAI_KEY,
    openai_api_version="2024-02-01",
)
text = "Semantic kernel is the framework for building context-aware reasoning applications"

vectorstore = InMemoryVectorStore.from_texts(
    [text],
    embedding=embeddings,
)

# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever()

# Retrieve the most similar text
retrieved_documents = retriever.invoke("What is Semantic kernel?")

# Show the top retrieved document's content (bare last expression -> cell output)
retrieved_documents[0].page_content

'Semantic kernel is the framework for building context-aware reasoning applications'

In [22]:
# Index the table schemas from the CSV so a question can be matched to a schema.
# `pd` and `InMemoryVectorStore` are imported in the notebook's first cell;
# `embeddings` is created by the earlier vector-store cell.

# Load the CSV file
df = pd.read_csv("./schema/schema.csv")

# Convert df["table_schema"] to a list of strings. Rows with a missing schema
# are dropped and remaining values coerced to str, so only text is embedded.
texts = df["table_schema"].dropna().astype(str).tolist()

# NOTE: this rebinds `vectorstore` / `retriever` from the sample-text cell above.
vectorstore = InMemoryVectorStore.from_texts(
    texts,
    embedding=embeddings,
)

# Use the vectorstore as a retriever.
# k=1: only the single most similar document is returned per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

In [23]:
# Retrieve the most similar document(s); how many come back is decided by the
# retriever's `k` setting (configured as 1 where the retriever is built).
retrieved_documents = retriever.invoke("How many books are in the bookstore?")

# Show each retrieved document's content. The text is HTML-escaped first so
# that characters such as <, > and & are displayed literally inside <pre>
# instead of being interpreted as markup.
for i, doc in enumerate(retrieved_documents):
    display(HTML(f"<pre>Document {i+1}:\n{html.escape(doc.page_content)}</pre>"))