In [1]:
import os
import openai
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import AzureSearch
from dotenv import load_dotenv

In [2]:
# Pull the key/value pairs from the local .env file into the process environment.
load_dotenv('.env')

# Every lookup below yields None when the variable is not defined.

# -- "source" settings: the endpoint and key are passed to OpenAIEmbeddings later on --
openai_search_source_endpoint = os.environ.get("OPENAI_SEARCH_SOURCE_ENDPOINT")
openai_search_source_key = os.environ.get("OPENAI_SEARCH_SOURCE_KEY")
openai_search_source_version = os.environ.get("OPENAI_SEARCH_SOURCE_VERSION")

# -- "search" settings: the endpoint and key are handed to SearchIndexClient and
#    AzureSearch; the name is used as the embeddings deployment/model --
openai_search_model_endpoint = os.environ.get("OPENAI_SEARCH_API_ENDPOINT")
openai_search_model_key = os.environ.get("OPENAI_SEARCH_API_KEY")
openai_search_model_name = os.environ.get("OPENAI_SEARCH_MODEL_NAME")

# -- "completion" settings: used to configure the openai module for chat completion --
openai_completion_model_endpoint = os.environ.get("OPENAI_COMPLETION_API_ENDPOINT")
openai_completion_model_key = os.environ.get("OPENAI_COMPLETION_API_KEY")
openai_completion_model_name = os.environ.get("OPENAI_COMPLETION_MODEL_NAME")
openai_completion_model_version = os.environ.get("OPENAI_COMPLETION_API_VERSION")

# -- name of the search index that is created and queried below --
openai_index_name = os.environ.get("OPENAI_SEARCH_INDEX_NAME")


In [4]:
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchableField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
)
from azure.core.credentials import AzureKeyCredential

# Client used to manage indexes on the search service.
index_client = SearchIndexClient(
    endpoint=openai_search_model_endpoint,
    credential=AzureKeyCredential(openai_search_model_key),
)

# Schema of the index: a string key plus two full-text searchable string fields.
# SimpleField always builds a non-searchable field (a `searchable=True` keyword
# passed to it is ignored), so the text fields are declared with SearchableField,
# which produces a searchable string field.
# NOTE(review): this schema has no vector field; confirm whether the AzureSearch
# vector store configured later in the notebook requires one on a pre-existing index.
index_schema = SearchIndex(
    name=openai_index_name,
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),  # document key
        SearchableField(name="content"),   # text of the indexed chunk
        SearchableField(name="metadata"),  # metadata stored alongside the chunk
    ],
)

# Best effort: report a failure (for example, the index already exists)
# without stopping the notebook.
try:
    index_client.create_index(index_schema)
    print("Index created successfully!")
except Exception as e:
    print(f"Failed to create the index: {e}")

Index created successfully!


In [12]:
## openai == 0.28.1
# Embedding client for Azure OpenAI.
embeddings = OpenAIEmbeddings(
    deployment = openai_search_model_name,   # exact name of the deployment in Azure OpenAI
    model = openai_search_model_name,        # optional; depends on how the deployment was created
    openai_api_type = "azure",
    openai_api_base = openai_search_source_endpoint,
    openai_api_key = openai_search_source_key,
    # Use the API version configured in .env; fall back to the previously
    # hard-coded version when the variable is not set.
    openai_api_version = openai_search_source_version or "2023-05-15",
)

## Connect to Azure
# Vector store bound to the search index; queries are embedded with `embeddings.embed_query`.
acs = AzureSearch(
    azure_search_endpoint = openai_search_model_endpoint,
    azure_search_key = openai_search_model_key,
    index_name = openai_index_name,
    embedding_function = embeddings.embed_query,
)

In [13]:
from langchain.document_loaders import CSVLoader

# Read RAG_df2.csv (decoded as UTF-8) with LangChain's CSV loader and keep
# the loaded documents in `documents`.
loader = CSVLoader(file_path="RAG_df2.csv", encoding="utf-8")
documents = loader.load()

In [14]:
import hashlib

from langchain.text_splitter import CharacterTextSplitter

# Split the loaded documents into chunks with a 1000-character target and no overlap.
# NOTE(review): the recorded run logged "Created a chunk of size ... which is longer
# than the specified 1000", so some documents are not being cut down to chunk_size;
# confirm whether a different separator/splitter is wanted.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Give every chunk a deterministic key derived from its origin and position.
# Without explicit keys each run uploads the chunks under new random ids, so
# re-running this cell keeps adding copies of the same data to the index.
chunk_keys = [
    hashlib.sha256(
        f"{chunk.metadata.get('source')}|{chunk.metadata.get('row')}|{position}".encode("utf-8")
    ).hexdigest()
    for position, chunk in enumerate(docs)
]

acs.add_documents(documents=docs, keys=chunk_keys)

Created a chunk of size 1511, which is longer than the specified 1000
Created a chunk of size 2257, which is longer than the specified 1000
Created a chunk of size 1111, which is longer than the specified 1000
Created a chunk of size 1081, which is longer than the specified 1000
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='demo-app.openai.azure.com', port=443): Read timed out. (read timeout=600).


HttpResponseError: () Storage quota has been exceeded for this service. You must either delete documents first, or use a higher SKU for additional quota.
Code: 
Message: Storage quota has been exceeded for this service. You must either delete documents first, or use a higher SKU for additional quota.

In [15]:
# Ask the vector store for up to five matches for the question; each hit is
# read below as a (document, relevance score) pair.
docs = acs.similarity_search_with_relevance_scores(
    "What is a good book of fantasy above 3 points of rating", k=5
)

# Show the text and the score of the first hit.
doc = docs[0][0]
score = docs[0][1]
print("Contenido:", doc.page_content)
print("Relevancia:", score)

Contenido: title: Queen of Sorcery (Belgariad (Paperback))
author: David Eddings
year: 1982
isbn: 0345323890
description: For use in schools and libraries only. A farm boy becomes involved in the struggle to recover the powerful Orb of Aldur and prevent the evil God Torak from taking control of the world.
pages: 327
genres: Fantasy fiction
rating: 5.0
Relevancia: 0.8536901


In [16]:
from pprint import pprint

# Point the openai module (0.28-style global configuration) at the Azure
# OpenAI resource that hosts the completion deployment.
openai.api_type = "azure"
openai.api_base = openai_completion_model_endpoint  # Your Azure OpenAI resource's endpoint value.
openai.api_key = openai_completion_model_key
openai.api_version = openai_completion_model_version

# Same question that was sent to the vector store in the similarity-search cell,
# so the retrieved records and the question given to the model agree.
question = "What is a good book of fantasy above 3 points of rating"

# `docs` holds (document, score) pairs from the similarity search; pass the text
# of every retrieved record to the model, not only the first one.
context = "\n\n".join(hit[0].page_content for hit in docs)

messages = [
    {
        "role": "system",
        "content": (
            "Assistant is a chatbot that helps you find the best book for your taste. "
            "Answer using only the book records provided by the user."
        ),
    },
    # The retrieved records are supplied as context in the user turn rather than
    # being presented as something the assistant already said.
    {"role": "user", "content": f"Book records:\n{context}\n\nQuestion: {question}"},
]

response = openai.ChatCompletion.create(
    deployment_id=openai_completion_model_name,
    messages=messages,
)

# Full raw response first, then just the generated answer.
pprint(response)
print(response['choices'][0]['message']['content'])

{'choices': [{'content_filter_results': {'hate': {'filtered': False,
                                                  'severity': 'safe'},
                                         'protected_material_code': {'detected': False,
                                                                     'filtered': False},
                                         'protected_material_text': {'detected': False,
                                                                     'filtered': False},
                                         'self_harm': {'filtered': False,
                                                       'severity': 'safe'},
                                         'sexual': {'filtered': False,
                                                    'severity': 'safe'},
                                         'violence': {'filtered': False,
                                                      'severity': 'safe'}},
              'finish_reason': 'stop',
              'index': 0