In [15]:
import os
from openai import AzureOpenAI
from dotenv import load_dotenv

# Load .env file from the root directory
load_dotenv('../../.env')  # Go up two levels to the root

# Simple test to verify environment variables are loaded
print("Environment variables loaded:")
print(f"OPENAI_API_BASE: {os.getenv('OPENAI_API_BASE')}")
print(f"OPENAI_API_KEY: {'***' if os.getenv('OPENAI_API_KEY') else None}")
print(f"SEARCH_SERVICE_NAME: {os.getenv('SEARCH_SERVICE_NAME')}")
print(f"SEARCH_INDEX_NAME: {os.getenv('SEARCH_INDEX_NAME')}")

Environment variables loaded:
OPENAI_API_BASE: https://bogda-mflnsc12-eastus2.cognitiveservices.azure.com/openai/v1/
OPENAI_API_KEY: ***
SEARCH_SERVICE_NAME: https://demo-bogdan-search.search.windows.net
SEARCH_INDEX_NAME: demo-index


In [None]:
from azure.search.documents import SearchClient # replaces older AzureSearch()
from azure.core.credentials import AzureKeyCredential # for SearchClient
from openai import AzureOpenAI
import pandas as pd

# Check if required environment variables are set
required_vars = {
    'OPENAI_API_BASE': os.getenv('OPENAI_API_BASE'),
    'OPENAI_API_KEY': os.getenv('OPENAI_API_KEY'),
    'SEARCH_SERVICE_NAME': os.getenv('SEARCH_SERVICE_NAME'),
    'SEARCH_API_KEY': os.getenv('SEARCH_API_KEY'),
    'SEARCH_INDEX_NAME': os.getenv('SEARCH_INDEX_NAME')
}

print("Environment Variables Status:")
for var, value in required_vars.items():
    status = "✓ Set" if value else "✗ Missing"
    print(f"{var}: {status}")

# Only initialize clients if all required variables are set
if all(required_vars.values()):
    # Initialize Azure OpenAI client
    client = AzureOpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
        api_version=os.getenv("OPENAI_API_VERSION", "2023-07-01-preview"),
        azure_endpoint=os.getenv("OPENAI_API_BASE")
    )

    # Initialize Azure Search client  
    search_client = SearchClient(
        endpoint=os.getenv('SEARCH_SERVICE_NAME'),
        index_name=os.getenv('SEARCH_INDEX_NAME'),
        credential=AzureKeyCredential(os.getenv('SEARCH_API_KEY'))
    )

    print("\n✓ Clients initialized successfully!")
    print(f"Search endpoint: {os.getenv('SEARCH_SERVICE_NAME')}")
    print(f"Search index: {os.getenv('SEARCH_INDEX_NAME')}")
else:
    print("\n⚠️  Cannot initialize clients - missing required environment variables.")
    print("Please set the missing environment variables or create a .env file with:")
    print("OPENAI_API_BASE=your_azure_openai_endpoint")
    print("OPENAI_API_KEY=your_azure_openai_key") 
    print("SEARCH_SERVICE_NAME=your_search_service_endpoint")
    print("SEARCH_API_KEY=your_search_service_key")
    print("SEARCH_INDEX_NAME=your_search_index_name")

Environment Variables Status:
OPENAI_API_BASE: ✓ Set
OPENAI_API_KEY: ✓ Set
SEARCH_SERVICE_NAME: ✓ Set
SEARCH_API_KEY: ✓ Set
SEARCH_INDEX_NAME: ✓ Set





✓ Clients initialized successfully!
Search endpoint: https://demo-bogdan-search.search.windows.net
Search index: demo-index


In [4]:
from langchain.document_loaders import CSVLoader

# Load CSV from root directory with proper encoding to handle Unicode characters
loader = CSVLoader("../../wine-ratings.csv", encoding="utf-8") # works with langchain==0.0.329
documents = loader.load()

# Display the number of documents loaded
print(f"Loaded {len(documents)} documents from the CSV file")

Loaded 32780 documents from the CSV file


In [5]:
documents

[Document(page_content=': 0\nname: 1000 Stories Bourbon Barrel Aged Batch Blue Carignan 2016\ngrape: \nregion: Mendocino, California\nvariety: Red Wine\nrating: 91.0\nnotes: This is a very special, limited release of 1000 Stories Bourbon Barrel-Aged Carignan, their first-ever release of Carignan as a single varietal. Classic and rustic with a little edge. Look for notes of brilliantly racy red and black fruits set to a rich backdrop of toast, herbs, and cocoa.', metadata={'source': '../../wine-ratings.csv', 'row': 0}),
 Document(page_content=': 1\nname: 1000 Stories Bourbon Barrel Aged Gold Rush Red 2016\ngrape: \nregion: California\nvariety: Red Wine\nrating: 89.0\nnotes: The California Gold Rush was a period of courage, bravado and curiosity. It was with these characteristics in mind that we crafted Gold Rush Red. Grapes chosen from the golden hills and valleys of California were blended to create this bold, adventurous testament to that intrepid spirit.', metadata={'source': '../../

In [6]:
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
docs
#acs.add_documents(documents=docs)

[Document(page_content=': 0\nname: 1000 Stories Bourbon Barrel Aged Batch Blue Carignan 2016\ngrape: \nregion: Mendocino, California\nvariety: Red Wine\nrating: 91.0\nnotes: This is a very special, limited release of 1000 Stories Bourbon Barrel-Aged Carignan, their first-ever release of Carignan as a single varietal. Classic and rustic with a little edge. Look for notes of brilliantly racy red and black fruits set to a rich backdrop of toast, herbs, and cocoa.', metadata={'source': '../../wine-ratings.csv', 'row': 0}),
 Document(page_content=': 1\nname: 1000 Stories Bourbon Barrel Aged Gold Rush Red 2016\ngrape: \nregion: California\nvariety: Red Wine\nrating: 89.0\nnotes: The California Gold Rush was a period of courage, bravado and curiosity. It was with these characteristics in mind that we crafted Gold Rush Red. Grapes chosen from the golden hills and valleys of California were blended to create this bold, adventurous testament to that intrepid spirit.', metadata={'source': '../../

In [7]:
len(docs)

32780

In [8]:
docs[0]

Document(page_content=': 0\nname: 1000 Stories Bourbon Barrel Aged Batch Blue Carignan 2016\ngrape: \nregion: Mendocino, California\nvariety: Red Wine\nrating: 91.0\nnotes: This is a very special, limited release of 1000 Stories Bourbon Barrel-Aged Carignan, their first-ever release of Carignan as a single varietal. Classic and rustic with a little edge. Look for notes of brilliantly racy red and black fruits set to a rich backdrop of toast, herbs, and cocoa.', metadata={'source': '../../wine-ratings.csv', 'row': 0})

In [39]:
#search_client = AzureSearchClient(endpoint=search_endpoint, credential=search_api_key)
#from langchain_community.vectorstores import AzureSearch
#acs = AzureSearch(
#    client=search_client,
#    index_name="your-index-name",
#    embedding_function=embedding_function
#),
#search_client



In [9]:
#search_client.upload_documents # tried doing similar as acs.add_documents(documents=docs)

# First, let's check the index schema to understand what fields are expected
from azure.search.documents.indexes import SearchIndexClient

# Create index client to get schema information
index_client = SearchIndexClient(
    endpoint=os.getenv('SEARCH_SERVICE_NAME'),
    credential=AzureKeyCredential(os.getenv('SEARCH_API_KEY'))
)

try:
    # Get the index definition
    index = index_client.get_index(os.getenv('SEARCH_INDEX_NAME'))
    print("Index fields:")
    for field in index.fields:
        print(f"  - {field.name} ({field.type}) - Key: {field.key}, Searchable: {field.searchable}")
    
    print(f"\nSample LangChain document content:")
    if docs:
        print(f"Content: {docs[0].page_content[:200]}...")
        print(f"Metadata: {docs[0].metadata}")
        
except Exception as e:
    print(f"Error getting index schema: {e}")
    print("You may need to create the index first or check your credentials.")

Index fields:
  - id (Edm.String) - Key: True, Searchable: False
  - name (Edm.String) - Key: False, Searchable: True
  - grape (Edm.String) - Key: False, Searchable: True
  - region (Edm.String) - Key: False, Searchable: True
  - variety (Edm.String) - Key: False, Searchable: True
  - rating (Edm.Double) - Key: False, Searchable: False
  - notes (Edm.String) - Key: False, Searchable: True
  - content (Edm.String) - Key: False, Searchable: True

Sample LangChain document content:
Content: : 0
name: 1000 Stories Bourbon Barrel Aged Batch Blue Carignan 2016
grape: 
region: Mendocino, California
variety: Red Wine
rating: 91.0
notes: This is a very special, limited release of 1000 Stories ...
Metadata: {'source': '../../wine-ratings.csv', 'row': 0}


In [10]:
# Create a proper index schema for wine ratings data
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    SearchableField
)

# Define the index schema based on the wine data structure
index_name = os.getenv('SEARCH_INDEX_NAME')
fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True),
    SearchableField(name="name", type=SearchFieldDataType.String, searchable=True),
    SearchableField(name="grape", type=SearchFieldDataType.String, searchable=True),
    SearchableField(name="region", type=SearchFieldDataType.String, searchable=True),
    SearchableField(name="variety", type=SearchFieldDataType.String, searchable=True),
    SimpleField(name="rating", type=SearchFieldDataType.Double, filterable=True, sortable=True),
    SearchableField(name="notes", type=SearchFieldDataType.String, searchable=True),
    SearchableField(name="content", type=SearchFieldDataType.String, searchable=True),  # Full text content
]

# Create the index
search_index = SearchIndex(name=index_name, fields=fields)

try:
    # Delete existing index if it exists (to recreate with proper schema)
    try:
        index_client.delete_index(index_name)
        print(f"Deleted existing index: {index_name}")
    except:
        pass
    
    # Create new index with proper schema
    result = index_client.create_index(search_index)
    print(f"Created index '{result.name}' with {len(result.fields)} fields:")
    for field in result.fields:
        print(f"  - {field.name} ({field.type})")
        
except Exception as e:
    print(f"Error creating index: {e}")
    print("You may need proper permissions to create/modify indexes.")

Deleted existing index: demo-index
Created index 'demo-index' with 8 fields:
  - id (Edm.String)
  - name (Edm.String)
  - grape (Edm.String)
  - region (Edm.String)
  - variety (Edm.String)
  - rating (Edm.Double)
  - notes (Edm.String)
  - content (Edm.String)
Created index 'demo-index' with 8 fields:
  - id (Edm.String)
  - name (Edm.String)
  - grape (Edm.String)
  - region (Edm.String)
  - variety (Edm.String)
  - rating (Edm.Double)
  - notes (Edm.String)
  - content (Edm.String)


In [None]:
# Parse wine data and format for Azure Search
import re

def parse_wine_content(content):
    """Parse wine content string into structured fields"""
    lines = content.strip().split('\n')
    wine_data = {}
    
    for line in lines:
        if ':' in line:
            key, value = line.split(':', 1) # maximum 1 split
            key = key.strip()
            value = value.strip()
            wine_data[key] = value
    
    return wine_data

# Convert LangChain documents to properly structured Azure Search documents
azure_docs = []
for i, doc in enumerate(docs[:1000]):  # Process first 1000 documents to avoid timeout
    # Parse the wine content
    wine_data = parse_wine_content(doc.page_content)
    
    # Create Azure Search document with proper field mapping
    azure_doc = {
        "id": str(i),
        "name": wine_data.get("name", ""),
        "grape": wine_data.get("grape", ""),
        "region": wine_data.get("region", ""),
        "variety": wine_data.get("variety", ""),
        "rating": float(wine_data.get("rating", 0)) if wine_data.get("rating") and wine_data.get("rating") != "" else 0.0,
        "notes": wine_data.get("notes", ""),
        "content": doc.page_content  # Full content for search
    }
    azure_docs.append(azure_doc)

print(f"Formatted {len(azure_docs)} documents for Azure Search")
print(f"Sample document:")
if azure_docs:
    sample = azure_docs[0]
    for key, value in sample.items():
        print(f"  {key}: {str(value)[:50]}{'...' if len(str(value)) > 50 else ''}")

# Upload documents to Azure Search
try:
    print(f"\nUploading {len(azure_docs)} documents...")
    result = search_client.upload_documents(documents=azure_docs)
    
    # Process results
    successful = 0
    errors = []
    
    for res in result:
        if hasattr(res, 'succeeded') and res.succeeded:
            successful += 1
        elif hasattr(res, 'status_code') and res.status_code == 200:
            successful += 1  
        else:
            errors.append(res)
    
    print(f"Successfully uploaded {successful}/{len(result)} documents to Azure Search")
    
    # Show any errors
    if errors:
        print(f"Errors: {len(errors)}")
        for error in errors[:3]:  # Show first 3 errors
            print(f"  Error: {error}")
    else:
        print("✅ All documents uploaded successfully!")
            
except Exception as e:
    print(f"Error uploading documents: {e}")
    print("Check if the index exists and your credentials are correct.")

Formatted 1000 documents for Azure Search
Sample document:
  id: 0
  name: 1000 Stories Bourbon Barrel Aged Batch Blue Carign...
  grape: 
  region: Mendocino, California
  variety: Red Wine
  rating: 91.0
  notes: This is a very special, limited release of 1000 St...
  content: : 0
name: 1000 Stories Bourbon Barrel Aged Batch B...

Uploading 1000 documents...
Successfully uploaded 1000/1000 documents to Azure Search
✅ All documents uploaded successfully!
Successfully uploaded 1000/1000 documents to Azure Search
✅ All documents uploaded successfully!


In [12]:
# Perform similarity search using Azure Search client (replacing acs functionality)
query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"

# Search the Azure Search index
search_results = search_client.search(
    search_text=query,
    top=5,  # Get top 5 results
    select=["id", "name", "grape", "region", "variety", "rating", "notes", "content"],
    # Note: filtering by rating may need adjustment based on actual data
)

print(f"Search query: {query}")
print(f"Results:")
print("-" * 50)

results_list = list(search_results)
for i, result in enumerate(results_list):
    print(f"{i+1}. {result['name']}")
    print(f"   Region: {result.get('region', 'N/A')}")
    print(f"   Variety: {result.get('variety', 'N/A')}")
    print(f"   Rating: {result.get('rating', 'N/A')}")
    notes = result.get('notes', '')
    print(f"   Notes: {notes[:100]}{'...' if len(notes) > 100 else ''}")
    print(f"   Search Score: {result.get('@search.score', 'N/A')}")
    print()

# Show first result content (similar to original code)
if results_list:
    print("First result full content:")
    print(results_list[0]['content'])
    print(f"\nFirst result has these fields: {list(results_list[0].keys())}")
    
    # Show what this replaces from original acs code
    print(f"\n--- Equivalent to original acs code ---")
    print(f"docs[0][0].page_content equivalent: {results_list[0]['content']}")
    print(f"Available methods/fields: {list(results_list[0].keys())}")
else:
    print("No results found matching the criteria.")

Search query: What is the best Cabernet Sauvignon wine in Napa Valley above 94 points
Results:
--------------------------------------------------
1. Addendum Napa Valley Cabernet Sauvignon 2016
   Region: Napa Valley, California
   Variety: Red Wine
   Rating: 90.0
   Notes: Addendum Napa Valley Cabernet Sauvignon offers aromas of black cherry, red currant, cigar box, damp ...
   Search Score: 43.08507

2. 1881 Napa Cabernet Sauvignon 2018
   Region: Napa Valley, California
   Variety: Red Wine
   Rating: 90.0
   Notes: Harvested from vineyards across Napa Valley, this bold Cabernet Sauvignon is a blend of warmer clima...
   Search Score: 38.991734

3. Acre Cabernet Sauvignon 2015
   Region: Napa Valley, California
   Variety: Red Wine
   Rating: 93.0
   Notes: The 2015 Cabernet Sauvignon is the perfect representation of Napa Valley Cabernet, from its deep rub...
   Search Score: 36.279778

4. Aloft Howell Mountain Cabernet Sauvignon 2008
   Region: Howell Mountain, Napa Valley, Califo

In [38]:
# Search with rating filter using query syntax - find wines above 94 points
# Note: Using the original natural language query
query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"

# Search using query syntax for filtering
search_results = search_client.search(
    search_text=query,
    top=20,  # Get more results to find high-rated wines
    select=["id", "name", "grape", "region", "variety", "rating", "notes", "content"]
)

print(f"Search query (with rating filter): {query}")
print(f"Looking for wines above 94 points...")
print("-" * 60)

results_list = list(search_results)
# Sort by rating in Python (descending order - highest first)
results_list.sort(key=lambda x: x.get('rating', 0), reverse=True)

if results_list:
    for i, result in enumerate(results_list):
        print(f"{i+1}. {result['name']} - Rating: {result.get('rating', 'N/A')}")
        print(f"    Region: {result.get('region', 'N/A')}")
        print(f"    Variety: {result.get('variety', 'N/A')}")
        notes = result.get('notes', '')
        print(f"    Notes: {notes[:150]}{'...' if len(notes) > 150 else ''}")
        print(f"    Search Score: {result.get('@search.score', 'N/A')}")
        print()
    
    print(f"\nFound {len(results_list)} wines with the search criteria!")
    if results_list:
        print(f"Highest rated: {results_list[0].get('name', 'N/A')} with {results_list[0].get('rating', 'N/A')} points")
else:
    print("No wines found with this search criteria.")
    
    # Try alternative search approaches
    print("\nTrying alternative search approaches...")
    
    # Try searching for just high ratings
    rating_query = "rating:>94"
    rating_search = search_client.search(
        search_text=rating_query,
        top=20,
        select=["id", "name", "grape", "region", "variety", "rating", "notes"]
    )
    
    rating_results = list(rating_search)
    rating_results.sort(key=lambda x: x.get('rating', 0), reverse=True)
    
    if rating_results:
        print(f"Found {len(rating_results)} high-rated wines (>94 points):")
        for i, result in enumerate(rating_results[:10]):  # Show top 10
            print(f"{i+1}. {result['name']} - {result.get('rating', 'N/A')} points")
            print(f"    Region: {result.get('region', 'N/A')}")
    else:
        # Final attempt - search for text content mentioning high ratings
        text_search = search_client.search(
            search_text="Cabernet Sauvignon Napa Valley",
            top=50,
            select=["id", "name", "grape", "region", "variety", "rating", "notes"]
        )
        
        text_results = list(text_search)
        # Filter in Python for ratings > 94
        high_rated = [r for r in text_results if r.get('rating', 0) > 94]
        high_rated.sort(key=lambda x: x.get('rating', 0), reverse=True)
        
        if high_rated:
            print(f"Found {len(high_rated)} Cabernet Sauvignon wines from Napa Valley above 94 points:")
            for i, result in enumerate(high_rated[:10]):
                print(f"{i+1}. {result['name']} - {result.get('rating', 'N/A')} points")
        else:
            print("No high-rated wines found with any approach.")

Search query (with rating filter): What is the best Cabernet Sauvignon wine in Napa Valley above 94 points
Looking for wines above 94 points...
------------------------------------------------------------
1. Alejandro Bulgheroni Lithology Beckstoffer To Kalon Vineyard Cabernet Sauvignon 2016 - Rating: 97.0
    Region: Napa Valley, California
    Variety: Red Wine
    Notes: Alejandro Bulgheroni Estate Lithology is a series of single vineyard and AVA-designated wines made from select sites within Napa Valley. Lithology Bec...
    Search Score: 27.93148

2. Alejandro Bulgheroni Lithology Beckstoffer Dr. Crane Vineyard Cabernet Sauvignon 2017 - Rating: 94.0
    Region: Napa Valley, California
    Variety: Red Wine
    Notes: Alejandro Bulgheroni Estate Lithology is a series of single vineyard and AVA-designated wines made from select sites within Napa Valley. The Dr. Crane...
    Search Score: 30.772947

3. Alejandro Bulgheroni Lithology Beckstoffer To Kalon Vineyard Cabernet Sauvignon 20

In [14]:
query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"

search_results = search_client.search(
        search_text=query,
        top=20,  # Get more results to find high-rated wines
        select=["id", "name", "grape", "region", "variety", "rating", "notes", "content"]
    )
top_results = list(search_results)
#print([top_results[i]['content'] for i in range(len(top_results))] if top_results else 'No results found.')
print(top_results if top_results else 'No results found.')



[{'variety': 'Red Wine', 'name': 'Addendum Napa Valley Cabernet Sauvignon 2016', 'grape': '', 'region': 'Napa Valley, California', 'rating': 90.0, 'id': '356', 'content': ": 356\nname: Addendum Napa Valley Cabernet Sauvignon 2016\ngrape: \nregion: Napa Valley, California\nvariety: Red Wine\nrating: 90.0\nnotes: Addendum Napa Valley Cabernet Sauvignon offers aromas of black cherry, red currant, cigar box, damp earth, cloves, boysenberry, baking spices, and baker's chocolate  Flavors of black cherry, dark chocolate, baking spices, black currant, vanilla, and earth.", 'notes': "Addendum Napa Valley Cabernet Sauvignon offers aromas of black cherry, red currant, cigar box, damp earth, cloves, boysenberry, baking spices, and baker's chocolate  Flavors of black cherry, dark chocolate, baking spices, black currant, vanilla, and earth.", '@search.score': 43.08507, '@search.reranker_score': None, '@search.highlights': None, '@search.captions': None}, {'variety': 'Red Wine', 'name': '1881 Napa Ca

In [39]:
# ORIGINAL VERSION - Basic RAG function (kept unchanged)- gives excellent results
from openai import OpenAI

endpoint = "https://bogda-mflnsc12-eastus2.cognitiveservices.azure.com/openai/v1/"
model_name = "gpt-35-turbo"
deployment_name = "gpt-35-turbo-2"

api_key = os.getenv("OPENAI_API_KEY")

client = OpenAI(
    base_url=f"{endpoint}",
    api_key=api_key
)

def ai_RAG_chat(user_message):
    # Search using query syntax for filtering
    search_results = search_client.search(
        search_text=user_message,
        top=20,
        select=["id", "name", "grape", "region", "variety", "rating", "notes", "content"]
    )
    top_results = list(search_results)

    # Simple context formation using top_results directly
    context = str(top_results)
    
    message_text = [
        # {"role": "system", "content": "You are a knowledgeable wine sommelier assistant."},
        # {"role": "system", "content": context},
        # {"role": "user", "content": user_message}
        {"role": "system", "content": "Assistant is a chatbot that helps you find the best wine for your taste."},
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": context}
    ]

    completion = client.chat.completions.create(
      model=deployment_name,
      messages=message_text,
      temperature=0.7,
      max_tokens=4096,
      top_p=0.95,
      frequency_penalty=0,
      presence_penalty=0,
      stop=None
    )
    return completion

print("Original RAG function 'ai_RAG_chat()' defined and ready to use!")

## COMMENTED OUT TO STOP AUTOMATIC EXECUTION:
# print("Welcome! how can I help you today?")
# query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"
# completion = ai_RAG_chat(query)
# print("ORIGINAL RAG Response:")
# print(completion.choices[0].message.content)

Original RAG function 'ai_RAG_chat()' defined and ready to use!


In [40]:
from openai import OpenAI
#import os
#from dotenv import load_dotenv

#load_dotenv()

# This code works with the new OpenAI Python SDK, e.g. openai==1.108.1

endpoint = "https://bogda-mflnsc12-eastus2.cognitiveservices.azure.com/openai/v1/"
model_name = "gpt-35-turbo"
deployment_name = "gpt-35-turbo-2"

api_key = os.getenv("OPENAI_API_KEY")

client = OpenAI(
    base_url=f"{endpoint}",
    api_key=api_key
)


def ai_RAG_chat_2(user_message):
    # Search using query syntax for filtering
    search_results = search_client.search(
        search_text=user_message,
        top=20,  # Get more results to find high-rated wines
        select=["id", "name", "grape", "region", "variety", "rating", "notes", "content"]
    )
    top_results = list(search_results)



    # Prepare context from all search results.
    # A difference in this cell compared from the previous one:
    # Proper Context Formation: Instead of trying to pass a list of content directly (as a string),
    # I now format all the search results into a comprehensive context string that includes:
    # Wine name, Region, Variety, Rating, Tasting notes, Full content details
    
    if top_results:
        context = "Here are the relevant wine recommendations from our database:\n\n"
        for i, result in enumerate(top_results, 1):
            context += f"Wine {i}:\n"
            context += f"Name: {result.get('name', 'N/A')}\n"
            context += f"Region: {result.get('region', 'N/A')}\n"
            context += f"Variety: {result.get('variety', 'N/A')}\n"
            context += f"Rating: {result.get('rating', 'N/A')}\n"
            context += f"Notes: {result.get('notes', 'N/A')}\n"
            context += f"Full Details: {result.get('content', 'N/A')}\n\n"
        
        system_prompt = "You are a knowledgeable wine sommelier assistant. Use the provided wine information to give detailed recommendations and answer questions about wines. Focus on the specific wines provided in the context."
    else:
        context = "No relevant wine information found in our database."
        system_prompt = "You are a wine sommelier assistant. Unfortunately, no specific wine information was found for this query."

    message_text = [
        {"role": "system", "content": system_prompt},
        {"role": "system", "content": context},
        {"role": "user", "content": user_message}
    ]

    completion = client.chat.completions.create(
      model=deployment_name,
      messages=message_text,
      temperature=0.7,
      max_tokens=4096,
      top_p=0.95,
      frequency_penalty=0,
      presence_penalty=0,
      stop=None
    )
    return completion

# Function defined but not executed
print("Enhanced RAG function 'ai_RAG_chat_2' is now available!")
print("Call ai_RAG_chat_2('your question') to use it.")

## COMMENTED OUT THESE LINES TO STOP AUTOMATIC EXECUTION:
# print("Welcome! how can I help you today?")
# query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"
# completion = ai_RAG_chat_2(query)
# # Completion will return a response that we need to use to get the acctual string
# print(completion.choices[0].message.content)

Enhanced RAG function 'ai_RAG_chat_2' is now available!
Call ai_RAG_chat_2('your question') to use it.


In [None]:
# SIMPLIFIED VERSION - Basic RAG function (kept unchanged)- gives excellent results
from openai import OpenAI

endpoint = "https://bogda-mflnsc12-eastus2.cognitiveservices.azure.com/openai/v1/"
model_name = "gpt-35-turbo"
deployment_name = "gpt-35-turbo-2"

api_key = os.getenv("OPENAI_API_KEY")

client = OpenAI(
    base_url=f"{endpoint}",
    api_key=api_key
)

def ai_RAG_chat_3(user_message):
    # Search using query syntax for filtering
    search_results = search_client.search(
        search_text=user_message,
        top=20,
        #select=["id", "name", "grape", "region", "variety", "rating", "notes", "content"]
        select=["content"] # using only content field to make it simpler and avoid token limit issues
    )
    top_results = list(search_results)

    # Simple context formation using top_results directly
    context = str(top_results)
    
    message_text = [
        # {"role": "system", "content": "You are a knowledgeable wine sommelier assistant."},
        # {"role": "system", "content": context},
        # {"role": "user", "content": user_message}
        {"role": "system", "content": "Assistant is a chatbot that helps you find the best wine for your taste."},
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": context}
    ]

    completion = client.chat.completions.create(
      model=deployment_name,
      messages=message_text,
      temperature=0.7,
      max_tokens=4096,
      top_p=0.95,
      frequency_penalty=0,
      presence_penalty=0,
      stop=None
    )
    return completion

print("Simplified RAG function 'ai_RAG_chat_3()' defined and ready to use!")

## COMMENTED OUT TO STOP AUTOMATIC EXECUTION:
# print("Welcome! how can I help you today?")
# query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"
# completion = ai_RAG_chat_3(query)
# print("Simplified RAG Response:")
# print(completion.choices[0].message.content)

Simplified RAG function 'ai_RAG_chat_3()' defined and ready to use!


In [43]:
# prueba (para la mejora probar sin content, o con content solo. Lo mismo cuando armo el indice):
query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"

search_results = search_client.search(
        search_text=query,
        top=2,  # Get more results to find high-rated wines
        #select=["id", "name", "grape", "region", "variety", "rating", "notes", "content"]# prueba0
        #select=["id", "name", "grape", "region", "variety", "rating", "notes"]# prueba1
        select=["content"]# prueba2
    )
top_results = list(search_results)



    # Prepare context from all search results.
    # A difference in this cell compared from the previous one:
    # Proper Context Formation: Instead of trying to pass a list of content directly (as a string),
    # I now format all the search results into a comprehensive context string that includes:
    # Wine name, Region, Variety, Rating, Tasting notes, Full content details
    
if top_results:
    context = "Here are the relevant wine recommendations from our database:\n\n"
    for i, result in enumerate(top_results, 1):
        context += f"Wine {i}:\n"
        context += f"Name: {result.get('name', 'N/A')}\n"
        context += f"Region: {result.get('region', 'N/A')}\n"
        context += f"Variety: {result.get('variety', 'N/A')}\n"
        context += f"Rating: {result.get('rating', 'N/A')}\n"
        context += f"Notes: {result.get('notes', 'N/A')}\n"
        context += f"Full Details: {result.get('content', 'N/A')}\n\n"

print(context)

Here are the relevant wine recommendations from our database:

Wine 1:
Name: N/A
Region: N/A
Variety: N/A
Rating: N/A
Notes: N/A
Full Details: : 356
name: Addendum Napa Valley Cabernet Sauvignon 2016
grape: 
region: Napa Valley, California
variety: Red Wine
rating: 90.0
notes: Addendum Napa Valley Cabernet Sauvignon offers aromas of black cherry, red currant, cigar box, damp earth, cloves, boysenberry, baking spices, and baker's chocolate  Flavors of black cherry, dark chocolate, baking spices, black currant, vanilla, and earth.

Wine 2:
Name: N/A
Region: N/A
Variety: N/A
Rating: N/A
Notes: N/A
Full Details: : 30
name: 1881 Napa Cabernet Sauvignon 2018
grape: 
region: Napa Valley, California
variety: Red Wine
rating: 90.0
notes: Harvested from vineyards across Napa Valley, this bold Cabernet Sauvignon is a blend of warmer climate hillside vineyard fruit and valley floor fruit creating a wine that celebrates the region’s diversity and complexity. A deep dark purple in the glass, this wi

In [None]:
# Test original version
query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"
completion = ai_RAG_chat(query)
print(completion.choices[0].message.content)

Here are some highly rated Cabernet Sauvignon wines from Napa Valley with ratings above 94 points:

1. Alejandro Bulgheroni Lithology Beckstoffer To Kalon Vineyard Cabernet Sauvignon 2016 - Rating: 97.0
2. Alejandro Bulgheroni Lithology Beckstoffer To Kalon Vineyard Cabernet Sauvignon 2017 - Rating: 94.0
3. Alejandro Bulgheroni Lithology Beckstoffer Dr. Crane Vineyard Cabernet Sauvignon 2017 - Rating: 94.0

These wines are known for their exceptional quality and are worth trying if you are looking for a top-rated Cabernet Sauvignon from Napa Valley.


In [None]:
# Test the second version
query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"
completion = ai_RAG_chat_2(query)
print(completion.choices[0].message.content)

The best Cabernet Sauvignon wine in Napa Valley above 94 points is the "Alejandro Bulgheroni Lithology Beckstoffer To Kalon Vineyard Cabernet Sauvignon 2016," which has a rating of 97.0. This wine is highly rated and is produced solely from fruit grown in the historic Beckstoffer To Kalon Vineyard, which was planted in 1865 and represents Oakville's famous bench land. It is part of the Alejandro Bulgheroni Estate Lithology series, known for showcasing single vineyard and AVA-designated wines from select sites within Napa Valley.


In [46]:
# Test the third (simplified) version
query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points"
completion = ai_RAG_chat_3(query)
print("Simplified RAG Response:")
print(completion.choices[0].message.content)

Simplified RAG Response:
Here are some highly rated Cabernet Sauvignon wines from Napa Valley above 94 points:

1. Alejandro Bulgheroni Lithology Beckstoffer Dr. Crane Vineyard Cabernet Sauvignon 2017 - Rating: 94.0
2. Alejandro Bulgheroni Lithology Beckstoffer To Kalon Vineyard Cabernet Sauvignon 2016 - Rating: 97.0
3. Acre Cabernet Sauvignon 2015 - Rating: 93.0
4. Alejandro Bulgheroni Lithology Beckstoffer To Kalon Vineyard Cabernet Sauvignon 2017 - Rating: 94.0

These wines are known for their exceptional quality and high ratings. Enjoy exploring these options!


In [50]:
# Test the third (simplified) version, with more explicit instructions
query = "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points. Do not list Cabernet Sauvignon wines that do not achieve at least 95 points."
completion = ai_RAG_chat_3(query)
print("Simplified RAG Response:")
print(completion.choices[0].message.content)

Simplified RAG Response:
Here are some Cabernet Sauvignon wines from Napa Valley that have achieved at least 95 points:

1. Alejandro Bulgheroni Lithology Beckstoffer To Kalon Vineyard Cabernet Sauvignon 2016 - Rating: 97.0
2. Abreu Vineyards Cappella 2007 - Rating: 96.0


In [None]:
print("Welcome! how can I help you today?")
while True:
    query = input(">> ")
    completion = ai_RAG_chat(query)
    print(completion.choices[0].message.content)

Here are some highly-rated wines from Oregon above 92 points:

1. A to Z Pinot Noir 2017 - Willamette Valley, Oregon (Rating: 92.0)
2. Adelsheim Pinot Gris 2013 - Willamette Valley, Oregon (Rating: 91.0)
3. Adelsheim Pinot Gris 2014 - Willamette Valley, Oregon (Rating: 91.0)
4. Adelsheim Pinot Gris 2001 - Oregon (Rating: 91.0)

These wines have received excellent ratings and are sure to impress with their quality and taste.
The first wine is the Albert Bichot Chablis Domaine Long Depaquit 2014 from Chablis, Burgundy, France. It is a white wine with a rating of 89.0. This Chablis has a crystalline limpidity and a pale yellow color with light green highlights. The wine bursts with mineral precision and finesse, offering a pure nose typical of Chablis with notes of white flowers and flint. The palate is elegant with almond notes and lovely vivacity, making it an easy-drinking wine that is best enjoyed within the next 3 to 5 years.
The first wine is the Albert Bichot Chablis Domaine Long D

KeyboardInterrupt: Interrupted by user

In [52]:
print("Welcome! how can I help you today?")
while True:
    query = input(">> ")
    completion = ai_RAG_chat_2(query)
    print(completion.choices[0].message.content)

Welcome! how can I help you today?
The highest-rated wine from Oregon above 92 points in our database is the **A to Z Pinot Noir 2017** from the Willamette Valley, with a rating of 93.0. This wine showcases a vibrant and complex nose with a harmonious balance between tannin, acid, and fruit, making it an excellent choice for Pinot Noir enthusiasts seeking a top-quality Oregon wine.
It seems there might be a typo in the wine descriptions you provided. Did you mean to ask about something specific regarding the wines mentioned, or is there a particular wine you would like more information about or recommendations for? Feel free to ask any questions or specify your preferences!
Goodbye! If you have any more questions in the future, feel free to ask. Have a great day!
Hello! How can I assist you with the wine information provided?


KeyboardInterrupt: Interrupted by user

In [None]:
print("Welcome! how can I help you today?")
while True:
    query = input(">> ")
    completion = ai_RAG_chat_3(query)
    print(completion.choices[0].message.content)

Based on the search results, one of the best wines from Oregon above 92 points is the A to Z Pinot Noir 2017 from Willamette Valley, Oregon, which has a rating of 93 points. This wine showcases a vibrant and complex nose with floral, stone fruit, and berry aromas, along with hints of spice and wood notes. It offers a delicious balance between tannin, acid, and fruit, making it a prime example of Oregon Pinot Noir.
I have found some wine recommendations for you:

1. 1000 Stories Bourbon Barrel Aged Batch Blue Carignan 2016 from Mendocino, California - A limited release Carignan with notes of racy red and black fruits, toast, herbs, and cocoa.
2. 1000 Stories Bourbon Barrel Aged Gold Rush Red 2016 from California - A bold red blend inspired by the California Gold Rush, featuring flavors of black fruits and spices.
3. 1000 Stories Bourbon Barrel Aged Zinfandel 2013 from North Coast, California - A rich Zinfandel with intense raspberry fruit, black pepper spice, and a touch of cloves and h

KeyboardInterrupt: Interrupted by user

In [53]:
# Interactive wine chat with ai_RAG_chat_3 - Enhanced with exit options
print("Interactive Wine Sommelier Chat")
print("Type 'quit', 'exit', or 'q' to stop the conversation")
print("=" * 50)

while True:
    try:
        query = input(">> ")
        
        # Exit conditions
        if query.lower() in ['quit', 'exit', 'q', 'stop']:
            print("Thanks for using the Wine Sommelier! Goodbye!")
            break
            
        # Skip empty inputs
        if not query.strip():
            continue
            
        # Process the query
        completion = ai_RAG_chat_3(query)
        print("\nWine Sommelier Response:")
        print("-" * 30)
        print(completion.choices[0].message.content)
        print("\n" + "=" * 50)
        
    except KeyboardInterrupt:
        print("\n\nChat interrupted. Goodbye!")
        break
    except Exception as e:
        print(f"\nError: {e}")
        print("Please try again or type 'quit' to exit.\n")

Interactive Wine Sommelier Chat
Type 'quit', 'exit', or 'q' to stop the conversation

Wine Sommelier Response:
------------------------------
One highly-rated white wine from Spain is the "A Coroa Godello 2009" with a rating of 91.0. It has aromas of lemon peel on the nose, with citrus melons blending into herbaceous minerals on the palate, ending with a bold finish.


Wine Sommelier Response:
------------------------------
Here are some red wine options for you:

1. A to Z Pinot Noir 2006 - Willamette Valley, Oregon
2. Acinum Amarone della Valpolicella 2012 - Valpolicella, Veneto, Italy
3. Alejandro Bulgheroni Lithology Beckstoffer Dr. Crane Vineyard Cabernet Sauvignon 2017 - Napa Valley, California
4. Allegrini La Grola 2011 - Veneto, Italy

These are just a few suggestions. Let me know if you'd like more information or recommendations!


Wine Sommelier Response:
------------------------------
Here are some red wines from Spain that you might enjoy:

1. Acustic Celler Acustic Montsan