# Lesson3: Projections

In [None]:
import custom_utils

In [None]:
from datasets import load_dataset
import pandas as pd

# Open the "train" split in streaming mode so records are read lazily
# rather than fetching the whole dataset before the first row is available.
datasets = load_dataset(
    "MongoDB/airbnb_embeddings",
    split="train",
    streaming=True,
)

# Keep only the first 100 records and materialize them as a DataFrame.
dataset = datasets.take(100)
dataset_df = pd.DataFrame(dataset)

# Last expression of the cell, so the notebook renders the preview.
dataset_df.head(5)

In [None]:
# Show the column labels of the sampled DataFrame to see which fields each
# record carries before it is processed further.
print("Columns:", dataset_df.columns)

# Document modeling

In [None]:
# Convert the sampled rows into listing records with the project helper; the
# result is what gets inserted into the collection later in the notebook.
# NOTE(review): presumably a list of dict-like documents - confirm in custom_utils.
listings = custom_utils.process_records(dataset_df)

# Database creation and connection

In [None]:
# Obtain the database and collection handles from the project helper.
# Connection details live in custom_utils.connect_to_database (not shown here).
db, collection = custom_utils.connect_to_database()

# Data ingestion

In [None]:
# Bulk-insert the processed listings into the collection.
# NOTE(review): re-running this cell presumably inserts the same listings
# again - confirm whether the collection is cleared elsewhere.
collection.insert_many(listings)

# Vector search index definition

In [None]:
# Create the vector search index on the collection via the project helper.
# NOTE(review): presumably this is the index later queried by name as
# "vector_index_with_filter" - confirm in custom_utils.
custom_utils.setup_vector_search_index_with_filter(collection=collection)

# Handling user query

In [None]:
from pydantic import BaseModel
from typing import Optional

class SearchResultItem(BaseModel):
    """Shape of a single listing as returned by the vector search.

    ``name`` and ``address`` are required; every other field falls back to
    ``None`` when absent from the search result. Field order is significant:
    it determines the column order of the results table built from
    ``item.dict()``.
    """

    # Listing title (required).
    name: str
    # Guest capacity, when present in the result.
    accommodates: Optional[int] = None
    # Nested address document, validated by the project's Address model (required).
    address: custom_utils.Address
    # Free-text description fields.
    summary: Optional[str] = None
    space: Optional[str] = None
    neighborhood_overview: Optional[str] = None
    notes: Optional[str] = None
    # Relevance score attached to the result by the search pipeline, if projected.
    score: Optional[float] = None

In [None]:
from IPython.display import display, HTML

def handle_user_query(query, db, collection, stages=None, vector_index="vector_index_text"):
    """Answer a user query using vector-search results as LLM context.

    Runs ``custom_utils.vector_search_with_filter`` for ``query``, conforms
    each hit to ``SearchResultItem``, sends the tabulated hits together with
    the query to the OpenAI chat-completions endpoint, then prints the
    exchange and renders the hits as an HTML table.

    Args:
        query: The user's natural-language question.
        db: Database handle, forwarded unchanged to the search helper.
        collection: Collection handle, forwarded unchanged to the search helper.
        stages: Extra pipeline stages (e.g. a ``$project`` stage) forwarded to
            the search helper. ``None`` (the default) means no extra stages.
        vector_index: Name of the vector search index to query.

    Returns:
        The model's reply as a string. When the search yields nothing, the
        two-string tuple ``("No results found.", "No source information
        available.")`` is returned instead (kept for backward compatibility),
        so callers must be prepared for either shape.
    """
    # Build a fresh list per call: a shared ``[]`` default would carry any
    # stage a callee appends over into every later call.
    if stages is None:
        stages = []

    search_results = custom_utils.vector_search_with_filter(
        query, db, collection, stages, vector_index
    )
    if not search_results:
        return "No results found.", "No source information available."

    print("List of all fields of the first document, before model conformance")
    print(search_results[0].keys())

    # Conform every hit to the model so the table only carries the model's
    # declared fields, in declaration order.
    search_results_models = [SearchResultItem(**result) for result in search_results]
    search_results_df = pd.DataFrame([item.dict() for item in search_results_models])

    # NOTE(review): the DataFrame is interpolated via its default string form,
    # which is subject to pandas display options (long cells / wide frames may
    # be abbreviated) - confirm this is the intended context for the model.
    completion = custom_utils.openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a airbnb listing recommendation system.",
            },
            {
                "role": "user",
                "content": f"Answer this user query: {query} with the following context:\n{search_results_df}",
            },
        ],
    )

    system_response = completion.choices[0].message.content
    print(f"- User Question:\n{query}\n")
    print(f"- System Response:\n{system_response}\n")
    display(HTML(search_results_df.to_html()))
    return system_response

# Adding a projection stage

In [None]:

# $project stage: 0 suppresses a field, 1 keeps it. Only the fields needed for
# the results table are kept, which trims what each result carries.
projection_stage = {
    "$project": {
        "_id": 0,  # drop the document id
        "name": 1,
        "accommodates": 1,
        # Address sub-fields, addressed with dotted paths.
        **{
            f"address.{sub_field}": 1
            for sub_field in (
                "street",
                "government_area",
                "market",
                "country",
                "country_code",
                "location.type",
                "location.coordinates",
                "location.is_location_exact",
            )
        },
        # Free-text description fields.
        **dict.fromkeys(("summary", "space", "neighborhood_overview", "notes"), 1),
        # Surface the similarity score computed by the vector search.
        "score": {"$meta": "vectorSearchScore"},
    }
}

# Stages handed to the query handler in addition to the search itself.
additional_stages = [projection_stage]

In [None]:
# The prompt text is sent verbatim.
# NOTE(review): the stray closing double quote on the last line and the
# spelling "resturants" are part of the runtime string and are left untouched.
query = """
I want to stay in a place that's warm and friendly, 
and not too far from resturants, can you recommend a place? 
Include a reason as to why you've chosen your selection"
"""

# Run the search with the projection stage against the filter-enabled index;
# as the cell's last expression, the returned response is also echoed.
handle_user_query(
    query=query,
    db=db,
    collection=collection,
    stages=additional_stages,
    vector_index="vector_index_with_filter",
)