In [1]:
from dotenv import load_dotenv
import os
from pymongo import MongoClient

# Read the local .env file so its variables become visible through os.environ.
load_dotenv()

# MongoDB URI taken from the "MongoURI" variable; None when it is not set.
CONNECTION_STRING = os.environ.get("MongoURI")

# Empty-string placeholder meaning "no client created yet"; a later cell
# compares against "" before deciding to build a MongoClient.
client = ""
def get_database():
    """Connect to MongoDB and return the ``user_shopping_list`` database handle.

    On success the connected client is published to the module-level
    ``client`` variable, so later cells that check that variable reuse this
    connection instead of opening a second one.

    Returns:
        The ``user_shopping_list`` database object, or ``None`` when the
        connection attempt fails (the error is printed, not raised).
    """
    global client

    candidate = None
    try:
        # Initialize MongoClient with connection string
        candidate = MongoClient(CONNECTION_STRING)

        # Test connection by accessing server info; raises if the connection fails.
        candidate.server_info()
    except Exception as e:
        print(f"Failed to connect to the database: {str(e)}")
        # Do not leave a half-initialised client (and its resources) behind.
        if candidate is not None:
            candidate.close()
        return None

    # Only a verified client replaces the module-level placeholder.
    client = candidate
    print("Connected successfully to the database!")
    return client['user_shopping_list']

if __name__ == "__main__":
    # Quick connectivity check: report whether a database handle was obtained.
    dbname = get_database()
    if dbname is None:
        print("Database connection failed.")
    else:
        print("Database connection is ready to use.")

Connected successfully to the database!
Database connection is ready to use.


In [2]:
# The "" placeholder means no MongoClient exists yet, so build one here.
if client == "":
    client = MongoClient(CONNECTION_STRING)

# Fetch the collection names of the 'revostate' database first, so nothing is
# printed if the lookup itself fails.
db = client["revostate"]
collections = db.list_collection_names()

print("Collections in the database:")
for collection in collections:
    print(f"- {collection}")

Collections in the database:
- account
- verification
- properties
- purchases
- companies
- packages
- systemadmins
- users
- session


In [None]:
from dotenv import load_dotenv
import os
from pymongo import MongoClient
from pymongo.operations import SearchIndexModel
from sentence_transformers import SentenceTransformer

# Read the local .env file, then take the MongoDB URI from the "MongoURI"
# environment variable (None when it is not set).
load_dotenv()
CONNECTION_STRING = os.environ.get("MongoURI")

def _address_parts(address):
    """Return the non-empty pieces of an address as a list of strings.

    A dict contributes its 'region', 'city' and 'specificLocation' values (in
    that order); a non-empty string contributes itself; anything else
    contributes nothing.
    """
    if isinstance(address, dict):
        values = (address.get(field) for field in ('region', 'city', 'specificLocation'))
        return [str(value) for value in values if value]
    if isinstance(address, str) and address:
        return [address]
    return []


def _amenities_text(amenities):
    """Format amenities as ``"amenities: a, b, c"``.

    A plain string is treated as a single amenity so it is not split into
    individual characters.
    """
    items = [amenities] if isinstance(amenities, str) else amenities
    return f"amenities: {', '.join(str(x) for x in items)}"


def document_to_text(doc, is_company=False):
    """Convert selected document fields to a single text string for embedding.

    Args:
        doc: Mapping holding the document fields.
        is_company: When true, use the company layout ('realEstateName',
            'description', address). Otherwise use the property layout
            ('title', 'description', 'price', 'area', 'landArea',
            'listingType', 'amenities', address).

    Returns:
        ``"search_document: <fields joined by spaces>"``, or an empty string
        when the document contributes no text at all, so callers can detect
        and skip documents with nothing to embed.

    NOTE(review): the "search_document: " prefix is kept as-is; confirm it is
    intended for the embedding model used by the callers.
    """
    parts = []

    def add_text(key):
        # Free-text fields are included only when truthy; str() guards the
        # final join against non-string values (numbers, etc.).
        value = doc.get(key)
        if value:
            parts.append(str(value))

    def add_labelled(key):
        # Numeric-style fields keep legitimate zero values, so only None is skipped.
        value = doc.get(key)
        if value is not None:
            parts.append(f"{key}: {value}")

    if is_company:
        add_text('realEstateName')
        add_text('description')
    else:
        add_text('title')
        add_text('description')
        for key in ('price', 'area', 'landArea'):
            add_labelled(key)
        add_text('listingType')
        amenities = doc.get('amenities')
        if amenities:
            parts.append(_amenities_text(amenities))

    parts.extend(_address_parts(doc.get('address')))

    text = ' '.join(parts)
    if not text.strip():
        # Nothing usable: return "" rather than a bare prefix.
        return ""
    return f"search_document: {text}"

def add_embeddings(collection, model, is_company=False):
    """Store an embedding in the ``revoemb`` field of every document lacking one.

    Args:
        collection: Collection object providing ``find``, ``update_one`` and
            a ``name`` attribute.
        model: Encoder whose ``encode(list_of_str)`` returns one vector per
            input text; each vector must provide ``tolist()``.
        is_company: Forwarded to ``document_to_text`` to choose between the
            company and the property field layout.

    Documents whose text is blank are skipped. A summary line is printed when
    the pass completes.
    """
    # Filter on the server: documents that already carry `revoemb` are never
    # sent to the client, instead of being fetched and skipped in Python.
    pending = collection.find({"revoemb": {"$exists": False}})

    for doc in pending:
        text = document_to_text(doc, is_company)
        if not text.strip():
            continue
        # encode() takes a batch; a one-element batch yields one vector.
        embedding = model.encode([text])[0].tolist()
        collection.update_one(
            {"_id": doc["_id"]},
            {"$set": {"revoemb": embedding}}
        )
    print(f"Embeddings added to {collection.name} collection.")

def _ensure_vector_index(collection, index_name, num_dimensions=384):
    """Create a cosine vector search index on ``revoemb`` unless it already exists.

    Looking the index up by name first keeps the call idempotent, so the
    notebook cell can be re-run without failing on an existing index.
    """
    if list(collection.list_search_indexes(name=index_name)):
        print(f"Vector search index '{index_name}' already exists for '{collection.name}' collection.")
        return

    index_model = SearchIndexModel(
        definition={
            "fields": [
                {
                    "type": "vector",
                    "numDimensions": num_dimensions,
                    "path": "revoemb",
                    "similarity": "cosine"
                }
            ]
        },
        name=index_name,
        type="vectorSearch"
    )
    collection.create_search_index(index_model)
    print(f"Vector search index created for '{collection.name}' collection.")


def create_search_indexes(client):
    """Embed documents and make sure vector search indexes exist for properties and companies.

    Steps: verify the connection, add ``revoemb`` embeddings to the
    'properties' and 'companies' collections of the 'revostate' database,
    ensure one vector search index per collection (384 dimensions, matching
    all-MiniLM-L6-v2), then print the search indexes of both collections.

    Args:
        client: Connected MongoDB client.

    Returns:
        The 'revostate' database object, or ``None`` if any step fails (the
        error is printed, not raised).

    NOTE(review): index creation may complete asynchronously on the server;
    confirm the indexes are queryable before relying on search results.
    """
    try:
        client.server_info()
        print("Connected successfully to the database!")

        db = client['revostate']
        properties_collection = db['properties']
        companies_collection = db['companies']

        model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

        add_embeddings(properties_collection, model, is_company=False)
        add_embeddings(companies_collection, model, is_company=True)

        _ensure_vector_index(properties_collection, "properties_vector_index")
        _ensure_vector_index(companies_collection, "companies_vector_index")

        for label, collection in (
            ("properties", properties_collection),
            ("companies", companies_collection),
        ):
            print(f"\nSearch Indexes for '{label}':")
            for index in collection.list_search_indexes():
                print(index)

        return db

    except Exception as e:
        print(f"Failed to create indexes: {str(e)}")
        return None

class MongoResult:
    """Vector search helper bound to one query and one collection.

    The query is embedded once at construction time; ``search`` then runs a
    ``$vectorSearch`` aggregation against the ``revoemb`` field.
    """

    MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
    MIN_CANDIDATES = 100

    # Field sets returned by search(); only fields relevant to the entity
    # type are projected.
    COMPANY_FIELDS = ("description", "realEstateName", "address")
    PROPERTY_FIELDS = (
        "title", "description", "address", "price",
        "area", "landArea", "listingType", "amenities",
    )

    # Lazily loaded model shared by all instances, so it is loaded only once.
    _shared_model = None

    def __init__(self, query, collection, is_company=False, model=None):
        """
        Args:
            query: Free-text search query.
            collection: Collection object providing ``aggregate``.
            is_company: Selects the companies index/fields instead of the
                properties ones.
            model: Optional encoder with ``encode(list_of_str)``; when omitted
                a shared ``MODEL_NAME`` model is loaded on first use.
        """
        self.query = query
        self.collection = collection
        self.is_company = is_company
        self.model = model if model is not None else self._get_shared_model()
        self.vectorized_query = self.vectorize_query(query)

    @classmethod
    def _get_shared_model(cls):
        """Return the shared encoder, loading it on first use."""
        if MongoResult._shared_model is None:
            MongoResult._shared_model = SentenceTransformer(cls.MODEL_NAME)
        return MongoResult._shared_model

    def vectorize_query(self, query):
        """Vectorize the query with search_query prefix (384 dimensions)."""
        query_text = f"search_query: {query}"
        embedding = self.model.encode([query_text])[0].tolist()
        return embedding

    def _projection(self):
        """Build the $project spec, listing only the fields to include."""
        fields = self.COMPANY_FIELDS if self.is_company else self.PROPERTY_FIELDS
        projection = {"_id": 1}
        projection.update({name: 1 for name in fields})
        projection["score"] = {"$meta": "vectorSearchScore"}
        return projection

    def search(self, limit=10):
        """Perform vector search using the revoemb field.

        Args:
            limit: Maximum number of documents to return.

        Returns:
            List of result documents (each with a ``score``); an empty list
            if the search fails (the error is printed, not raised).
        """
        try:
            pipeline = [
                {
                    "$vectorSearch": {
                        "index": "companies_vector_index" if self.is_company else "properties_vector_index",
                        "path": "revoemb",
                        "queryVector": self.vectorized_query,
                        # $vectorSearch rejects a limit above numCandidates,
                        # so grow the candidate pool for large limits.
                        "numCandidates": max(self.MIN_CANDIDATES, limit),
                        "limit": limit
                    }
                },
                {"$project": self._projection()}
            ]

            results = self.collection.aggregate(pipeline)
            return list(results)

        except Exception as e:
            print(f"Search failed: {str(e)}")
            return []

def _address_city(doc):
    """Return a printable location for a result document.

    The 'address' field may be a dict, a plain string, or missing/None: a
    dict yields its 'city' value, any other value is returned as-is.
    """
    address = doc.get('address')
    if isinstance(address, dict):
        return address.get('city')
    return address


def main():
    """Build embeddings and indexes, then run one example search per collection.

    Errors are printed rather than raised, and the client is always closed.
    """
    # Initialize MongoClient
    client = MongoClient(CONNECTION_STRING)

    try:
        # Create indexes and get database
        db = create_search_indexes(client)
        if db is None:
            return

        # Example search for companies
        companies_collection = db['companies']
        query = "real estate agency in Bole"
        searcher = MongoResult(query, companies_collection, is_company=True)
        results = searcher.search(limit=10)

        print("\nVector Search Results for Companies:")
        for doc in results:
            # _address_city tolerates string/None addresses, which a chained
            # .get('address', {}).get('city') would crash on.
            print(f"Name: {doc.get('realEstateName')}, Address: {_address_city(doc)}, "
                  f"Score: {doc.get('score', 0):.4f}")

        # Example search for properties
        properties_collection = db['properties']
        query = "luxury villa in Bole for sale"
        searcher = MongoResult(query, properties_collection, is_company=False)
        results = searcher.search(limit=10)

        print("\nVector Search Results for Properties:")
        for doc in results:
            print(f"Title: {doc.get('title')}, Address: {_address_city(doc)}, "
                  f"Price: {doc.get('price', 0)}, Listing: {doc.get('listingType')}, "
                  f"Score: {doc.get('score', 0):.4f}")

    except Exception as e:
        print(f"Error: {str(e)}")

    finally:
        client.close()

if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


Connected successfully to the database!
Embeddings added to properties collection.
Failed to create indexes: 'str' object has no attribute 'get'
