In [2]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, Batch
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from langchain_huggingface import HuggingFaceEmbeddings
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import Document, VectorStoreIndex
from pymongo import MongoClient
from bson import ObjectId
from llama_index.core import StorageContext, load_index_from_storage

---
## create some example documents from mongodb for yellow pages

In [9]:
def set_up_mongo(client_str,database_str,collection_str):
    client = MongoClient(client_str) #connect to mongodb client
    db = client[database_str] #connect to database

    existing_collections = db.list_collection_names() #check that dbs collections
    if collection_str not in existing_collections:
        db.create_collection(collection_str) #create collection if needed
    
    my_collection = db[collection_str] #connect to collection

    return my_collection

In [10]:
collection = set_up_mongo('mongodb://localhost:27017','webscraping_dataLabKiel','yellow_pages_clean') 

In [None]:
def create_doc(list_of_data, list_of_keys):
    docs = []
    for e in list_of_data:
        text = ""
        id = str(e["_id"])
        for k in list_of_keys:
            text += str(e.get(k))
        docs.append(Document(text=text, metadata={"mongo_id":id}))
    return docs

In [8]:
yp_data = [e for e in collection.find({},{"_id":1,"name":1,"keywords":1,"description":1})]
text_type = ["name","keywords","description"]
documents = create_doc(yp_data,text_type)

NameError: name 'collection' is not defined

---

## create client and vector store, add some data

In [3]:
DIMENSIONS = 384
client = QdrantClient(location=":memory:")

In [4]:
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L12-v2")
Settings.embed_model = embed_model

In [9]:
vector_store = QdrantVectorStore(client=client, collection_name="nlp_search")

In [10]:
index = VectorStoreIndex.from_documents(
    documents=documents,
    vector_store=vector_store,
)

In [11]:
# make persistent!!
index.set_index_id("nlp_search")
index.storage_context.persist("./nlp_search")

## connect to persistent & retriever

now create to persistent storage??

In [6]:
storage_context = StorageContext.from_defaults(persist_dir="./nlp_search")
index = load_index_from_storage(storage_context, index_id="nlp_search")

AttributeError: 'OpenAIEmbedding' object has no attribute '__pydantic_private__'

In [73]:
retriever = index.as_retriever(similarity_top_k=10)
results = retriever.retrieve('Ich will mir morgen die haare schneiden lassen welche friseure haben auf') 

In [74]:
batch_ids = [ObjectId(r.metadata["mongo_id"]) for r in results]

In [75]:
results

[NodeWithScore(node=TextNode(id_='e340667e-c341-44fe-a919-7dfe4b77ade6', embedding=None, metadata={'mongo_id': '672a215f5fa167f70744c017'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='fd70742f-478c-4383-9ddf-01f478dd9546', node_type='4', metadata={'mongo_id': '672a215f5fa167f70744c017'}, hash='976c9c26ab17fa139aa20263562fdea2aa084eb56fc090c5f186c840c5856440')}, metadata_template='{key}: {value}', metadata_separator='\n', text="Horn von Holger Friseursalon['Haare färben', 'Haare föhnen', 'Haare schneiden', 'Haare waschen']None", mimetype='text/plain', start_char_idx=0, end_char_idx=100, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.3944848300143057),
 NodeWithScore(node=TextNode(id_='994f4312-4809-4cff-a199-94bc8ce705aa', embedding=None, metadata={'mongo_id': '672a21885fa167f70744c11d'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships

## get mongodb data via object ids

In [62]:
matches = [e for e in collection.find({"_id":{"$in": batch_ids}})]

In [63]:
matches

[{'_id': ObjectId('672a212d5fa167f70744bee7'),
  'address': {'addressCountry': 'DE',
   'streetAddress': 'Norderstr. 7',
   '@type': 'PostalAddress',
   'postalCode': '25767',
   'addressLocality': 'Albersdorf'},
  'keywords': ['Haare färben',
   'Haare föhnen',
   'Haare schneiden',
   'Haare waschen'],
  '@type': ['LocalBusiness', 'HealthAndBeautyBusiness', 'HairSalon'],
  'latitude': '54.14721',
  'name': 'Salon Hoffmann Friseursalon',
  'dateModified': '2019-09-16',
  'telephone': '04835 3 49',
  'openingHours': ['Di 08:00-12:00',
   'Di 13:30-18:00',
   'Mi 08:00-12:00',
   'Mi 13:30-18:00',
   'Do 08:00-12:00',
   'Do 13:30-18:00',
   'Fr 08:00-12:00',
   'Fr 13:30-18:00',
   'Sa 08:00-12:30'],
  '@id': 'https://www.gelbeseiten.de/gsbiz/ff7be14e-4196-4e56-b736-3ccec4bcf4c5',
  '@context': 'https://schema.org/',
  'url': 'https://www.gelbeseiten.de/gsbiz/ff7be14e-4196-4e56-b736-3ccec4bcf4c5',
  'longitude': '9.283368',
  'lat': 54.14721,
  'lon': 9.283368},
 {'_id': ObjectId('672a

---