# Metadata filtering

In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are re-imported before each cell executes and local code edits are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

Import dependencies

In [2]:
from haystack import component, Pipeline, Document
from typing import List
from haystack.document_stores.in_memory import InMemoryDocumentStore

Create an in-memory document store and populate it with sample documents

In [3]:
# Instantiate the in-memory document store that the following cells write to
# and query.
document_store = InMemoryDocumentStore()

In [4]:
# Seed the store with six sample documents. Each row below is
# (content, type, domain); the last two values become the "type" and
# "domain" metadata fields that the filtering cells further down query.
document_store.write_documents(
    [
        Document(content=text, meta={"type": doc_type, "domain": domain})
        for text, doc_type, domain in (
            ("Is there a way to prevent the model from overfitting?", "question", "ai"),
            ("How can I reduce the overfitting of my model?", "question", "ai"),
            ("How to avoid overfitting in my model?", "question", "ai"),
            (
                "To avoid overfitting, you can use techniques like early stopping and dropout.",
                "answer",
                "ai",
            ),
            (
                "Kubernetes is a portable, extensible, open-source platform for managing containerized workloads and services, that facilitates both declarative configuration and automation.",
                "answer",
                "cloud",
            ),
            (
                "Harry Potter is a series of seven fantasy novels written by British author, J. K. Rowling.",
                "answer",
                "books",
            ),
        )
    ]
)

6

In [5]:
# Called without a `filters` argument to inspect what the store currently holds.
document_store.filter_documents()

[Document(id=dda826befdf17b88a9d7394afd0f5f3073cbef06c8937083540dce0a9397c0d0, content: 'Is there a way to prevent the model from overfitting?', meta: {'type': 'question', 'domain': 'ai'}),
 Document(id=2cce5b2bb65191b25dc48ba8668d7f58ead16366077f6a9a56b0e1e57df06640, content: 'How can I reduce the overfitting of my model?', meta: {'type': 'question', 'domain': 'ai'}),
 Document(id=05eefcbe95ff6ff8bc7b458b9a0ab0db8fae8d88809b476ee7debb3503e6b614, content: 'How to avoid overfitting in my model?', meta: {'type': 'question', 'domain': 'ai'}),
 Document(id=44c7664ebb6ed84959e9baf2108b5161338586445b3c59078a117791c08aaff4, content: 'To avoid overfitting, you can use techniques like early stopping and dropout.', meta: {'type': 'answer', 'domain': 'ai'}),
 Document(id=072f6e30d57aa4cb661e327213d9960adf5b2bd59f363417e71ec9cc009b1932, content: 'Kubernetes is a portable, extensible, open-source platform for managing containerized workloads and ...', meta: {'type': 'answer', 'domain': 'cloud'}),
 

Filter the documents based on the domain field

In [6]:
# A single comparison filter: match documents whose `domain` metadata
# field is equal to "ai".
filters = {"field": "meta.domain", "operator": "==", "value": "ai"}
results = document_store.filter_documents(filters=filters)

# Leave the matches as the cell's last expression so they are displayed.
results

[Document(id=dda826befdf17b88a9d7394afd0f5f3073cbef06c8937083540dce0a9397c0d0, content: 'Is there a way to prevent the model from overfitting?', meta: {'type': 'question', 'domain': 'ai'}),
 Document(id=2cce5b2bb65191b25dc48ba8668d7f58ead16366077f6a9a56b0e1e57df06640, content: 'How can I reduce the overfitting of my model?', meta: {'type': 'question', 'domain': 'ai'}),
 Document(id=05eefcbe95ff6ff8bc7b458b9a0ab0db8fae8d88809b476ee7debb3503e6b614, content: 'How to avoid overfitting in my model?', meta: {'type': 'question', 'domain': 'ai'}),
 Document(id=44c7664ebb6ed84959e9baf2108b5161338586445b3c59078a117791c08aaff4, content: 'To avoid overfitting, you can use techniques like early stopping and dropout.', meta: {'type': 'answer', 'domain': 'ai'})]

In [7]:
# A logical filter: "AND" combines several comparison conditions, so a
# document must have domain "ai" and type "answer" to match.
filters = dict(
    operator="AND",
    conditions=[
        dict(field="meta.domain", operator="==", value="ai"),
        dict(field="meta.type", operator="==", value="answer"),
    ],
)
results = document_store.filter_documents(filters=filters)

# Leave the matches as the cell's last expression so they are displayed.
results

[Document(id=44c7664ebb6ed84959e9baf2108b5161338586445b3c59078a117791c08aaff4, content: 'To avoid overfitting, you can use techniques like early stopping and dropout.', meta: {'type': 'answer', 'domain': 'ai'})]