# OpenSearch support all similarities

In [1]:
import logging
logging.basicConfig(level=logging.WARNING)

In [2]:
from haystack.document_stores import OpenSearchDocumentStore
from haystack.nodes import DensePassageRetriever
from haystack.pipelines import DocumentSearchPipeline
from haystack.utils import clean_wiki_text, convert_files_to_dicts, fetch_archive_from_http, print_answers

def query(document_store, query="Who created the Dothraki vocabulary?", top_k=10):
    retriever = DensePassageRetriever(document_store=document_store,
                                    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
                                    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
                                    max_seq_len_query=64,
                                    max_seq_len_passage=256,
                                    batch_size=16,
                                    use_gpu=False,
                                    embed_title=True,
                                    use_fast_tokenizers=True)
    dot_pipe = DocumentSearchPipeline(retriever)
    dot_prediction = dot_pipe.run(
        query=query, params={"Retriever": {"top_k": top_k}}
    )
    return [(doc.meta, doc.score) for doc in dot_prediction["documents"]]

def index(document_store):
    document_store.delete_all_documents()
    doc_dir = "data/article_txt_got"
    s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt.zip"
    fetch_archive_from_http(url=s3_url, output_dir=doc_dir)

    # Convert files to dicts
    dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)

    # Now, let's write the dicts containing documents to our DB.
    document_store.write_documents(dicts)

    retriever = DensePassageRetriever(document_store=document_store,
                                    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
                                    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
                                    max_seq_len_query=64,
                                    max_seq_len_passage=256,
                                    batch_size=16,
                                    use_gpu=True,
                                    embed_title=True,
                                    use_fast_tokenizers=True)
    document_store.update_embeddings(retriever)

## Legacy dot_product index "document"

In [9]:
dot_document_store = OpenSearchDocumentStore(port=9201, index="document", similarity="dot_product")

  % self.host


In [10]:
index(dot_document_store)

                1. delete_all_documents() method is deprecated, please use delete_documents method
                For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
                
INFO:haystack.utils.import_utils:Found data stored in `data/article_txt_got`. Delete this first if you really want to fetch new data.
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/37_Joffrey_Baratheon.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/299_Rani_Mahal__TV_series_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/224_The_Night_Lands.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/54_Two_Swords__Game_of_Thrones_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/30_List_of_A_Song_of_Ice_and_Fire_characters.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/136_Game_of_Thrones__Season_8__soundtrack_.txt
INFO:haystack.utils.preprocess

In [11]:
query(dot_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.6747633356019272),
 ({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.6723895478099154),
 ({'_split_id': 3, 'name': '214_Dothraki_language.txt'}, 0.6703240610800344),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.6697039111655755),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6685668142411223),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.6658625872468471),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.6652899219401275),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.6651136928156647),
 ({'_split_id': 6, 'name': '214_Dothraki_language.txt'}, 0.6637420340812874),
 ({'_split_id': 3, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6631771445092649)]

In [12]:
cos_document_store = OpenSearchDocumentStore(port=9201, index="document", similarity="cosine")

  % self.host


In [13]:
query(cos_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.82161715),
 ({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.8164899),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.80899405),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.807906),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.8044877),
 ({'_split_id': 6, 'name': '229_Game_of_Thrones.txt'}, 0.8036121),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.80161575),
 ({'_split_id': 2, 'name': '130_Game_of_Thrones_title_sequence.txt'},
  0.8003006),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'}, 0.80004955),
 ({'_split_id': 5, 'name': '450_Baelor.txt'}, 0.80003355)]

### Indexing with 'wrong' similarity type is not an issue. Querying is just not optimized.

In [14]:
index(cos_document_store)

                1. delete_all_documents() method is deprecated, please use delete_documents method
                For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
                
INFO:haystack.utils.import_utils:Found data stored in `data/article_txt_got`. Delete this first if you really want to fetch new data.
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/37_Joffrey_Baratheon.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/299_Rani_Mahal__TV_series_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/224_The_Night_Lands.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/54_Two_Swords__Game_of_Thrones_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/30_List_of_A_Song_of_Ice_and_Fire_characters.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/136_Game_of_Thrones__Season_8__soundtrack_.txt
INFO:haystack.utils.preprocess

In [15]:
query(cos_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.82161715),
 ({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.8164899),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.80899405),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.807906),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.8044877),
 ({'_split_id': 6, 'name': '229_Game_of_Thrones.txt'}, 0.8036121),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.80161575),
 ({'_split_id': 2, 'name': '130_Game_of_Thrones_title_sequence.txt'},
  0.8003006),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'}, 0.80004955),
 ({'_split_id': 5, 'name': '450_Baelor.txt'}, 0.80003355)]

In [16]:
query(dot_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.6747633356019272),
 ({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.6723895478099154),
 ({'_split_id': 3, 'name': '214_Dothraki_language.txt'}, 0.6703240610800344),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.6697039111655755),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6685668142411223),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.6658625872468471),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.6652899219401275),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.6651136928156647),
 ({'_split_id': 6, 'name': '214_Dothraki_language.txt'}, 0.6637420340812874),
 ({'_split_id': 3, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6631771445092649)]

## New dot_product index 'document_dot_product'

In [19]:
dot_document_store = OpenSearchDocumentStore(port=9201, index="document_dot_product", similarity="dot_product")



In [20]:
index(dot_document_store)

                1. delete_all_documents() method is deprecated, please use delete_documents method
                For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
                
INFO:haystack.utils.import_utils:Found data stored in `data/article_txt_got`. Delete this first if you really want to fetch new data.
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/37_Joffrey_Baratheon.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/299_Rani_Mahal__TV_series_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/224_The_Night_Lands.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/54_Two_Swords__Game_of_Thrones_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/30_List_of_A_Song_of_Ice_and_Fire_characters.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/136_Game_of_Thrones__Season_8__soundtrack_.txt
INFO:haystack.utils.preprocess

In [21]:
query(dot_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.6747633356019272),
 ({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.6723895478099154),
 ({'_split_id': 3, 'name': '214_Dothraki_language.txt'}, 0.6703240610800344),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.6697039111655755),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6685668142411223),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.6658625872468471),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.6652899219401275),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.6651136928156647),
 ({'_split_id': 6, 'name': '214_Dothraki_language.txt'}, 0.6637420340812874),
 ({'_split_id': 3, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6631771445092649)]

In [22]:
cos_document_store = OpenSearchDocumentStore(port=9201, index="document_dot_product", similarity="cosine")

  % self.host


In [23]:
query(cos_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.82161715),
 ({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.8164899),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.80899405),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.807906),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.8044877),
 ({'_split_id': 6, 'name': '229_Game_of_Thrones.txt'}, 0.8036121),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.80161575),
 ({'_split_id': 2, 'name': '130_Game_of_Thrones_title_sequence.txt'},
  0.8003006),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'}, 0.80004955),
 ({'_split_id': 5, 'name': '450_Baelor.txt'}, 0.80003355)]

### Indexing with 'wrong' similarity type is not an issue. Querying is just not optimized.

In [24]:
index(cos_document_store)

                1. delete_all_documents() method is deprecated, please use delete_documents method
                For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
                
INFO:haystack.utils.import_utils:Found data stored in `data/article_txt_got`. Delete this first if you really want to fetch new data.
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/37_Joffrey_Baratheon.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/299_Rani_Mahal__TV_series_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/224_The_Night_Lands.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/54_Two_Swords__Game_of_Thrones_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/30_List_of_A_Song_of_Ice_and_Fire_characters.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/136_Game_of_Thrones__Season_8__soundtrack_.txt
INFO:haystack.utils.preprocess

In [25]:
query(cos_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.82161715),
 ({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.8164899),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.80899405),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.807906),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.8044877),
 ({'_split_id': 6, 'name': '229_Game_of_Thrones.txt'}, 0.8036121),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.80161575),
 ({'_split_id': 2, 'name': '130_Game_of_Thrones_title_sequence.txt'},
  0.8003006),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'}, 0.80004955),
 ({'_split_id': 5, 'name': '450_Baelor.txt'}, 0.80003355)]

In [26]:
query(dot_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.6747633356019272),
 ({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.6723895478099154),
 ({'_split_id': 3, 'name': '214_Dothraki_language.txt'}, 0.6703240610800344),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.6697039111655755),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6685668142411223),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.6658625872468471),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.6652899219401275),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.6651136928156647),
 ({'_split_id': 6, 'name': '214_Dothraki_language.txt'}, 0.6637420340812874),
 ({'_split_id': 3, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6631771445092649)]

### Let's clone embedding_field optimized for cosine similarity

In [27]:
cos_document_store = OpenSearchDocumentStore(port=9201, index="document_dot_product", similarity="cosine")

  % self.host


In [28]:
cos_document_store.embedding_field

'embedding'

In [29]:
cos_document_store.clone_embedding_field(new_embedding_field="embedding_cos",similarity="cosine")

Updating embeddings: 10000 Docs [00:12, 797.53 Docs/s]          


In [3]:
# we have to reload document_store with optimized embedding_field
cos_document_store = OpenSearchDocumentStore(port=9201, index="document_dot_product", similarity="cosine", embedding_field="embedding_cos")

  % self.host


In [4]:
query(cos_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.8216167500965),
 ({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.816489847830545),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.8089942687984654),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.8079059837341296),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.8044873640655329),
 ({'_split_id': 6, 'name': '229_Game_of_Thrones.txt'}, 0.8036122080456857),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.8016157956628386),
 ({'_split_id': 2, 'name': '130_Game_of_Thrones_title_sequence.txt'},
  0.8003004817599111),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.8000496684755249),
 ({'_split_id': 5, 'name': '450_Baelor.txt'}, 0.8000335395929169)]

## New cosine index 'document_cosine'

In [5]:
cos_document_store = OpenSearchDocumentStore(port=9201, index="document_cosine", similarity="cosine")

  % self.host


In [6]:
index(cos_document_store)

                1. delete_all_documents() method is deprecated, please use delete_documents method
                For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
                
INFO:haystack.utils.import_utils:Found data stored in `data/article_txt_got`. Delete this first if you really want to fetch new data.
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/37_Joffrey_Baratheon.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/299_Rani_Mahal__TV_series_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/224_The_Night_Lands.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/54_Two_Swords__Game_of_Thrones_.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/30_List_of_A_Song_of_Ice_and_Fire_characters.txt
INFO:haystack.utils.preprocessing:Converting data/article_txt_got/136_Game_of_Thrones__Season_8__soundtrack_.txt
INFO:haystack.utils.preprocess

In [7]:
query(cos_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.82161715),
 ({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.8164899),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.80899405),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.807906),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.8044877),
 ({'_split_id': 6, 'name': '229_Game_of_Thrones.txt'}, 0.8036121),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.80161575),
 ({'_split_id': 2, 'name': '130_Game_of_Thrones_title_sequence.txt'},
  0.8003006),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'}, 0.80004955),
 ({'_split_id': 5, 'name': '450_Baelor.txt'}, 0.80003355)]

In [8]:
dot_document_store = OpenSearchDocumentStore(port=9201, index="document_cosine", similarity="dot_product")

  % self.host


Exception: embedding_field 'embedding' is only optimized for similarity cosine. Cannot fall back to slow exact vector calculation: OpenSearch does not support slow exact vector calculation for similarity 'dot_product'. Consider cloning 'embedding' optimized for dot_product by calling clone_embedding_field().

### Let's clone embedding_field optimized for dot_product similarity

In [9]:
cos_document_store.clone_embedding_field(new_embedding_field="embedding_dot", similarity="dot_product")

Updating embeddings: 10000 Docs [00:22, 437.34 Docs/s]          


In [10]:
dot_document_store = OpenSearchDocumentStore(port=9201, index="document_cosine", similarity="dot_product", embedding_field="embedding_dot")

  % self.host


In [11]:
query(dot_document_store)

INFO:haystack.modeling.utils:Using devices: CPU
INFO:haystack.modeling.utils:Number of GPUs: 0
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-question_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-question_encoder-single-nq-base
INFO:haystack.modeling.model.language_model:LOADING MODEL
INFO:haystack.modeling.model.language_model:Could not find facebook/dpr-ctx_encoder-single-nq-base locally.
INFO:haystack.modeling.model.language_model:Looking on Transformers Model Hub (in local cache and online)...
INFO:haystack.modeling.model.language_model:Loaded facebook/dpr-ctx_encoder-single-nq-base


[({'_split_id': 1, 'name': '214_Dothraki_language.txt'}, 0.6747633356019272),
 ({'_split_id': 0, 'name': '214_Dothraki_language.txt'}, 0.6723895478099154),
 ({'_split_id': 3, 'name': '214_Dothraki_language.txt'}, 0.6703240610800344),
 ({'_split_id': 2, 'name': '214_Dothraki_language.txt'}, 0.6697039111655755),
 ({'_split_id': 9, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6685668142411223),
 ({'_split_id': 9, 'name': '87_Valar_Dohaeris.txt'}, 0.6658625872468471),
 ({'_split_id': 0,
   'name': '469_Outline_of_A_Song_of_Ice_and_Fire_franchise.txt'},
  0.6652899219401275),
 ({'_split_id': 0,
   'name': '504_List_of_A_Song_of_Ice_and_Fire_video_games.txt'},
  0.6651136928156647),
 ({'_split_id': 6, 'name': '214_Dothraki_language.txt'}, 0.6637420340812874),
 ({'_split_id': 3, 'name': '9_Game_of_Thrones_Tapestry.txt'},
  0.6631771445092649)]

## About us

This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany

We bring NLP to the industry via open source!  
Our focus: Industry specific language models & large scale QA systems.  
  
Some of our other work: 
- [German BERT](https://deepset.ai/german-bert)
- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)
- [FARM](https://github.com/deepset-ai/FARM)

Get in touch:
[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)

By the way: [we're hiring!](https://www.deepset.ai/jobs)