In [1]:
from haystack import Pipeline
from haystack.nodes.prompt import PromptNode
from haystack.nodes import PromptModel
from haystack.nodes.prompt.prompt_template import PromptTemplate
from haystack.nodes import AnswerParser
from haystack.nodes.ranker import SentenceTransformersRanker
from haystack.nodes.retriever import BM25Retriever
from haystack.document_stores import InMemoryDocumentStore
from haystack import Document

  from .autonotebook import tqdm as notebook_tqdm
2024-04-12 10:29:14.636714: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  return torch._C._cuda_getDeviceCount() > 0
2024-04-12 10:29:19,450	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


First, load the data

In [3]:
from warthunder2 import spider,warthunderRead
keys,descs,usages,histories = warthunderRead.paraseInCategories(warthunderRead.genRencentCategories())
strList=usages

A wrapper class of haystack retriver, assuming CANDY lib is placed at "../../../libCANDY.so"

In [2]:
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer, AutoTokenizer
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
from transformers import BatchEncoding
import torch
import time
import os, shutil
import gzip
from datasets import load_dataset, Dataset
# the files used/created by pickle are temporary and don't pose any security issue
import pickle  # nosec
import random
import numpy as np
import numpy.typing as npt
import nltk
import sys
from typing import Optional, Union
from typing import Dict, List, Optional, Union, Any

import logging
from collections import OrderedDict, namedtuple

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

from haystack.schema import Document
from haystack.document_stores.base import BaseDocumentStore, FilterType
from haystack.document_stores import KeywordDocumentStore
from haystack.nodes.retriever import BaseRetriever
from haystack.errors import DocumentStoreError

class CANDYRetriever(BaseRetriever):
    def __init__(
        self,
        document_store: Optional[KeywordDocumentStore] = None,
        top_k: int = 10,
        all_terms_must_match: bool = False,
        custom_query: Optional[str] = None,
        custom_rania_name:Optional[str] = 'aknn0',
        scale_score: bool = True,
    ):
        """
        :param document_store: An instance of one of the following DocumentStores to retrieve from: InMemoryDocumentStore, ElasticsearchDocumentStore and OpenSearchDocumentStore.
            If None, a document store must be passed to the retrieve method for this Retriever to work.
        :param all_terms_must_match: Whether all terms of the query must match the document.
                                     If true all query terms must be present in a document in order to be retrieved (i.e the AND operator is being used implicitly between query terms: "cozy fish restaurant" -> "cozy AND fish AND restaurant").
                                     Otherwise at least one query term must be present in a document in order to be retrieved (i.e the OR operator is being used implicitly between query terms: "cozy fish restaurant" -> "cozy OR fish OR restaurant").
                                     Defaults to False.
        :param custom_query: The query string containing a mandatory `${query}` and an optional `${filters}` placeholder.

                                **An example custom_query:**

                                ```python
                                {
                                    "size": 10,
                                    "query": {
                                        "bool": {
                                            "should": [{"multi_match": {
                                                "query": ${query},                 // mandatory query placeholder
                                                "type": "most_fields",
                                                "fields": ["content", "title"]}}],
                                            "filter": ${filters}                  // optional filter placeholder
                                        }
                                    },
                                }
                                ```

                            **For this custom_query, a sample `retrieve()` could be:**

                            ```python
                            self.retrieve(query="Why did the revenue increase?",
                                          filters={"years": ["2019"], "quarters": ["Q1", "Q2"]})
                            ```

                             Optionally, highlighting can be defined by specifying Elasticsearch's highlight settings.
                             See https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html.
                             You will find the highlighted output in the returned Document's meta field by key "highlighted".


                                 **Example custom_query with highlighting:**

                                 ```python
                                 {
                                     "size": 10,
                                     "query": {
                                         "bool": {
                                             "should": [{"multi_match": {
                                                 "query": ${query},                 // mandatory query placeholder
                                                 "type": "most_fields",
                                                 "fields": ["content", "title"]}}],
                                         }
                                     },
                                     "highlight": {             // enable highlighting
                                         "fields": {            // for fields content and title
                                             "content": {},
                                             "title": {}
                                         }
                                     },
                                 }
                                 ```

                                 **For this custom_query, highlighting info can be accessed by:**
                                ```python
                                docs = self.retrieve(query="Why did the revenue increase?")
                                highlighted_content = docs[0].meta["highlighted"]["content"]
                                highlighted_title = docs[0].meta["highlighted"]["title"]
                                ```

        :param top_k: How many documents to return per query.
        :param scale_score: Whether to scale the similarity score to the unit interval (range of [0,1]).
                            If true (default) similarity scores (e.g. cosine or dot_product) which naturally have a different value range will be scaled to a range of [0,1], where 1 means extremely relevant.
                            Otherwise raw similarity scores (e.g. cosine or dot_product) will be used.

        """
        super().__init__()
        self.document_store: Optional[KeywordDocumentStore] = document_store
        self.top_k = top_k
        self.custom_query = custom_query
        self.all_terms_must_match = all_terms_must_match
        self.scale_score = scale_score
        self.ctx_encoder = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
        self.ctx_tokenizer = AutoTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
        self.device = "cuda:" + str(torch.cuda.current_device()) if torch.cuda.is_available() else "cpu"
        print(self.device)
        self.ctx_encoder = self.ctx_encoder.to(self.device)
        self.q_encoder = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
        self.q_tokenizer = AutoTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
        self.q_encoder = self.q_encoder.to(self.device)
        """Loading CANDY

        """
        torch.ops.load_library("../../../libCANDY.so")
        self.rania_name=custom_rania_name
        # gen the input tensor
        torch.ops.CANDY.index_create(self.rania_name, 'flatAMMIPObj')
        torch.ops.CANDY.index_editCfgI64(self.rania_name,'vecDim',768)
        torch.ops.CANDY.index_init(self.rania_name)
        

    def retrieve(
        self,
        query: str,
        filters: Optional[FilterType] = None,
        top_k: Optional[int] = None,
        all_terms_must_match: Optional[bool] = None,
        index: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        scale_score: Optional[bool] = None,
        document_store: Optional[BaseDocumentStore] = None,
    ) -> List[Document]:
        """
        Scan through documents in DocumentStore and return a small number documents
        that are most relevant to the query.

        :param query: The query
        """
        embedQ,listQ = self.encodeQuery(query)
        q0=embedQ[0:1,:]
        ru=torch.ops.CANDY.index_searchString (self.rania_name,q0,self.top_k)
        strList=ru[0]
        documents = [Document( content=item) for item in strList]
        return documents

    def retrieve_batch(
        self,
        queries: List[str],
        filters: Optional[Union[FilterType, List[Optional[FilterType]]]] = None,
        top_k: Optional[int] = None,
        all_terms_must_match: Optional[bool] = None,
        index: Optional[str] = None,
        headers: Optional[Dict[str, str]] = None,
        batch_size: Optional[int] = None,
        scale_score: Optional[bool] = None,
        document_store: Optional[BaseDocumentStore] = None,
    ) -> List[List[Document]]:
        documents = [self.retrieve(i) for i in queries]
        return documents
    def  generate_embeddings(self,model: Union[DPRContextEncoder, DPRQuestionEncoder], encoded_input: BatchEncoding, dim: int,
                        batch_size: int, device: str) -> torch.tensor:
        n_seq = len(encoded_input['input_ids'])
        shapeOut=(n_seq, dim)
        token_embeddings_out = torch.zeros(shapeOut)

        print('Doing inference for', n_seq, 'sequences.')

        model.eval()

        num_batches = int(np.ceil(float(n_seq) / batch_size))
        batch_print = 100
        if device != "cpu":
            start1 = torch.cuda.Event(enable_timing=True)
            end1 = torch.cuda.Event(enable_timing=True)
            start1.record()
        with torch.no_grad():
            for batch in range(num_batches):

                batch_init = batch * batch_size
                batch_end = np.min([batch_init + batch_size, n_seq])

                token_embeddings = model(encoded_input['input_ids'][batch_init:batch_end].to(device),
                                        encoded_input['attention_mask'][batch_init:batch_end].to(device))
                token_embeddings_out[batch_init:batch_end, :] = token_embeddings.pooler_output.cpu()
                if not (batch % batch_print):
                    print('Doing inference for batch', batch, 'of', num_batches)
        if device != "cpu":
            end1.record()
            torch.cuda.synchronize()
            print(f'Inference for {n_seq}, sequences took {(start1.elapsed_time(end1) / 1000):.2f} s')

        return token_embeddings_out
    def tokenize_texts(self,ctx_tokenizer: AutoTokenizer, texts: list, max_length: Optional[int] = None,
                   doc_stride: Optional[int] = None,
                   text_type: Optional[str] = "context", save_sentences: Optional[bool] = False, \
                   fname_sentences: Optional[str] = None) -> BatchEncoding:
        if text_type == "context":
            if max_length == None:
                max_length = 8192
                print("Setting max_length to", max_length)
            if doc_stride == None:
                doc_stride = int(max_length / 2)
                print("Setting doc_stride to", doc_stride)

        start = time.time()
        if text_type == "context":
            encoded_inputs = ctx_tokenizer(texts, padding=True, truncation=True, max_length=max_length, \
                                        return_overflowing_tokens=True, \
                                        stride=doc_stride, return_tensors="pt")
        elif text_type == "query":
            encoded_inputs = ctx_tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

        end = time.time()
        delta_time = end - start
        print(f'Tokenization for {len(texts)}, contexts took {delta_time:.2f} s')

        n_seq = len(encoded_inputs['input_ids'])
        if save_sentences:
            if fname_sentences is not None:
                # Code to generate sentences from tokens
                sentences = []
                for i in range(n_seq):
                    if not (i % 100000):
                        print('Processing sentence', i, 'of', n_seq)
                    sentences += [' '.join(encoded_inputs.tokens(i))]

                with open(fname_sentences, 'wb') as f:
                    pickle.dump(sentences, f)
                del sentences
            else:
                raise BaseException(
                    'tokenize_texts: The filename where the original sentences will be saved was not specified.')

        return encoded_inputs
    def encodeContext (self,ctx:str, batchSize: Optional[int] = 64):
        encoded_contexts = self.tokenize_texts(self.ctx_tokenizer, ctx, text_type="context").to(self.device)
        embeddings_batch = self.generate_embeddings(self.ctx_encoder, encoded_contexts, 768, batchSize,
                                               self.device)
                # Define the string to initialize each element
        initial_string = ctx

        # Create the list using a list comprehension
        list_of_strings = [initial_string for _ in range(embeddings_batch.size(0))]
        return embeddings_batch,list_of_strings
    def encodeQuery (self,qtx:str, batchSize: Optional[int] = 64):
        text_type = "query"
        encoded_quries = self.tokenize_texts(self.q_tokenizer, qtx, text_type=text_type).to(self.device)
        embeddings_batch = self.generate_embeddings(self.q_encoder, encoded_quries, 768, batchSize,
                                               self.device)
        initial_string = qtx

        # Create the list using a list comprehension
        list_of_strings = [initial_string for _ in range(embeddings_batch.size(0))]
        return embeddings_batch,list_of_strings
    def insertContext(self,ctx:str):
        embeddings_batch,list_of_strings=self.encodeContext(ctx)
        return torch.ops.CANDY.index_insertString (self.rania_name,embeddings_batch,list_of_strings)
    def deleteContext(self,ctx:str):
        embeddings_batch,list_of_strings=self.encodeContext(ctx)
        return torch.ops.CANDY.index_deleteString(self.rania_name,embeddings_batch,1)
    def deleteQuery(self,qtx:str,k=1):
        embeddings_batch,list_of_strings=self.encodeQuery(qtx)
        return torch.ops.CANDY.index_deleteString(self.rania_name,embeddings_batch,k)

In [4]:
a=CANDYRetriever(top_k=1)
ctx = strList[100:175]
qtx = ['What is A-6E_TRAM?']
for i in ctx:
    a.insertContext(i)

Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.weight', 'ctx_encoder.bert_model.pooler.dense.bias']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


cpu


Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.weight', 'question_encoder.bert_model.pooler.dense.bias']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


OSError: /home/tony/projects/CANDY/libCANDY.so: cannot open shared object file: No such file or directory

In [6]:
ru=a.retrieve(qtx[0])
print(ru)

Tokenization for 16, contexts took 0.00 s
Doing inference for 1 sequences.
Doing inference for batch 0 of 1
Inference for 1, sequences took 0.01 s
[<Document: {'content': 'Usage of P-51D-30:  Offense ', 'content_type': 'text', 'score': None, 'meta': {}, 'id_hash_keys': ['content'], 'embedding': None, 'id': '513ed827de1f3047b09b1108244aa81'}>]


In [10]:
print(a.retrieve('what is A-6E_TRAM?'))

Tokenization for 18, contexts took 0.00 s
Doing inference for 1 sequences.
Doing inference for batch 0 of 1
Inference for 1, sequences took 0.01 s
[<Document: {'content': 'Usage of A-6E_TRAM:  Describe the tactics of playing in the aircraft, the features of using aircraft in a team and advice on tactics. Refrain from creating a "guide" - do not impose a single point of view, but instead, give the reader food for thought. Examine the most dangerous enemies and give recommendations on fighting them. If necessary, note the specifics of the game in different modes (AB, RB, SB). ', 'content_type': 'text', 'score': None, 'meta': {}, 'id_hash_keys': ['content'], 'embedding': None, 'id': '28e778ece84e1914914962eef3b1656f'}>]


Until here, we have susccefully implemented a retriver with CANDY, which can basically handle Open QA problems. Now, let's do something more fancy! Firsy, please download llama model file as follows

In [2]:
from fastrag.prompters.invocation_layers.llama_cpp import LlamaCPPInvocationLayer
from haystack import Pipeline
from haystack.nodes.prompt import PromptNode
from haystack.nodes import PromptModel
from haystack.nodes.prompt.prompt_template import PromptTemplate
from haystack.nodes import AnswerParser
from haystack.nodes.ranker import SentenceTransformersRanker
from haystack.nodes.retriever import BM25Retriever
from haystack.document_stores import InMemoryDocumentStore
from haystack import Document

In [25]:
retriever = CANDYRetriever(top_k=1,custom_rania_name='warthunder')
reranker = SentenceTransformersRanker(
    batch_size= 32,
    model_name_or_path= "cross-encoder/ms-marco-MiniLM-L-6-v2",
    top_k= 1,
    use_gpu= False
)
AParser = AnswerParser()
LFQA = PromptTemplate(
    prompt="""{join(documents)}
Question: {query}
Answer: """,
    output_parser= AParser
)
PrompterModel = PromptModel(
    model_name_or_path= 'openlm-research/open_llama_3b',
    model_kwargs={ 'max_new_token':256}
)
Prompter = PromptNode(
    model_name_or_path= PrompterModel,
    default_prompt_template= LFQA
)
pipe = Pipeline()

pipe.add_node(component=retriever, name= 'Retriever',inputs= ["Query"])
pipe.add_node(component=reranker, name= 'Reranker',inputs= ["Retriever"])
pipe.add_node(component=Prompter, name= 'Prompter',inputs= ["Reranker"])

Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.weight', 'ctx_encoder.bert_model.pooler.dense.bias']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


cuda:0


Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


INFO:Sat Mar 23 14:58:55 2024:/home/rag/projects/CANDY/src/CANDY/FlatAMMIPObjIndex.cpp:24|virtual bool CANDY::FlatAMMIPObjIndex::setConfig(INTELLI::ConfigMapPtr)|[34mSize of DCO=-1[0m


Let's first ask about F-16A_ADF plane without adding war thunder knowledge

In [26]:
rubbishStr = 'hahahahahahahahahahaha'
retriever.insertContext(rubbishStr)
answer_result = pipe.run("How to use F-16A_ADF?",params={
    "Retriever": {
        "top_k": 1
    },
    "Reranker": {
        "top_k": 1
    },
    "generation_kwargs":{
        "do_sample": False,
        "max_new_tokens": 128
    }
})
print(f"Answer: {answer_result['answers'][0].answer}")

Setting max_length to 8192
Setting doc_stride to 4096
Tokenization for 22, contexts took 0.00 s
Doing inference for 1 sequences.
Doing inference for batch 0 of 1
Inference for 1, sequences took 0.00 s
Tokenization for 21, contexts took 0.00 s
Doing inference for 1 sequences.
Doing inference for batch 0 of 1
Inference for 1, sequences took 0.00 s
Answer: 1. Open the F-16A_ADF.exe file.
2. Click on the "Start" button.
3. Click on the "F-16A_ADF" icon.
4. Click on the "Start" button.
5. Click on the "F-16A_ADF" icon.
6. Click on the "Start" button.
7. Click on the "F-16A_AD


Llama knows something, but not too much, let's feed the RAG with war thunder knowledge, here is what we are going to feed

In [27]:
for i in range(100,175):
    if(keys[i]=='F-16A_ADF' or keys[i]=='F-16A'):
        print("idx="+str(i)+", usage:"+strList[i])

idx=167, usage:Usage of F-16A:  The F-16A can be a powerful fighter with multiple playstyles, one option being to simply rush straight into battle. Players who plan to stay low and accelerate straight into action will be satisfied with the F-16's low-altitude performance and radar. A mixed countermeasure loadout (1 flare, 1 chaff) is recommended because it allows 30 countermeasure drops against radar or IR missiles. To be prepared for matches that may extend longer than usual, select the 30-minute fuel option in conjunction with the 300-gallon drop tank, or the full tank of 41 minutes. The F-16 consumes a lot of fuel on afterburner, so it's essential to use it wisely unless the max fuel amount is selected. If low fuel maneuverability is desirable, bringing 20 minutes on smaller maps and 30 minutes on larger maps is viable, but this requires management of fuel (i.e.  "halfterburner" or using the scroll wheel to partially engage afterburner). 
idx=168, usage:Usage of F-16A_ADF:  The F-16

Here is the online feeding

In [28]:
for i in strList[100:175]:
    print(i)
    retriever.insertContext(i)

Usage of B-24D-25-CO:  The Liberator will face many high-altitude dedicated bomber interceptors with 20–30 mm cannons, making for an extremely dangerous environment. The defensive guns should be treated as a last resort; combat with fighters should be avoided at all costs by the use of careful route planning and cloud cover, if available. 
Setting max_length to 8192
Setting doc_stride to 4096
Tokenization for 341, contexts took 0.00 s
Doing inference for 1 sequences.
Doing inference for batch 0 of 1
Inference for 1, sequences took 0.00 s
Usage of B-29A-BN:  The ultimate strategic bomber for the USAF. Loaded up with eight metric tonnes of bombs and twelve good ol' Browning heavy machine guns, this is the Superfortress. A wonderful advertising name, but don't be fooled, it is bigger than a barn and thus impossible to miss. Keep a safe distance from any hot combat zones. Your defensive turrets will not keep you safe, they will shred any foe getting close, but the B-29's enemies do not nee

Do it again?

In [29]:

answer_result = pipe.run("How to use F-16A_ADF?",params={
    "Retriever": {
        "top_k": 2
    },
    "Reranker": {
        "top_k": 2
    },
    "generation_kwargs":{
        "do_sample": False,
        "max_new_tokens": 256
    }
})
print(f"Llama: {answer_result['answers'][0].answer}")

Tokenization for 21, contexts took 0.00 s
Doing inference for 1 sequences.
Doing inference for batch 0 of 1
Inference for 1, sequences took 0.00 s
Llama: 


*

*F-16A_ADF is a very powerful fighter with multiple playstyles, rushing straight into battle or hanging high and utilizing the AIM-7M Sparrows. Players that plan to stay low and accelerate straight into action will be satisfied with the F-16's low-altitude performance and radar. Players that want to keep high and sling AIM-7Ms at targets will be more than pleased with the F-1


Now it knows that F-16A_ADF is some air craft from warthunder knowledge!

Let's remove the most relevant knowledge of XP-936 as follows

In [31]:
retriever.deleteQuery("How to use F-16A_ADF?",k=2)

Tokenization for 21, contexts took 0.00 s
Doing inference for 1 sequences.
Doing inference for batch 0 of 1
Inference for 1, sequences took 0.00 s


tensor([[1.]])

In [32]:
answer_result = pipe.run("How to use F-16A_ADF?",params={
    "Retriever": {
        "top_k": 2
    },
    "Reranker": {
        "top_k": 2
    },
    "generation_kwargs":{
        "do_sample": False,
        "max_new_tokens": 128
    }
})
print(f"Llama: {answer_result['answers'][0].answer}")

Tokenization for 21, contexts took 0.00 s
Doing inference for 1 sequences.
Doing inference for batch 0 of 1
Inference for 1, sequences took 0.00 s
Llama: 


*

*F-16A_ADF is a stand-alone program that can be used to simulate the F-16A in the ADF (Air Defense Fighter) role.

*F-16A_ADF is a stand-alone program that can be used to simulate the F-16A in the ADF (Air Defense Fighter) role.

*F-16A_ADF is a stand


See, Llama becomes fool again, but it tries to satisify us by using other WarThunder knowledge, such as F-16A's knowledge