In [28]:
from haystack.nodes import PreProcessor
from haystack.utils import convert_files_to_docs

# pre-process docs 
def preprocess_docs(doc_dir):
    all_docs = convert_files_to_docs(dir_path=doc_dir)
    preprocessor = PreProcessor(
        clean_empty_lines=True,
        clean_whitespace=True,
        clean_header_footer=False,
        split_by="word",
        split_respect_sentence_boundary=True,
        split_overlap=30, 
        split_length=100
    )
    docs = preprocessor.process(all_docs)
    print(f"n_files_input: {len(all_docs)}\nn_docs_output: {len(docs)}")
    return docs

In [37]:
doc_dir = r"C:\Users\johna\anaconda3\envs\lfqa_env\haystack-lfqa\documents"
docs = preprocess_docs(doc_dir)

Preprocessing:   0%|                                                                             | 0/2 [00:00<?, ?docs/s]We found one or more sentences whose word count is higher than the split length.
Preprocessing: 100%|█████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 17.72docs/s]

n_files_input: 2
n_docs_output: 276





In [45]:
# from sqlalchemy import create_engine
# engine = create_engine('sqlite:///faiss_document_store.db')  # Use the correct path to your SQLite DB file
# engine.execute("DROP TABLE document")  # Be careful with this, it will delete all your documents!


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x1e92adc3940>

In [46]:
from haystack.document_stores import FAISSDocumentStore

# create FAISS in memory
def vector_stores(docs):
    document_store = FAISSDocumentStore(sql_url="sqlite:///:memory:", faiss_index_factory_str="Flat", embedding_dim=384)
    document_store.write_documents(docs)
    return document_store

document_store = vector_stores(docs)

Writing Documents: 10000it [00:00, 16048.11it/s]                                                                         


In [47]:
from haystack.nodes import EmbeddingRetriever


def generate_embeddings(document_store):
    retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model="sentence-transformers/all-MiniLM-L6-v2"
    )
    # Important:
    # Now that we initialized the Retriever, we need to call update_embeddings() to iterate over all
    # previously indexed documents and update their embedding representation.
    # While this can be a time consuming operation (depending on the corpus size), it only needs to be done once.
    # At query time, we only need to embed the query and compare it to the existing document embeddings, which is very fast.
    document_store.update_embeddings(retriever)
    return retriever

retriever = generate_embeddings(document_store)

Updating Embedding:   0%|                                                                     | 0/276 [00:00<?, ? docs/s]
Batches:   0%|                                                                                     | 0/9 [00:00<?, ?it/s][A
Batches:  11%|████████▌                                                                    | 1/9 [00:04<00:32,  4.05s/it][A
Batches:  22%|█████████████████                                                            | 2/9 [00:06<00:20,  2.87s/it][A
Batches:  33%|█████████████████████████▋                                                   | 3/9 [00:08<00:15,  2.55s/it][A
Batches:  44%|██████████████████████████████████▏                                          | 4/9 [00:09<00:10,  2.10s/it][A
Batches:  56%|██████████████████████████████████████████▊                                  | 5/9 [00:11<00:08,  2.01s/it][A
Batches:  67%|███████████████████████████████████████████████████▎                         | 6/9 [00:13<00:05,  1.97s/it][A
Bat

In [81]:
from haystack.nodes import PromptNode, PromptTemplate, AnswerParser

lfqa_prompt = PromptTemplate(
    prompt="""Synthesize a comprehensive answer from the text for the given question.
                             Provide a clear and concise response that summarizes the key points and information presented in the text.
                             Your answer should be in your own words and be no longer than 50 words.
                             \n\n Related text: {join(documents)} \n\n Question: {query} \n\n Answer:""",
    output_parser=AnswerParser(),
)

prompt_node = PromptNode(model_name_or_path="tiiuae/falcon-7b-instruct", 
                         default_prompt_template=lfqa_prompt, 
                         model_kwargs={'trust_remote_code': True, "load_in_4bit":True})
# "load_in_4bit":True,

Downloading (…)lve/main/config.json: 100%|███████████████████████████████████████████████| 667/667 [00:00<00:00, 221kB/s]
Downloading (…)/configuration_RW.py: 100%|███████████████████████████████████████████| 2.61k/2.61k [00:00<00:00, 859kB/s]
A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- configuration_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Downloading (…)main/modelling_RW.py: 100%|██████████████████████████████████████████| 47.5k/47.5k [00:00<00:00, 11.9MB/s]
A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- modelling_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Downloading (…)model.bin.index.json: 100%|██████████████████████████████████████

RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 330366976 bytes.

In [55]:
from haystack.pipelines import Pipeline

pipe = Pipeline()
pipe.add_node(component=retriever, name="retriever", inputs=["query"])
pipe.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
output = pipe.run(query="what are ngram language models?")

print(output["answers"][0].answer)


Batches: 100%|█████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.52it/s]
Token indices sequence length is longer than the specified maximum sequence length for this model (1645 > 512). Running this sequence through the model will result in indexing errors
The prompt has been truncated from 1645 tokens to 412 tokens so that the prompt length and answer length (100 tokens) fit within the max token limit (512 tokens). Shorten the prompt to prevent it from being cut off


The simplest language model that assigns probabilities language model LM to sentences and sequences of words is the n-gram.
