## Libraries

In [1]:
from langchain.vectorstores import Chroma    # vectordb
from langchain_community.llms import Ollama  # llm
from langchain_community.embeddings import HuggingFaceEmbeddings # embedder
from langchain.chains.combine_documents import create_stuff_documents_chain # chain 
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain


from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
import pprint

## Load documents

In [2]:
from datasets import load_dataset

# Dataset identifier handed to `load_dataset`.
RAG_DATASET_NAME = "neural-bridge/rag-dataset-12000"
rag_dataset = load_dataset(RAG_DATASET_NAME)

In [3]:
# Pull the two splits used in the rest of the notebook out of the dataset dict.
train = rag_dataset['train']
test = rag_dataset['test']

In [4]:
# Pretty-print the first training record to inspect its fields.
pprint.pprint(train[0])

{'answer': 'The Berry Export Summary 2028 is a dedicated export plan for the '
           'Australian strawberry, raspberry, and blackberry industries. It '
           'maps the sectors’ current position, where they want to be, '
           'high-opportunity markets, and next steps. The purpose of this plan '
           'is to grow their global presence over the next 10 years.',
 'context': 'Caption: Tasmanian berry grower Nic Hansen showing Macau chef '
            'Antimo Merone around his property as part of export engagement '
            'activities.\n'
            'THE RISE and rise of the Australian strawberry, raspberry and '
            'blackberry industries has seen the sectors redouble their '
            'international trade focus, with the release of a dedicated export '
            'plan to grow their global presence over the next 10 years.\n'
            'Driven by significant grower input, the Berry Export Summary 2028 '
            'maps the sectors’ current position,

In [5]:
from langchain.docstore.document import Document as LangchainDocument
from tqdm.notebook import tqdm

# One LangChain document per training record that has a question. The record's
# position in `train` is kept as `dataset_id` so chunks can be traced back to it.
# `total` is passed explicitly because `enumerate` has no length, which would
# otherwise leave the progress bar without a total.
RAW_KNOWLEDGE_BASE = [
    LangchainDocument(page_content=doc["context"], metadata={'dataset_id': dataset_id})
    for dataset_id, doc in tqdm(enumerate(train), total=len(train))
    if doc['question'] is not None
]

# Questions of the same records, filtered identically so both lists stay aligned.
QUESTIONS = [doc["question"] for doc in tqdm(train) if doc['question'] is not None]

0it [00:00, ?it/s]

  0%|          | 0/9600 [00:00<?, ?it/s]

In [6]:
# Display the first document of the knowledge base as a sanity check.
RAW_KNOWLEDGE_BASE[0]

Document(page_content='Caption: Tasmanian berry grower Nic Hansen showing Macau chef Antimo Merone around his property as part of export engagement activities.\nTHE RISE and rise of the Australian strawberry, raspberry and blackberry industries has seen the sectors redouble their international trade focus, with the release of a dedicated export plan to grow their global presence over the next 10 years.\nDriven by significant grower input, the Berry Export Summary 2028 maps the sectors’ current position, where they want to be, high-opportunity markets and next steps.\nHort Innovation trade manager Jenny Van de Meeberg said the value and volume of raspberry and blackberry exports rose by 100 per cent between 2016 and 2017. She said the Australian strawberry industry experienced similar success with an almost 30 per cent rise in export volume and a 26 per cent rise in value to $32.6M over the same period.\n“Australian berry sectors are in a firm position at the moment,” she said. “Product

## Split documents

In [7]:
# Splitter configuration: 1000-character chunks with a 100-character overlap;
# `add_start_index` records each chunk's offset in its metadata.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 100,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Chunk every document of the knowledge base.
texts = text_splitter.split_documents(RAW_KNOWLEDGE_BASE)

In [8]:
# Display the first chunk produced by the splitter.
texts[0]

Document(page_content='Caption: Tasmanian berry grower Nic Hansen showing Macau chef Antimo Merone around his property as part of export engagement activities.\nTHE RISE and rise of the Australian strawberry, raspberry and blackberry industries has seen the sectors redouble their international trade focus, with the release of a dedicated export plan to grow their global presence over the next 10 years.\nDriven by significant grower input, the Berry Export Summary 2028 maps the sectors’ current position, where they want to be, high-opportunity markets and next steps.\nHort Innovation trade manager Jenny Van de Meeberg said the value and volume of raspberry and blackberry exports rose by 100 per cent between 2016 and 2017. She said the Australian strawberry industry experienced similar success with an almost 30 per cent rise in export volume and a 26 per cent rise in value to $32.6M over the same period.', metadata={'dataset_id': 0, 'start_index': 0})

## Initialize ChromaDB

In [9]:
from pathlib import Path

EMBEDDING_MODEL_NAME = 'intfloat/e5-large'
# Alternative model: 'sentence-transformers/all-MiniLM-L6-v2'

# Single source of truth for the on-disk location of the vector store, used by
# both the "create" and the "load" branch below.
CHROMA_PERSIST_DIR = "./chroma_db"

embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    multi_process=True,
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},  # set True for cosine similarity
)

# Re-running this cell must not embed and insert the whole corpus again into an
# already persisted store, so load from disk when a non-empty store exists and
# build it only on the first run.
# NOTE(review): delete CHROMA_PERSIST_DIR after changing the embedding model or
# the splitter settings, otherwise the stale index is loaded.
_persist_path = Path(CHROMA_PERSIST_DIR)
if _persist_path.is_dir() and any(_persist_path.iterdir()):
    # load from disk
    KNOWLEDGE_VECTOR_DATABASE = Chroma(
        persist_directory=CHROMA_PERSIST_DIR,
        embedding_function=embedding_model,
    )
else:
    # create new database
    KNOWLEDGE_VECTOR_DATABASE = Chroma.from_documents(
        texts,
        embedding_model,
        persist_directory=CHROMA_PERSIST_DIR,
    )  # distance_strategy

In [10]:
# Sanity check: ask the vector store for the 5 chunks most similar to a sample question.
KNOWLEDGE_VECTOR_DATABASE.similarity_search(query='What is the Berry Export Summary 2028 and what is its purpose?', k=5)

[Document(page_content='Caption: Tasmanian berry grower Nic Hansen showing Macau chef Antimo Merone around his property as part of export engagement activities.\nTHE RISE and rise of the Australian strawberry, raspberry and blackberry industries has seen the sectors redouble their international trade focus, with the release of a dedicated export plan to grow their global presence over the next 10 years.\nDriven by significant grower input, the Berry Export Summary 2028 maps the sectors’ current position, where they want to be, high-opportunity markets and next steps.\nHort Innovation trade manager Jenny Van de Meeberg said the value and volume of raspberry and blackberry exports rose by 100 per cent between 2016 and 2017. She said the Australian strawberry industry experienced similar success with an almost 30 per cent rise in export volume and a 26 per cent rise in value to $32.6M over the same period.', metadata={'dataset_id': 0, 'start_index': 0}),
 Document(page_content='“Australia

In [11]:
# Reranker used to reorder the passages returned by the vector search.
from ragatouille import RAGPretrainedModel

RERANKER_MODEL_NAME = "colbert-ir/colbertv2.0"
RERANKER = RAGPretrainedModel.from_pretrained(RERANKER_MODEL_NAME)

In [12]:
def retriever(
    question,
    knowledge_index,
    num_retrieved_docs = 30,
    reranker = None,
    num_docs_final = 5
):
    """Return the text of the passages most relevant to ``question``.

    Args:
        question: Query string passed to the similarity search and, when one is
            given, to the reranker.
        knowledge_index: Object exposing ``similarity_search(query=..., k=...)``
            whose results carry a ``page_content`` attribute.
        num_retrieved_docs: Number of candidates requested from ``knowledge_index``.
        reranker: Optional object exposing ``rerank(question, docs, k=...)`` that
            returns items indexable by ``"content"``. Skipped when falsy.
        num_docs_final: Maximum number of passages returned.

    Returns:
        A list of at most ``num_docs_final`` passage strings; empty when the
        similarity search finds nothing.
    """
    print("=> Retrieving documents...")
    candidates = knowledge_index.similarity_search(query=question, k=num_retrieved_docs)
    relevant_docs = [doc.page_content for doc in candidates]  # keep only the text

    # Optionally rerank results. Skipped when retrieval came back empty, so the
    # reranker is never called with no documents.
    if reranker and relevant_docs:
        print("=> Reranking documents...")
        reranked = reranker.rerank(question, relevant_docs, k=num_docs_final)
        relevant_docs = [doc["content"] for doc in reranked]

    return relevant_docs[:num_docs_final]

In [13]:
# Run the retrieve-then-rerank helper on a sample question and show the passages.
sample_question = 'What is the Berry Export Summary 2028 and what is its purpose?'
context = retriever(sample_question, KNOWLEDGE_VECTOR_DATABASE, reranker=RERANKER)

context

=> Retrieving documents...
=> Reranking documents...


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 15.27it/s]


['Caption: Tasmanian berry grower Nic Hansen showing Macau chef Antimo Merone around his property as part of export engagement activities.\nTHE RISE and rise of the Australian strawberry, raspberry and blackberry industries has seen the sectors redouble their international trade focus, with the release of a dedicated export plan to grow their global presence over the next 10 years.\nDriven by significant grower input, the Berry Export Summary 2028 maps the sectors’ current position, where they want to be, high-opportunity markets and next steps.\nHort Innovation trade manager Jenny Van de Meeberg said the value and volume of raspberry and blackberry exports rose by 100 per cent between 2016 and 2017. She said the Australian strawberry industry experienced similar success with an almost 30 per cent rise in export volume and a 26 per cent rise in value to $32.6M over the same period.',
 '“We have a great product, we are hungry to expand trade and now with this new plan in place, we have 

## Initialize LLM

In [14]:
# Model tag passed to the Ollama wrapper.
LLM_MODEL_NAME = "llama2:7b-chat"
llm = Ollama(model=LLM_MODEL_NAME)

## Create the chain

In [17]:
# Prompt with two placeholders: `{context}` for the supplied documents and
# `{input}` for the user question.
template = (
    "Answer the following question based only on the provided context with maximum of 15 tokens:\n"
    "\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
    "\n"
    "Question: {input}\n"
)

# Chain that fills the prompt with the documents and sends it to the LLM.
prompt = ChatPromptTemplate.from_template(template)
document_chain = create_stuff_documents_chain(llm, prompt)

In [18]:
from langchain_core.documents import Document

# Wrap the retrieved passage strings into Document objects before handing them
# to the document chain.
context_documents = [Document(page_content=txt) for txt in context]
chain_inputs = {
    "input": 'What is the Berry Export Summary 2028 and what is its purpose?',
    "context": context_documents,
}
document_chain.invoke(chain_inputs)

"The Berry Export Summary 2028 is a dedicated export plan to grow the global presence of Australian strawberry, raspberry, and blackberry industries over the next 10 years. Its purpose is to provide a roadmap for the sectors' international trade focus, identifying high-opportunity markets and outlining levy-funded activities to support trade growth."

## All-in-one RAG

In [18]:
# Named `vector_retriever` so that the `retriever()` helper function defined
# earlier in the notebook is not overwritten; rebinding that name would make
# the earlier cell that calls `retriever(...)` fail when re-run.
vector_retriever = KNOWLEDGE_VECTOR_DATABASE.as_retriever()

# End-to-end chain: retrieve documents for the input, then answer with `document_chain`.
retrieval_chain = create_retrieval_chain(vector_retriever, document_chain)

In [19]:
# Ask the full retrieval chain the sample question.
rag_inputs = {"input": 'What is the Berry Export Summary 2028 and what is its purpose?'}
response = retrieval_chain.invoke(rag_inputs)

In [20]:
# Show only the generated answer from the chain's response.
response['answer']

"The Berry Export Summary 2028 is a dedicated export plan that outlines the strategies for growing the Australian strawberry, raspberry, and blackberry industries' global presence over the next 10 years. The purpose of this summary is to map the sectors' current position, identify high-opportunity markets, and outline next steps for the industry to achieve a 5% boost in exports across identified markets by 2021 for raspberries and blackberries, and at least an 8% increase in exports by volume in markets with a capacity and willingness to pay a premium for quality fruit."