In [1]:
import os
import shutil

import torch
from torch import cuda, bfloat16
import transformers
from transformers import AutoTokenizer

from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings

In [2]:
# Sentence-embedding checkpoint handed to HuggingFaceEmbeddings.
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
# Causal language model checkpoint used for answer generation.
LLM_MODEL = "meta-llama/Llama-2-7b-chat-hf"
# Directory passed to Chroma as its persist_directory.
CHROMA_PATH = "chroma"

## Load LLM

In [3]:
# Keep a single source of truth for the checkpoint name: derive `model_id`
# from the config constant instead of repeating the literal.
model_id = LLM_MODEL

# Explicit device string. It is not consumed in this cell (placement is left
# to `device_map='auto'` below); kept for cells that want a concrete device.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

model_config = transformers.AutoConfig.from_pretrained(LLM_MODEL)
# NOTE(review): no dtype or quantization config is passed here, so the weights
# load in the library's default precision — confirm this fits in memory.
model = transformers.AutoModelForCausalLM.from_pretrained(
    LLM_MODEL,
    # NOTE(review): this permits code shipped in the model repository to run
    # locally; confirm it is actually required for this checkpoint.
    trust_remote_code=True,
    config=model_config,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline,
# then expose that pipeline to LangChain as an LLM.
# NOTE(review): `model` is passed as an instantiated object, so `torch_dtype`
# and `device_map` here may not change how the weights were loaded — confirm
# against the installed transformers version.
query_pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.float16,
)
llm = HuggingFacePipeline(pipeline=query_pipeline)

In [32]:
def test_model(tokenizer, pipeline, prompt_to_test):
    """
    Perform a query
    print the result
    Args:
        tokenizer: the tokenizer
        pipeline: the pipeline
        prompt_to_test: the prompt
    Returns
        None
    """
    # adapted from https://huggingface.co/blog/llama2#using-transformers
    time_1 = time()
    sequences = pipeline(
        prompt_to_test,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,)
    time_2 = time()
    print(f"Test inference: {round(time_2-time_1, 3)} sec.")
    for seq in sequences:
        print(f"Result: {seq['generated_text']}")

## Init vector DB

In [5]:
# Embedding function the vector store uses to embed incoming queries.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
# Open the Chroma store located at CHROMA_PATH.
# NOTE(review): presumably the store was populated elsewhere with the same
# embedding model — confirm, since nothing in this notebook builds it.
db = Chroma(
    persist_directory=CHROMA_PATH,
    embedding_function=embeddings,
)

In [6]:
# Quick check that the store returns documents for a simple query.
db.similarity_search(query="Romeo")

[Document(page_content='ROMEO. [_Aside._] Shall I hear more, or shall I speak at this?\n\nJULIET. ’Tis but thy name that is my enemy; Thou art thyself, though not a Montague. What’s Montague? It is nor hand nor foot, Nor arm, nor face, nor any other part Belonging to a man. O be some other name. What’s in a name? That which we call a rose By any other name would smell as sweet; So Romeo would, were he not Romeo call’d, Retain that dear perfection which he owes Without that title. Romeo, doff thy name, And for thy name, which is no part of thee, Take all myself.\n\nROMEO. I take thee at thy word. Call me but love, and I’ll be new baptis’d; Henceforth I never will be Romeo.\n\nJULIET. What man art thou that, thus bescreen’d in night So stumblest on my counsel?\n\nROMEO. By a name I know not how to tell thee who I am: My name, dear saint, is hateful to myself, Because it is an enemy to thee. Had I it written, I would tear the word.\n\nJULIET. My ears have yet not drunk a hundred words Of 

## Build RAG pipeline

In [7]:
# Retriever over the Chroma store with its default search settings.
retriever = db.as_retriever()

# Question-answering chain: retrieved passages are placed into one prompt
# ("stuff" chain type) and sent to the local LLM.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    verbose=False,
)

## Test

In [27]:
# Test question about Dante's Inferno, used by the cells below.
query = "In the Inferno, what three creatures did Dante see in the wood before the gates of hell?"

In [10]:
# Send the question through the RetrievalQA chain and print only the
# 'result' field of the returned mapping.
result = qa.invoke(query)
print(result["result"])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Dantès did not answer; he feared that the emotion of his voice would betray him. The jailer went away shaking his head. Night came; Dantès hoped that his neighbor would profit by the silence to address him, but he was mistaken. The next morning, however, just as he removed his bed from the wall, he heard three knocks; he threw himself on his knees.

“Is it you?” said he; “I am here.”

“Is your jailer gone?”

“Yes,” said Dantès; “he will not return until the evening; so that we have twelve hours before us.”

“I can work, then?” said the voice.

“Oh, yes, yes; this instant, I entreat you.”

In a moment that part of the floor on which Dantès was resting his two hands, as he knelt with his head in the opening, suddenly gave way; he drew back smartly, while a mass of stones and earth disappeared in a hole that opened beneath the 

In [23]:
# IPython-only help lookup (`?name`) showing the signature and docstring of the
# search method; a development aid that is not valid in a plain .py script.
?db.similarity_search

[0;31mSignature:[0m
[0mdb[0m[0;34m.[0m[0msimilarity_search[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mquery[0m[0;34m:[0m [0;34m'str'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mk[0m[0;34m:[0m [0;34m'int'[0m [0;34m=[0m [0;36m4[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfilter[0m[0;34m:[0m [0;34m'Optional[Dict[str, str]]'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0mkwargs[0m[0;34m:[0m [0;34m'Any'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'List[Document]'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Run similarity search with Chroma.

Args:
    query (str): Query text to search for.
    k (int): Number of results to return. Defaults to 4.
    filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.

Returns:
    List[Document]: List of documents most similar to the query text.
[0;31mFile:[0m      /opt/miniconda3/envs/chatbot/lib/python3.12/site-packages/lan

In [28]:
# Fetch ten hits instead of the default four, to see what the retriever has to
# choose from for this question.
query_results = db.similarity_search(query=query, k=10)
print(len(query_results))

10


In [29]:
# Print the source file of every hit, in the order the search returned them.
for hit in query_results:
    print(hit.metadata["source"])

documents/books/divinecomedy.txt
documents/books/countmontecristo.txt
documents/books/countmontecristo.txt
documents/books/divinecomedy.txt
documents/books/countmontecristo.txt
documents/books/countmontecristo.txt
documents/books/countmontecristo.txt
documents/books/countmontecristo.txt
documents/books/countmontecristo.txt
documents/books/countmontecristo.txt


In [31]:
# Display the hit at index 3 of the k=10 search results.
query_results[3]

Document(page_content='Gives sentence, and dismisses them beneath, According as he foldeth him around: For when before him comes th’ ill fated soul, It all confesses; and that judge severe Of sins, considering what place in hell Suits the transgression, with his tail so oft Himself encircles, as degrees beneath He dooms it to descend. Before him stand Always a num’rous throng; and in his turn Each one to judgment passing, speaks, and hears His fate, thence downward to his dwelling hurl’d.\n\n“O thou! who to this residence of woe Approachest?” when he saw me coming, cried Minos, relinquishing his dread employ, “Look how thou enter here; beware in whom Thou place thy trust; let not the entrance broad Deceive thee to thy harm.” To him my guide: “Wherefore exclaimest? Hinder not his way By destiny appointed; so ’tis will’d Where will and power are one. Ask thou no more.”\n\nNow ’gin the rueful wailings to be heard. Now am I come where many a plaining voice Smites on mine ear. Into a place 

In [16]:
# Display the hit at index 2.
# NOTE(review): this cell's execution count (In [16]) predates the cell that
# defines the current `query_results` (In [28]), so the saved output below may
# not match a fresh Restart & Run All.
query_results[2]

Document(page_content='It was there he must dig.\n\nBut by some strange play of emotion, in proportion as the proofs that Faria, had not been deceived became stronger, so did his heart give way, and a feeling of discouragement stole over him. This last proof, instead of giving him fresh strength, deprived him of it; the pickaxe descended, or rather fell; he placed it on the ground, passed his hand over his brow, and remounted the stairs, alleging to himself, as an excuse, a desire to be assured that no one was watching him, but in reality because he felt that he was about to faint.\n\nThe island was deserted, and the sun seemed to cover it with its fiery glance; afar off, a few small fishing boats studded the bosom of the blue ocean.\n\nDantès had tasted nothing, but he thought not of hunger at such a moment; he hastily swallowed a few drops of rum, and again entered the cavern.\n\nThe pickaxe that had seemed so heavy, was now like a feather in his grasp; he seized it, and attacked the

In [17]:
# Display the hit at index 3.
# NOTE(review): this repeats an earlier `query_results[3]` cell, and its
# execution count (In [17]) predates the cell that defines the current
# `query_results` (In [28]); the saved output below is likely stale.
query_results[3]

Document(page_content='The night was one of feverish distraction, and in its progress visions, good and evil, passed through Dantès’ mind. If he closed his eyes, he saw Cardinal Spada’s letter written on the wall in characters of flame—if he slept for a moment the wildest dreams haunted his brain. He ascended into grottos paved with emeralds, with panels of rubies, and the roof glowing with diamond stalactites. Pearls fell drop by drop, as subterranean waters filter in their caves. Edmond, amazed, wonderstruck, filled his pockets with the radiant gems and then returned to daylight, when he discovered that his prizes had all changed into common pebbles. He then endeavored to re-enter the marvellous grottos, but they had suddenly receded, and now the path became a labyrinth, and then the entrance vanished, and in vain did he tax his memory for the magic and mysterious word which opened the splendid caverns of Ali Baba to the Arabian fisherman. All was useless, the treasure disappeared, a