# Lib list

In [1]:
!pip3 install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [2]:
!pip install -q transformers ipywidgets

In [3]:
!pip install -q scikit-learn

In [4]:
#libraries installed in venv
!pip list

Package            Version
------------------ ------------
asttokens          2.4.1
certifi            2024.2.2
charset-normalizer 3.3.2
colorama           0.4.6
comm               0.2.2
debugpy            1.8.1
decorator          5.1.1
executing          2.0.1
filelock           3.13.1
fsspec             2024.2.0
huggingface-hub    0.22.2
idna               3.7
intel-openmp       2021.4.0
ipykernel          6.29.4
ipython            8.24.0
ipywidgets         8.1.2
jedi               0.19.1
Jinja2             3.1.3
joblib             1.4.0
jupyter_client     8.6.1
jupyter_core       5.7.2
jupyterlab_widgets 3.0.10
MarkupSafe         2.1.5
matplotlib-inline  0.1.7
mkl                2021.4.0
mpmath             1.3.0
nest-asyncio       1.6.0
networkx           3.2.1
numpy              1.26.3
packaging          24.0
parso              0.8.4
pillow             10.2.0
pip                24.0
platformdirs       4.2.1
prompt-toolkit     3.0.43
psutil             5.9.8
pure-eval          0.2.2

# Libs

In [5]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from transformers import pipeline

# Params

In [6]:
SELECTED_MODEL = 'NousResearch/Llama-2-7b-hf'
DATA_SOURCE = 'ragnaros.txt'

# Funcs and Classes

In [7]:
class SimpleTFIDFRetriever:
    def __init__(self, document_path):
        with open(document_path, 'r', encoding='utf-8') as file:
            self.documents = file.read().split('\n\n')
        self.vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.vectorizer.fit_transform(self.documents)
    
    def retrieve(self, query, top_n=1):
        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.tfidf_matrix).flatten()
        indices = np.argsort(-similarities)[:top_n]
        return [self.documents[i] for i in indices]

In [8]:
class LLaMAGenerator:
    def __init__(self, model_name):
        self.generator = pipeline('text-generation', model=model_name, device=0)

    def generate(self, prompt, max_length=250):
        return self.generator(prompt, max_length=max_length, num_return_sequences=1)[0]['generated_text']


In [9]:
class RAG:
    def __init__(self, retriever, generator):
        self.retriever = retriever
        self.generator = generator
    
    def answer(self, query):
        contexts = self.retriever.retrieve(query)
        answers = [self.generator.generate(context) for context in contexts]
        return answers

retriever = SimpleTFIDFRetriever(DATA_SOURCE)
generator = LLaMAGenerator(SELECTED_MODEL)

rag = RAG(retriever, generator)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [10]:
# Example query
query = "Who defeated Ragnaros?"
answers = rag.answer(query)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [13]:
print(answers[0].replace('.', '\n'))

As Malfurion Stormrage rallied Hyjal's defenders in a massive counterattack, the Circle stormed into the Firelands
 Soon, the Circle was reinforced by the Alliance and Horde who pushed further into the Firelands until they reached Sulfuron Keep
[18][19] During the battle against Ragnaros in the normal dungeon he retreats at 10% health saying that his attackers have come too soon
 However during the heroic fight when Ragnaros attempts to withdraw from the battle, he is interrupted by the arrival of Cenarius, Malfurion, and Hamuul, who each channel a powerful spell that solidifies the lava surrounding the Firelord, binding him
 With nowhere left to run Ragnaros bursts out of the lava and in a final battle with the adventurers, the fire lord is slain
 Due to Ragnaros dying in his plane, the Firelands, he also suffers the same fate as Al'Akir, that being eternal death
 He was the second Elemental Lord to die

Ragnaros's death was a
