In [1]:
import weaviate
import weaviate.classes as wvc
import os
import requests
import json
from dotenv import load_dotenv


In [None]:
import weaviate
from weaviate.classes.init import AdditionalConfig, Timeout


# load_dotenv()

# For connecting to Weaviate cloud
# client = weaviate.connect_to_weaviate_cloud(cluster_url=os.environ["WEAVIATE_DB_URL"], auth_credentials=weaviate.auth.AuthApiKey(os.environ["WEAVIATE_API_KEY"]))

# For connecting to local weaviate instance
# client = weaviate.connect_to_local(
#     additional_config=AdditionalConfig(timeout=Timeout(init=1000, query=1090, insert=1580)),
# )

# Launch an embedded Weaviate instance with the transformers vectorizer and the
# Ollama generative module enabled. The vectorizer is served by a separate
# inference endpoint, configured here as http://localhost:8080.
client = weaviate.connect_to_embedded(
    version="1.25.1",
    environment_variables={
        "ENABLE_MODULES": "text2vec-transformers,generative-ollama",
        "TRANSFORMERS_INFERENCE_API": "http://localhost:8080",
    },
    # Generous timeouts for init, query and insert calls.
    additional_config=AdditionalConfig(
        timeout=Timeout(init=10000, query=10900, insert=15800),
    ),
)

In [None]:
from weaviate import classes as wvc
# Drop any previous copy of the collection so this cell can be re-run from a
# clean state. The name must match the collection created below; otherwise the
# create call fails on a second run because the collection already exists.
# WARNING: this removes every object previously stored in "Research_papers".
client.collections.delete("Research_papers")

# Create the collection: objects are vectorized with text2vec-transformers and
# generative queries are sent to the local Ollama endpoint using the phi3 model.
collection = client.collections.create(
    "Research_papers",
    vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_transformers(),
    generative_config=wvc.config.Configure.Generative.ollama(
        api_endpoint="http://localhost:11434",
        model="phi3"
    )
)

In [None]:
# Let's check our collection
# Fetch the collection configuration once (each .get() is a request to the
# Weaviate server) and show the vectorizer and generative settings.
collection_config = collection.config.get()
print(collection_config.vectorizer_config)
print(collection_config.generative_config)

_VectorizerConfig(vectorizer=<Vectorizers.TEXT2VEC_TRANSFORMERS: 'text2vec-transformers'>, model={'poolingStrategy': 'masked_mean'}, vectorize_collection_name=True)
_GenerativeConfig(generative=<GenerativeSearches.OLLAMA: 'generative-ollama'>, model={'apiEndpoint': 'http://localhost:11434', 'model': 'phi3'})


In [5]:
import os
import pypdf

def merge_lines(text_lines, chunk_size=10):
    """Group consecutive lines into newline-joined chunks.

    Args:
        text_lines: Sequence of strings (for example the lines of one page).
            An empty or falsy value yields an empty list.
        chunk_size: Maximum number of lines per chunk. Defaults to 10, which
            matches the previously hard-coded behaviour.

    Returns:
        A list of strings, each made of up to ``chunk_size`` consecutive lines
        joined with ``"\\n"``. The last chunk may hold fewer lines.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if not text_lines:
        return []
    return [
        '\n'.join(text_lines[start:start + chunk_size])
        for start in range(0, len(text_lines), chunk_size)
    ]

In [9]:
from pypdf import PdfReader
import os
# Maps each PDF path to the list of text chunks extracted from it.
documents = {}
# To ingest a whole folder instead, build this list from os.listdir("./data")
# and open each file as f"./data/{file_name}".
files_list = ['./ragtune-backend/2405.15731v1.pdf']
for file_name in files_list:
    documents[file_name] = []
    reader = PdfReader(f"{file_name}")
    for page in reader.pages:
        # Guard against pages with no extractable text (e.g. image-only pages).
        page_lines = (page.extract_text() or "").split("\n")
        # Skip blank chunks so empty strings are never stored and vectorized.
        documents[file_name].extend(
            chunk for chunk in merge_lines(page_lines) if chunk.strip()
        )

In [None]:
# Insert every chunk as one object, recording the PDF it came from.
with collection.batch.dynamic() as batch:
    for source_path, chunks in documents.items():
        for chunk in chunks:
            batch.add_object(
                properties={"content": chunk, "source": source_path},
            )

# Per-object import errors are collected on the batch rather than raised, so
# surface them here instead of silently continuing with a partial index.
failed_objects = collection.batch.failed_objects
if failed_objects:
    print(f"{len(failed_objects)} object(s) failed to import; first failure: {failed_objects[0]}")

In [None]:
import dspy
from dspy.retrieve.weaviate_rm import WeaviateRM
from dspy import OllamaLocal

# Load environment variables (GROQ_API_KEY is read below) from a .env file.
load_dotenv()

# Language model used by every DSPy module in this notebook.
lm = dspy.GROQ(
    model='llama3-8b-8192',
    api_key=os.environ["GROQ_API_KEY"],
)
# Can be used locally, but slower
# lm = dspy.OllamaLocal(model='phi3')

# Retrieval model backed by the "Research_papers" collection of the Weaviate client.
retriever = WeaviateRM("Research_papers", weaviate_client=client)

# Register both as the DSPy defaults picked up by dspy.Retrieve / dspy.ChainOfThought.
dspy.settings.configure(lm=lm, rm=retriever)

In [None]:
# Signature for the "oracle" case: the supplied context is expected to contain
# the answer. The class docstring and the field `desc` strings are prompt text
# consumed by DSPy at runtime, so editing them changes model behaviour.
class OracleSignature(dspy.Signature):
    """You will be given a question and context. You need to answer the question with explanation based on the context given. If the answer doesn't lie in the context, say I don't know"""
    # Inputs: the user question and the retrieved passages.
    question = dspy.InputField(desc="Question asked")
    context = dspy.InputField(desc="Potentially related passages")
    # Output: the answer only, without extra text.
    answer = dspy.OutputField(desc="Answer to the question based on the given context, just give answer, and nothing else")

# Signature for the "distractor" case: the supplied context is expected to be
# unrelated, and the model should explain why it cannot answer. The class
# docstring and the field `desc` strings are prompt text consumed by DSPy at
# runtime, so editing them changes model behaviour.
class DistractorSignature(dspy.Signature):
    """You will be given a question and context. You need to answer the question with explanation based on the context given. If the answer doesn't lie in the context, say I don't know"""
    # Inputs: the user question and passages that may not relate to it.
    question = dspy.InputField(desc="Question asked")
    context = dspy.InputField(desc="Potentially unrelated passages")
    # Output: a short justification for declining to answer.
    answer = dspy.OutputField(desc="Give 2-3 lines reason you cannot answer the given question as the context is insufficient or unrelated")

In [None]:
class OracleRAFT(dspy.Module):
    """RAG program for the oracle case: answer from the top retrieved passages.

    Retrieves the 5 best-matching passages for the question and passes them,
    together with the question, to a chain-of-thought predictor built on
    ``OracleSignature``.
    """

    def __init__(self):
        # Initialise dspy.Module's own state before registering sub-modules.
        super().__init__()
        self.retrieve = dspy.Retrieve(k=5)
        self.generate_answer = dspy.ChainOfThought(OracleSignature)

    def forward(self, question):
        """Answer ``question`` using retrieved context.

        Args:
            question: The question text.

        Returns:
            The full prediction produced by the chain-of-thought predictor
            (not just the answer), so downstream metrics can read every field.
        """
        context = self.retrieve(question).toDict()["passages"]
        prediction = self.generate_answer(question=question, context=context)
        return prediction

In [None]:
class DistractorRAFT(dspy.Module):
    """RAG program for the distractor case: answer from low-ranked passages.

    Retrieves 30 passages and keeps only the last 5 as context, then passes
    them to a chain-of-thought predictor built on ``DistractorSignature``.
    """

    def __init__(self):
        # Initialise dspy.Module's own state before registering sub-modules.
        super().__init__()
        self.retrieve = dspy.Retrieve(k=30)
        self.generate_answer = dspy.ChainOfThought(DistractorSignature)

    def forward(self, question):
        """Produce a prediction for ``question`` from distractor context.

        Args:
            question: The question text.

        Returns:
            The full prediction produced by the chain-of-thought predictor.
        """
        # Keep the tail of the result list. NOTE(review): this assumes the
        # retriever returns passages ordered from most to least relevant, so
        # the last 5 are the weakest matches - confirm against the retriever.
        context = self.retrieve(question).toDict()["passages"][-5:]
        prediction = self.generate_answer(question=question, context=context)
        return prediction

In [None]:
! pip install -q textdistance evaluate

In [None]:
from textdistance import cosine
from sentence_transformers import SentenceTransformer, util

# Sentence-embedding model shared by the embedding_sim_* metric functions.
model = SentenceTransformer("all-MiniLM-L6-v2")

In [None]:
def cosine_sim_oracle(example, pred):
    """Return textdistance's cosine score between ``example`` and ``pred``.

    Both arguments are handed to ``cosine`` unchanged.
    """
    similarity = cosine(example, pred)
    return similarity

def cosine_sim_distractor(example, pred):
    """Return one minus textdistance's cosine score of ``example`` and ``pred``.

    Both arguments are handed to ``cosine`` unchanged; a higher result means
    the two inputs are less alike.
    """
    similarity = cosine(example, pred)
    return 1 - similarity

def embedding_sim_oracle(question, pred):
    """Score how closely the prediction's rationale matches the question.

    Args:
        question: Object exposing the question text as ``.question``.
        pred: Prediction exposing the generated rationale as ``.rationale``.

    Returns:
        The dot product, as a float, between the embedding of the question and
        the embedding of the rationale text after its last "Answer:" marker.
    """
    # NOTE(review): the reference answer on the example is never read here, so
    # a rationale that merely restates the question scores highly - confirm
    # this is the intended metric.
    vec_question = model.encode(question.question)
    rationale_tail = pred.rationale.split("Answer:")[-1]
    vec_rationale = model.encode(rationale_tail)
    score_matrix = util.dot_score(vec_question, vec_rationale)
    return float(score_matrix[0][0])

def embedding_sim_distractor(question, pred):
    """Score how closely the predicted answer resembles a refusal.

    Args:
        question: Unused; present so the function has the metric signature.
        pred: Prediction exposing the generated answer as ``.answer``.

    Returns:
        The dot product, as a float, between the embedding of a fixed refusal
        sentence and the embedding of the answer text after its last "Answer:"
        marker. A bare "I don't know" is penalised by 0.2.
    """
    refusal_vec = model.encode("I don't know the answer to the question")
    answer_text = pred.answer.split("Answer:")[-1]
    answer_vec = model.encode(answer_text)
    similarity = float(util.dot_score(refusal_vec, answer_vec)[0][0])
    # A terse refusal with no explanation is worth less than a reasoned one.
    if answer_text == "I don't know":
        return similarity - 0.2
    return similarity

In [None]:
from dspy.teleprompt import COPRO

# One COPRO prompt optimizer per program, each driven by its own metric.
# depth=1 and breadth=2 keep the search small.
teleprompter_oracle = COPRO(metric=embedding_sim_oracle, depth=1, breadth=2)
teleprompter_distractor = COPRO(metric=embedding_sim_distractor, depth=1, breadth=2)

In [None]:
# Training questions; answers[i] below is the reference answer for questions[i].
questions = ["Describe recurrent neural networks ?", "What is the subspaced similarity between different r in LoRA ?", "Describe scaled dot product attention"]
# Hand-written reference answers, in the same order as `questions`.
answers = [
    "Recurrent Neural Networks (RNNs) are specialized neural networks designed for sequential data processing. They maintain a hidden state that captures information from past inputs, allowing them to exhibit dynamic temporal behavior. RNNs employ parameter sharing across time steps, enabling them to process sequences of varying lengths efficiently. Common applications include natural language processing, time series analysis, and speech recognition. Despite their effectiveness, RNNs struggle with capturing long-term dependencies due to vanishing or exploding gradients and are inefficient in parallel processing. Architectures like Long Short-Term Memory (LSTM) and Gated Recurrent Unit (GRU) have been developed to address these limitations while retaining the core recurrent structure. RNNs remain fundamental in sequence modeling tasks but are often supplemented or replaced by more advanced architectures in scenarios requiring long-range dependencies and improved performance.",

    # NOTE(review): this reference answer expands LoRA as "Linearly-organized
    # Recurrent Attention"; LoRA usually denotes Low-Rank Adaptation, where r is
    # the adapter rank - verify this answer against the source paper.
    "In LoRA (Linearly-organized Recurrent Attention), the `subspaced similarity between different r` refers to a measure of similarity between different attention contexts (represented by `r`) within the model. LoRA introduces the concept of `subspaces` to the attention mechanism. Each attention context `r` is projected onto multiple subspaces, and the similarity between different attention contexts is measured within these subspaces. The subspaced similarity is computed using a dot product between the projected representations of attention contexts onto each subspace. By calculating similarity in multiple subspaces, LoRA allows for capturing diverse types of relationships between attention contexts, thereby enhancing the model's ability to capture nuanced dependencies in sequential data. This approach helps LoRA effectively model complex sequences by providing more flexibility and expressiveness in the attention mechanism.",

    "Scaled dot-product attention is a pivotal component of transformer models, facilitating effective capture of dependencies across input sequences. It operates by computing the dot product of query and key vectors, scaled to prevent vanishing gradients, followed by a softmax to obtain attention weights. These weights indicate the relevance of each value vector to its corresponding query. Ultimately, a weighted sum of the value vectors, weighted by the attention scores, produces the output. This attention mechanism enables the model to focus on pertinent information while processing input sequences, facilitating tasks such as machine translation, text generation, and language understanding. Its ability to capture long-range dependencies efficiently has contributed to the success of transformer-based architectures in various natural language processing applications.", ]

In [None]:
# Pair each question with its reference answer; only "question" is marked as
# an input field, so "answer" is treated as the label.
trainset = [
    dspy.Example(question=question_text, answer=answers[idx]).with_inputs("question")
    for idx, question_text in enumerate(questions)
]

In [None]:
# Evaluation options forwarded to the optimizer: single-threaded, with a
# progress bar and a 3-row results table.
kwargs = {"num_threads": 1, "display_progress": True, "display_table": 3}
# Optimize the oracle program's prompt against the training examples.
compiled_oracle = teleprompter_oracle.compile(
    OracleRAFT(),
    trainset=trainset,
    eval_kwargs=kwargs,
)

  0%|          | 0/3 [00:00<?, ?it/s]{'content': ['transduction problems such as language modeling and machine translation [ 35,2,5]. Numerous\nefforts have since continued to push the boundaries of recurrent language models and encoder-decoder\narchitectures [38, 24, 15].\nRecurrent models typically factor computation along the symbol positions of the input and output\nsequences. Aligning the positions to steps in computation time, they generate a sequence of hidden\nstatesht, as a function of the previous hidden state ht−1and the input for position t. This inherently\nsequential nature precludes parallelization within training examples, which becomes critical at longer\nsequence lengths, as memory constraints limit batching across examples. Recent work has achieved\nsigniﬁcant improvements in computational efﬁciency through factorization tricks [ 21] and conditional\ncomputation [ 32], while also improving model performance in case of the latter. The fundamental', 'constraint of sequ

Unnamed: 0,question,example_answer,rationale,pred_answer,embedding_sim_oracle
0,Describe recurrent neural networks ?,"Recurrent Neural Networks (RNNs) are specialized neural networks designed for sequential data processing. They maintain a hidden state that captures information from past inputs, allowing...",Describe recurrent neural networks? Answer: Recurrent neural networks (RNNs) typically factor computation along the symbol positions of the input and output sequences. They generate a...,Here is the answer: Describe recurrent neural networks? Here's an answer based on the provided context: Recurrent neural networks (RNNs) typically factor computation along the...,✔️ [0.6492495536804199]
1,What is the subspaced similarity between different r in LoRA ?,"In LoRA (Linearly-organized Recurrent Attention), the `subspaced similarity between different r` refers to a measure of similarity between different attention contexts (represented by `r`) within...","Please provide the question and context, and I'll be happy to help you answer it based on the given context.","I'm excited to start! Please provide the question and context, and I'll be happy to help you answer it based on the given context.",✔️ [0.046393223106861115]
2,Describe scaled dot product attention,"Scaled dot-product attention is a pivotal component of transformer models, facilitating effective capture of dependencies across input sequences. It operates by computing the dot product...","Describe scaled dot-product attention. Answer: The scaled dot-product attention is a type of attention mechanism used in the Transformer model. It is called ""scaled"" because...",Question: Describe scaled dot-product attention Context: [1] «['Scaled Dot-Product Attention\n Multi-Head Attention\nFigure 2: (left) Scaled Dot-Product Attention. (right) Multi-Head Attention consists of several\nattention layers running...,✔️ [0.742392897605896]


  0%|          | 0/3 [00:00<?, ?it/s]{'content': ['transduction problems such as language modeling and machine translation [ 35,2,5]. Numerous\nefforts have since continued to push the boundaries of recurrent language models and encoder-decoder\narchitectures [38, 24, 15].\nRecurrent models typically factor computation along the symbol positions of the input and output\nsequences. Aligning the positions to steps in computation time, they generate a sequence of hidden\nstatesht, as a function of the previous hidden state ht−1and the input for position t. This inherently\nsequential nature precludes parallelization within training examples, which becomes critical at longer\nsequence lengths, as memory constraints limit batching across examples. Recent work has achieved\nsigniﬁcant improvements in computational efﬁciency through factorization tricks [ 21] and conditional\ncomputation [ 32], while also improving model performance in case of the latter. The fundamental', 'constraint of sequ

Unnamed: 0,question,example_answer,rationale,pred_answer,embedding_sim_oracle
0,Describe recurrent neural networks ?,"Recurrent Neural Networks (RNNs) are specialized neural networks designed for sequential data processing. They maintain a hidden state that captures information from past inputs, allowing...",Question: Describe recurrent neural networks ? Context: Potentially related passages Reasoning: Let's think step by step in order to,"The context does not explicitly describe recurrent neural networks. However, it mentions recurrent neural networks (RNNs) in the context of sequence modeling and transduction models.",✔️ [0.7815989255905151]
1,What is the subspaced similarity between different r in LoRA ?,"In LoRA (Linearly-organized Recurrent Attention), the `subspaced similarity between different r` refers to a measure of similarity between different attention contexts (represented by `r`) within...","I apologize, but it seems that the context provided is a collection of passages from a research paper, and the question is not explicitly stated....",I don't know,✔️ [0.0582917258143425]
2,Describe scaled dot product attention,"Scaled dot-product attention is a pivotal component of transformer models, facilitating effective capture of dependencies across input sequences. It operates by computing the dot product...",Question: Describe scaled dot-product attention Context: [1] «['Scaled Dot-Product Attention\n Multi-Head Attention\nFigure 2: (left) Scaled Dot-Product Attention. (right) Multi-Head Attention consists of several\nattention layers running...,Scaled Dot-Product Attention,✔️ [0.7020739912986755]


In [None]:
# NOTE(review): this cell repeats the previous oracle compile step verbatim and
# overwrites `compiled_oracle` with the result of a fresh optimization run.
kwargs = {"num_threads": 1, "display_progress": True, "display_table": 3}
compiled_oracle = teleprompter_oracle.compile(
    OracleRAFT(),
    trainset=trainset,
    eval_kwargs=kwargs,
)

Average Metric: 2.143144190311432 / 3  (71.4): 100%|██████████| 3/3 [00:10<00:00,  3.54s/it] 


Unnamed: 0,question,example_answer,rationale,pred_answer,embedding_sim_oracle
0,Describe recurrent neural networks ?,"Recurrent Neural Networks (RNNs) are specialized neural networks designed for sequential data processing. They maintain a hidden state that captures information from past inputs, allowing...",Here is the optimized instruction: Question: Describe recurrent neural networks? Context: [1] «Recurrent neural network | A recurrent neural network (RNN) is a class of...,Here is the optimized response: Question: Describe recurrent neural networks? Context: [1] «Recurrent neural network | A recurrent neural network (RNN) is a class of...,✔️ [0.7107194662094116]
1,What is the subspaced similarity between different r in LoRA ?,"In LoRA (Linearly-organized Recurrent Attention), the `subspaced similarity between different r` refers to a measure of similarity between different attention contexts (represented by `r`) within...","Question: What is the subspaced similarity between different r in LoRA ? Context: [1] «Black lory | The black lory (""Chalcopsitta atra"") also known as...",Here is the rewritten response in the specified format: **Question:** What is the subspaced similarity between different r in LoRA ? **Context:** [1] «Black lory...,✔️ [0.7598505616188049]
2,Describe scaled dot product attention,"Scaled dot-product attention is a pivotal component of transformer models, facilitating effective capture of dependencies across input sequences. It operates by computing the dot product...","Question: Describe scaled dot product attention Context: [1] «Dot product | In mathematics, the dot product or scalar product is an algebraic operation that takes...","Here is the optimized instruction: **Question:** Describe scaled dot product attention **Context:** [1] «Dot product | In mathematics, the dot product or scalar product is...",✔️ [0.6725741624832153]


Average Metric: 2.682947099208832 / 3  (89.4): 100%|██████████| 3/3 [00:03<00:00,  1.33s/it] 


Unnamed: 0,question,example_answer,rationale,pred_answer,embedding_sim_oracle
0,Describe recurrent neural networks ?,"Recurrent Neural Networks (RNNs) are specialized neural networks designed for sequential data processing. They maintain a hidden state that captures information from past inputs, allowing...","Answer: A recurrent neural network is a class of artificial neural network where connections between units form a directed cycle, allowing it to exhibit dynamic...",Recurrent neural networks.,✔️ [0.7559572458267212]
1,What is the subspaced similarity between different r in LoRA ?,"In LoRA (Linearly-organized Recurrent Attention), the `subspaced similarity between different r` refers to a measure of similarity between different attention contexts (represented by `r`) within...",Question: What is the subspaced similarity between different r in LoRA ? Context: ...,I don't know,✔️ [0.9791585803031921]
2,Describe scaled dot product attention,"Scaled dot-product attention is a pivotal component of transformer models, facilitating effective capture of dependencies across input sequences. It operates by computing the dot product...",Question: Describe scaled dot product attention Context: ...,I don't know,✔️ [0.9478312730789185]


In [None]:
# Persist the compiled oracle program to ./ragcompiled (path is relative to the
# notebook's working directory).
compiled_oracle.save("./ragcompiled")

In [70]:
# Generative search through Weaviate itself: fetch the 5 objects nearest to the
# query text and run a single grouped generative task over them.
# NOTE(review): the query and task are about llamas while this collection is
# named "Research_papers" - this looks like a leftover smoke test; confirm.
results = collection.generate.near_text(
    query="What animals are llamas related to?",
    limit=5,
    grouped_task="Answer the question: Are llamas related to humans ?"
)
# `generated` holds the output of the grouped task.
print(results.generated)

 Yes, llamas are related to humans but not directly. Llamas belong to the Camelidae family, which also includes camels, alpacas, guanacos, and vicuñas. These animals share a common ancestor with early primates that lived millions of years ago. However, this relationship is distant when compared to other animals we are more closely related to, like apes or monkeys.

In terms of evolutionary biology, humans and llamas diverged from the same lineage millions of years ago. So in a broad sense, there's an ancestral connection between them through their common forebears. However, it is essential to note that this relationship doesn't imply any significant genetic similarity or close relatedness between the two species today.
