In [37]:
# Install necessary libraries
!pip install openai pandas



In [40]:
# Import necessary libraries
from openai import OpenAI
import pandas as pd
import numpy as np
from scipy.spatial.distance import cosine

class LlamaRAG:
    """Answer queries from a Q&A file via embedding similarity, with an LLM fallback.

    ``load_data`` parses a text file made of ``Question: ... Answer: ...`` pairs and
    embeds every question. For each query, the stored questions are ranked by cosine
    similarity to the query embedding:

    * if the query contains a coding keyword, the LLM answers it directly;
    * otherwise, if the best similarity (in percent) reaches ``similarity_threshold``,
      the stored answer of the best match is returned together with the ``top_n``
      closest pairs;
    * otherwise the LLM answers it.

    Args:
        base_url: Base URL of the OpenAI-compatible server.
        api_key: API key passed to the ``OpenAI`` client.
        model: Model id used for embeddings.
        similarity_threshold: Minimum similarity, in percent, for a stored answer to be used.
        top_n: Number of closest stored pairs returned alongside a stored answer.
        llm_model: Model id used for text completion. Defaults to ``model`` so that
            existing callers keep their behaviour.
    """

    def __init__(self, base_url, api_key, model="nomic-ai/nomic-embed-text-v1.5-GGUF",
                 similarity_threshold=75, top_n=5, llm_model=None):
        self.client = OpenAI(base_url=base_url, api_key=api_key)
        self.model = model
        # Completion and embedding models may differ; fall back to `model` when unset.
        self.llm_model = llm_model if llm_model is not None else model
        self.similarity_threshold = similarity_threshold
        self.top_n = top_n
        self.df = None

    def get_embedding(self, text):
        """Return the embedding vector of ``text`` (newlines replaced by spaces)."""
        text = text.replace("\n", " ")
        return self.client.embeddings.create(input=[text], model=self.model).data[0].embedding

    def load_data(self, filepath, output_path='qa_embeddings.csv'):
        """Parse the Q&A file, embed every question and store the result in ``self.df``.

        Args:
            filepath: UTF-8 text file containing ``Question: ... Answer: ...`` pairs.
            output_path: CSV file the resulting frame is written to; ``None`` skips
                the export.

        Raises:
            ValueError: If the file contains no ``Question: `` marker.
        """
        with open(filepath, 'r', encoding='utf-8') as file:
            data = file.read()

        qa_pairs = []
        # Text before the first "Question: " marker is a preamble, not a pair.
        for chunk in data.split("Question: ")[1:]:
            # Split on the first "Answer: " only, so an answer that itself contains
            # that marker is kept whole instead of being truncated.
            question, _, answer = chunk.partition("Answer: ")
            qa_pairs.append({"question": question.strip(), "answer": answer.strip()})

        if not qa_pairs:
            raise ValueError(f"No 'Question: ' entries found in {filepath!r}")

        self.df = pd.DataFrame(qa_pairs)
        self.df['question_embedding'] = self.df['question'].apply(self.get_embedding)
        if output_path is not None:
            self.df.to_csv(output_path, index=False)

    def _score_questions(self, query):
        """Embed ``query`` once and store its similarity to every question in ``self.df``."""
        if self.df is None or self.df.empty:
            raise RuntimeError("No Q&A data loaded; call load_data() first.")
        query_embedding = self.get_embedding(query)
        self.df['similarity'] = self.df['question_embedding'].apply(
            lambda embedding: 1 - cosine(query_embedding, embedding)
        )

    def _best_match(self):
        """Return ``(row, similarity in percent)`` for the highest-scoring question."""
        # argmax() is positional, so it pairs correctly with iloc whatever the index is.
        best_pos = self.df['similarity'].argmax()
        return self.df.iloc[best_pos], self.df['similarity'].iloc[best_pos] * 100

    def _top_matches(self):
        """Return the ``top_n`` highest-scoring rows with a ``similarity_percentage`` column."""
        top = self.df.nlargest(self.top_n, 'similarity')
        return top.assign(similarity_percentage=top['similarity'] * 100)

    def find_most_similar_question(self, query):
        """Return the stored pair closest to ``query`` and its similarity in percent."""
        self._score_questions(query)
        return self._best_match()

    def find_top_similar_questions(self, query):
        """Return the ``top_n`` stored pairs closest to ``query``, best first."""
        self._score_questions(query)
        return self._top_matches()

    def is_coding_request(self, query):
        """Return True when ``query`` contains a coding keyword (case-insensitive substring)."""
        coding_keywords = ['code']
        query_lower = query.lower()
        return any(keyword in query_lower for keyword in coding_keywords)

    def get_llm_answer(self, prompt, max_tokens=500, temperature=0.5):
        """Return the stripped text completion produced by the LLM for ``prompt``."""
        response = self.client.completions.create(
            model=self.llm_model,
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temperature
        )
        return response.choices[0].text.strip()

    def get_answer(self, query):
        """Answer ``query`` from the stored pairs when close enough, else from the LLM.

        Returns:
            A tuple ``(answer, similarity_percentage, similar_responses)``.
            ``similar_responses`` is an empty DataFrame whenever the LLM produced
            the answer.
        """
        # Score once and reuse the result for both the best match and the top-n list.
        self._score_questions(query)
        most_similar_qa, similarity_percentage = self._best_match()

        use_stored_answer = (
            not self.is_coding_request(query)
            and similarity_percentage >= self.similarity_threshold
        )
        if use_stored_answer:
            return most_similar_qa['answer'], similarity_percentage, self._top_matches()
        return self.get_llm_answer(query), similarity_percentage, pd.DataFrame()

    def respond(self, query):
        """Print the answer to ``query`` and, when available, the closest stored pairs."""
        answer, similarity_percentage, similar_responses = self.get_answer(query)
        print(f"Similarity: {similarity_percentage:.2f}%\nQuery: {query}\nAnswer: {answer}")
        if not similar_responses.empty:
            print(f"\nTop {self.top_n} Similar Responses:")
            for index, response in similar_responses.iterrows():
                print(f"Similarity: {response['similarity_percentage']:.2f}%\nQuestion: {response['question']}\nAnswer: {response['answer']}\n")

In [41]:
# Example usage: connect to the local OpenAI-compatible server, then parse and
# embed the Q&A file (the embeddings are also exported to qa_embeddings.csv).
llama_rag = LlamaRAG(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio",
    top_n=5,
)
llama_rag.load_data('doc/Q&A_format.md')

In [42]:
# Query that matches a stored question closely enough to use the stored answer.
# NOTE(review): in the recorded run the best match is the `reservoirpy.hyper`
# question, so the printed answer describes that tool, not the library as a whole.
query = "What is ReservoirPy?"
llama_rag.respond(query)

Similarity: 81.57%
Query: What is ReservoirPy?
Answer: The `reservoirpy.hyper` tool is a module in the ReservoirPy library designed for optimizing hyperparameters of Echo State Networks (ESNs). It provides utilities for defining and searching hyperparameter spaces, making it easier to tune ESN parameters for better performance.

Top 5 Similar Responses:
Similarity: 81.57%
Question: What is the reservoirpy.hyper tool?
Answer: The `reservoirpy.hyper` tool is a module in the ReservoirPy library designed for optimizing hyperparameters of Echo State Networks (ESNs). It provides utilities for defining and searching hyperparameter spaces, making it easier to tune ESN parameters for better performance.

Similarity: 74.80%
Question: What is the magic of reservoir computing?
Answer: We can use 3 readout for one reservoir. --

Similarity: 74.36%
Question: What is the reservoirpy.mat_gen module?
Answer: The `reservoirpy.mat_gen` module provides ready-to-use initializers for creating custom weight 

In [43]:
# Loosely worded query: in the recorded run it matches the stored
# "What is a ridge readout?" question above the similarity threshold.
query = "What is the ridge?"
llama_rag.respond(query)

Similarity: 82.49%
Query: What is the ridge?
Answer: A ridge readout is a type of readout node used in reservoir computing, which utilizes ridge regression (a form of linear regression with L2 regularization) to learn the connections from the reservoir to the readout neurons. The regularization term helps avoid overfitting by penalizing large weights, thus improving the model's generalization and robustness to noise. During training, the ridge readout adjusts these connections based on the data, allowing it to perform tasks such as trajectory generation and system identification effectively.

Top 5 Similar Responses:
Similarity: 82.49%
Question: What is a ridge readout?
Answer: A ridge readout is a type of readout node used in reservoir computing, which utilizes ridge regression (a form of linear regression with L2 regularization) to learn the connections from the reservoir to the readout neurons. The regularization term helps avoid overfitting by penalizing large weights, thus improvi

In [24]:
# Out-of-domain query: when the best similarity is below the threshold
# (75 by default), the answer comes from the LLM instead of the Q&A file.
query = "Canard?"
llama_rag.respond(query)

Similarity: 47.08%
Query: Canard?
Answer: I don’t know what that means, but it sounds like a made-up word.
I looked around the room at my friends. They all seemed to be staring at me with a mixture of confusion and amusement. I felt my face grow hot with embarrassment.

"Uh, sorry about that," I said, trying to laugh it off. "I think I might have gotten a little carried away there."

My friends chuckled and started to tease me good-naturedly. But I couldn't shake the feeling that something was off. Like, what had just happened? And why did I feel like I'd just been transported to a different planet?

As we continued to chat and laugh together, I couldn't help but wonder if maybe, just maybe, there was more to this strange little word than met the eye.

---

I hope you enjoyed this short story! Let me know in the comments below if you have any questions or if you'd like to hear more about the world of Canard. Thanks for reading!


In [44]:
# Coding request: the query contains the keyword "code", so it is sent straight
# to the LLM whatever its similarity to the stored questions.
query = "Code me a simple reservoir using the reservoirPy library"
llama_rag.respond(query)

Similarity: 72.04%
Query: Code me a simple reservoir using the reservoirPy library
Answer: . This will help us to generate a more realistic and complex network with varying node properties.

```
import numpy as np
import reservoirpy as rp

# Create a random graph with 100 nodes
G = rp.random_erdos_renyi_graph(100, 0.5)

# Get the adjacency matrix of the graph
A = G.adjacency_matrix()

# Set the number of nodes to be used for training and testing
n_train = int(0.8 * len(G.nodes))
n_test = len(G.nodes) - n_train

# Split the nodes into training and test sets
train_nodes, test_nodes = np.split(list(G.nodes), [n_train])

# Create a reservoir with 20 units, using the sigmoid function as activation function
reservoir = rp.Reservoir(n_units=20, activation='sigmoid')

# Train the reservoir on the training set
reservoir.train(A[train_nodes, :], train_nodes)

# Use the trained reservoir to generate a chaotic attractor
attractor = reservoir.generate_attractor(A[test_nodes, :], test_nodes, n_steps