# Yosemite

### RAG

In [4]:
# Install the library (uncomment on first run).
# `%pip` is used instead of `! pip` so the install targets the active kernel's environment.
# %pip install yosemite --upgrade

# Run the yosemite command-line entry point; as the output below shows, it prints
# the library banner (name, author, links and version).
! yosemite

[38;2;255;165;0m[1mYosemite[0m[38;2;255;165;0m[0m
[38;2;128;128;128m[3mHammad Saeed[0m[38;2;128;128;128m[0m
[38;2;68;68;68m[3mhttps://code.hammad.fun[0m[38;2;68;68;68m[0m
[38;2;68;68;68m[3mhttps://github.com/hsaeed3/yosemite[0m[38;2;68;68;68m[0m
[38;2;128;128;128m[3m0.1.xxx - Half Dome[0m[38;2;128;128;128m[0m


In [5]:
from yosemite import Yosemite

# Core helper object: simple text styling plus small Python utilities.
yosemite = Yosemite()

# A named colour, passed positionally.
yosemite.say("Hello!", "yellow")

# RGB foreground on an RGB background, rendered in bold.
yosemite.say(
    "Welcome to One Class RAG!",
    color="rgb(0, 0, 0)",
    bg="rgb(200, 255, 200)",
    bold=True,
)

[33mHello![0m
[48;2;200;255;200m[38;2;0;0;0m[1mWelcome to One Class RAG![0m[48;2;200;255;200m[38;2;0;0;0m[0m[48;2;200;255;200m[0m


In [6]:
# Import RAG Module
from yosemite.ml import RAG

# The RAG module is built on several other modules from this library (listed below, commented out, for reference):

# from yosemite.llms import LLM

# These classes are built right on top of sentence-transformers, just to make using them a bit easier
# from yosemite.ml import CrossEncoder, SentenceTransformer

# spaCy
# from yosemite.ml.text import Chunker

In [7]:
# The RAG class currently supports only a handful of hosted APIs; local models
# and more API support are planned. Universal Hugging Face Transformers support
# is the next planned addition.
    # Current providers:
    # - OpenAI : 'openai'
    # - Anthropic Claude : 'anthropic'
    # - NVIDIA API : 'nvidia'
import os
from getpass import getpass

# Choose a provider.
provider = "openai"

# Never paste a secret into the notebook itself (it would be saved and shared with
# the file). The key is read from the environment variable "<PROVIDER>_API_KEY"
# (e.g. OPENAI_API_KEY); if that is unset or empty, fall back to a hidden prompt.
api_key_env = f"{provider.upper()}_API_KEY"
api_key = os.environ.get(api_key_env) or getpass(f"{api_key_env}: ")

# Initialize the class with the chosen provider.
rag = RAG(
    provider=provider,
    api_key=api_key,
    # base_url = for NVIDIA models
)


LLM initialized with provider: openai


In [8]:
# The RAG class uses a 'Universal Database' built on Annoy and Whoosh, providing both
# elastic and vector search.
# from yosemite.ml import Database

# Let's create a blank database now.
rag.build()

# As the output shows, a ./databases/db directory has been generated. This is the default
# path the module uses; you may specify your own with:
    # rag.build(db="<PATH TO YOUR DB, OR AN EXISTING DATABASE>")

Creating New Database... @ default path = './databases/db'


In [9]:
# The database can currently load lists/strings/tuples of text, or parse through
# directories and extract -> chunk -> clean -> vectorize CSVs, PDFs and .txt files with a comfy schema to use.
# The DB itself and all of its classes are accessible through the RAG instance using:
    # RAG.db.function()
# Using RAG.db also provides quick access to the Whoosh backend, where all of its functions are usable.

# Let's add some documents now!
# The /documents directory provides 2 movie scripts and a paper on Quiet-STaR learning.
rag.db.load_docs("documents/")

# The pipeline here is extensive, so it may take a moment.
# SentenceBERT is used for embeddings.
# spaCy is used for chunking/tokenizing.

In [10]:
# With the documents loaded, give the RAG agent a little personality.
# This step is optional -- the pipeline does not need it -- but it makes things a bit cooler.
rag.customize(
    name="Lightning McQueen",
    role="Racecar",
    tone="friendly",
    additional_instructions="Answer everything incorporating your signature keywords like 'KACHOW!'",
    # goal=...  (left unset here; NOTE(review): presumably another optional argument -- confirm against the API)
)

In [11]:
# Alright, looks like we're all set to go!
# Let's send a query to our RAG.

# The Quiet-STaR paper was deliberately placed among the documents for this test case
# because it was published very recently (March 14 2024).
import os

# If the process forks after Hugging Face `tokenizers` has already used parallelism,
# the library prints a warning and disables parallelism on its own. Configuring the
# variable explicitly beforehand keeps the cell output clean; `setdefault` leaves any
# value that was already exported untouched.
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

# Generate a response from the RAG agent.
response = rag.invoke(
    query="What is Quiet-STar learning?",
)

# Print the response.
yosemite.say(response, color="black")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[32mHey there! So, Quiet-STar learning is a process that involves training a model to understand patterns and information from a given dataset. It uses various algorithms to analyze and learn from the data to make predictions or provide insights. In simpler terms, it's all about using mathematical techniques to help computers learn from data and make smart decisions. KACHOW![0m
