In [29]:
%%html

<!-- Tailwind CSS v2 utility stylesheet, loaded from the unpkg CDN. -->
<link href="https://unpkg.com/tailwindcss@^2/dist/tailwind.min.css" rel="stylesheet">
<!-- Prism 1.24.1 from cdnjs: "tomorrow" theme stylesheet, then the core script,
     then the toolbar plugin followed by the copy-to-clipboard plugin. -->
<link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/themes/prism-tomorrow.min.css" rel="stylesheet" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/prism.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/plugins/toolbar/prism-toolbar.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/plugins/copy-to-clipboard/prism-copy-to-clipboard.min.js"></script>

<style>
/* Web fonts from Google Fonts: DM Sans, Lora and JetBrains Mono. */
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&family=Lora:ital,wght@0,400;0,500;0,600;0,700;1,400;1,500;1,600;1,700&family=JetBrains+Mono:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;0,800;1,100;1,200;1,300;1,400;1,500;1,600;1,700;1,800&display=swap');

/* ---- Layout ------------------------------------------------------------ */

/* Card wrapper: full-width flex box with padding, rounded corners and a
   faint white glow. */
.container {
  display: flex;
  width: 100%;
  height: auto;
  padding: 16px;
  border-radius: 8px;
  box-shadow: 0 0 5px rgba(255, 255, 255, 0.2);
}

/* Centring helper. It does not set `display` itself, so it only has an
   effect on an element that is already a flex (or grid) container. */
.flex-container {
  align-items: center;
  justify-content: center;
}

/* ---- Palette ----------------------------------------------------------- */
/* Every colour comes as a pair: `.name` sets the text colour and
   `.bg-name` sets the same value as the background colour. */

.light          { color: #f8f8f2; }
.bg-light       { background-color: #f8f8f2; }

.dark           { color: #181818; }
.bg-dark        { background-color: #181818; }

.lightblue      { color: #8be9fd; }
.bg-lightblue   { background-color: #8be9fd; }

.blue           { color: #bd93f9; }
.bg-blue        { background-color: #bd93f9; }

.neutralred     { color: #ff5555; }
.bg-neutralred  { background-color: #ff5555; }

.lightaccent    { color: #f1fa8c; }
.bg-lightaccent { background-color: #f1fa8c; }

.accent         { color: #50fa7b; }
.bg-accent      { background-color: #50fa7b; }

.orange         { color: #ffb86c; }
.bg-orange      { background-color: #ffb86c; }

/* ---- Typography -------------------------------------------------------- */
/* One helper class per font family imported at the top of this sheet. */

.dm   { font-family: 'DM Sans', sans-serif; }
.lora { font-family: 'Lora', serif; }
.jet  { font-family: 'JetBrains Mono', monospace; }
</style>

<!-- Title card: project name, tagline and a one-sentence disclaimer on a dark background. -->
<div class="container bg-dark">
<!-- Single flex child wrapping all of the text, so the lines stack via <br /> instead of
     being laid out side by side by the flex container. -->
<span>
<span class="text-6xl text-white hover:text-black font-semibold tracking-tighter">Yosemite 🏞️</span>
<br />
<span class="text-lg orange font-medium">One Class RAG</span>
<br />
<br />
<span class="text-lg text-white font-medium">This is not a 'Production' or enterprise tool, rather a fun and very quick tool for using something like Retrieval-Augmented Generation in your own code.</span>
</span>
</div>

In [None]:
# Install the library (uncomment to run). Using the `%pip` magic rather than `! pip`
# makes the install target the environment of the running kernel.
# %pip install --upgrade yosemite

# Runs the `yosemite` command in a shell.
# NOTE(review): presumably the CLI entry point installed with the package — confirm what it prints.
! yosemite

In [None]:
from yosemite import Yosemite

# Yosemite core: simple text styling / Python utilities.
yosemite = Yosemite()

# Simplest form: a message plus a colour name passed positionally.
yosemite.say("Hello!", "yellow")

# Fully styled form: RGB strings for `color` and `bg`, with bold enabled.
yosemite.say(
    "Welcome to One Class RAG!",
    color="rgb(0, 0, 0)",
    bg="rgb(200, 255, 200)",
    bold=True,
)

In [None]:
# Import the RAG module.
from yosemite.ml import RAG

# The RAG module is built on top of several other modules from this library.
# They are listed below (commented out) for reference; none of them needs to be
# imported to follow this notebook.

# from yosemite.llms import LLM

# These classes are built right on top of sentence-transformers, just to make
# using them a bit easier.
# from yosemite.ml import CrossEncoder, SentenceTransformer

# spaCy
# from yosemite.ml.text import Chunker

In [None]:
# The RAG class currently supports only a handful of APIs; local models and more
# API support will be added. Universal Hugging Face Transformers support is the
# next planned addition.
#   Current providers:
#   - OpenAI           : 'openai'
#   - Anthropic Claude : 'anthropic'
#   - NVIDIA API       : 'nvidia'
import os
from getpass import getpass

# Let's choose a provider now.
provider = "openai"

# Do not paste the key into this cell: a saved or shared notebook would leak it.
# The key is read from the environment variable `<PROVIDER>_API_KEY`
# (OPENAI_API_KEY for the provider chosen above). If that variable is unset or
# empty, the user is prompted for the key with hidden input instead.
api_key_env = f"{provider.upper()}_API_KEY"
api_key = os.environ.get(api_key_env) or getpass(
    f"{api_key_env} is not set - enter your {provider} API key: "
)

# Initialize the class with the chosen provider and its key.
rag = RAG(
    provider=provider,
    api_key=api_key,
    # base_url = for NVIDIA models
)


In [None]:
# The RAG class uses a 'Universal Database' built on Annoy and Whoosh, covering
# both elastic-style search and vector search.
# from yosemite.ml import Database

# Let's create a blank database now.
rag.build()

# As you can see, a /databases/db directory has been generated. This is the default
# path the module uses; you may specify your own with:
    # rag.build(db="<PATH TO YOUR DB, OR TO AN EXISTING DATABASE>")

In [None]:
# Currently the database can load lists/strings/tuples of text, or it can parse through
# directories and extract -> chunk -> clean -> vectorize CSVs, PDFs and .txt files with a comfy schema to use.
# The DB itself and all its classes are accessible through the RAG using:
    # RAG.db.function()
# Using RAG.db also provides quick access to the Whoosh backend, where all its functions are usable.

# Let's add some documents now!
# The /documents directory provides 2 movie scripts and a paper on Quiet-STaR learning.
rag.db.load_docs("documents/")

# The pipeline here is extensive, so it may take a second:
# SentenceBERT is used for embeddings,
# spaCy is used for chunking/tokenizing.

In [None]:
# With the documents loaded, let's give the RAG agent a little personality.
# This step is optional and not essential to the pipeline, but it makes things a bit cooler.
rag.customize(
    name="Lightning McQueen",
    role="Racecar",
    tone="friendly",
    additional_instructions="Answer everything incorporating your signature keywords like 'KACHOW!'"
    # goal=...  (not set in this example)
)

In [None]:
# Everything is set up, so let's send a query to our RAG agent.
#
# The Quiet-STaR paper was deliberately included among the documents for this
# test case because it was released incredibly recently (March 14 2024).

# Ask the agent a question and keep its answer.
response = rag.invoke(query="What is Quiet-STar learning?")

# Show the answer in green.
yosemite.say(response, color="green")