In [1]:
import ollama
import chromadb
import random 
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize
""" nltk.download('punkt')
nltk.download("punkt_tab") """

story="""

## The Hidden Garden.

In a small town, there was a garden hidden behind a tall wall. The townsfolk often whispered about it, saying it was enchanted. No one had seen the inside, but everyone had their own stories.

Some said it was filled with flowers that could sing, while others believed it was home to a wise old owl that could grant wishes. 

One sunny afternoon, a curious girl named Lily decided to explore the garden. 

Lilly had always been fascinated by the stories and wanted to see if they were true. 

As she approached the wall, she noticed a small, rusty gate slightly ajar. 

With her heart racing, she pushed it open and stepped inside.

The garden was more beautiful than she had imagined. 

Colorful flowers bloomed everywhere, and the air was filled with sweet scents. 

In the center stood an ancient oak tree, its branches stretching wide, offering shade to a small stone bench. 

As she walked closer, she noticed a figure sitting on the bench—a boy about her age, with messy brown hair and bright green eyes.

“Hi, I’m Jack,” he said, smiling. “I come here to think.”

“Hi, I’m Lily,” she replied, feeling a strange connection with him. “I’ve heard so many stories about this place.”

Jack nodded. “Most of them are true. The flowers do sing, but only when they feel happy. And the owl? He’s real too. He watches over the garden.”

Lily’s eyes widened. “Can we see him?”

Jack hesitated. “He only appears when he senses someone who truly believes in magic.”

Lily felt a spark of determination. “I believe! Let’s find him!”

Together, they explored the garden, laughing and sharing stories. They discovered a patch of flowers that began to hum softly when they danced around them. The more they danced, the louder the flowers sang, filling the air with joy.

As they played, they talked about their lives. Lily shared how she often felt lonely at school, while Jack revealed that he had a sick sister who couldn’t leave their home. They both understood what it felt like to wish for something more.

Suddenly, the air grew still, and a soft hoot echoed through the garden. They turned to see a majestic owl perched on a low branch of the oak tree. Its feathers shimmered in the sunlight, and its eyes sparkled with wisdom.

“Welcome, young ones,” the owl said in a deep, soothing voice. “You have shown kindness and belief. What is it that you seek?”

Lily and Jack exchanged glances, unsure of what to say. Finally, Lily spoke up. “I wish for Jack’s sister to get better.”

The owl nodded slowly. “True friendship and selflessness are the greatest forms of magic. I will grant your wish, but remember, kindness must be shared.”

With a flap of its wings, the owl vanished into the air, leaving behind a soft glow. Jack looked at Lily, tears of gratitude in his eyes. “Thank you, Lily. You’ve given me hope.”

As the sun began to set, they promised to meet again in the garden. They had found not only magic but also a friendship that would last a lifetime.



"""
#
documents = sent_tokenize(story)
for i in range(len(documents)):
    documents[i] = documents[i].replace('\n', '')


#print(documents)


questions = [
"What was the main reason Lily decided to explore the hidden garden?",
"How did Jack describe the flowers in the garden?",
"What personal struggles did Lily and Jack share with each other?",
"What did the owl say when it first appeared to Lily and Jack?",
"What wish did Lily make for Jack?",
"How did the flowers react when Lily and Jack danced around them?",
"What did the owl mean by saying 'kindness must be shared'?",
"How did the setting of the garden contribute to the story?",
"What emotions did Jack express after Lily made her wish?",
"How did the relationship between Lily and Jack develop throughout the story?"
]

In [2]:
import random
import ollama
import chromadb
from chromadb.api.types import Documents, EmbeddingFunction, Embeddings
from typing import Optional, Union, List
import pandas as pd


class OllamaEmbeddingFunction(EmbeddingFunction[Documents]):
    Documents = Union[str, List[str], pd.DataFrame]

    def __init__(self, model_name: str = "mxbai-embed-large", collection=None):
        """Initialize the embedding function."""
        self.model_name = model_name
        self.collection = collection  # Store the collection

    def __call__(self, input: Documents) -> List[List[float]]:
        """Embed the input documents."""
        return self._embed(input)

    def _embed(self, documents: Documents) -> List[List[float]]:
        """Generate embeddings for the input documents using Ollama."""
        embeddings = []

        # Handle different input types
        if isinstance(documents, str):
            # If input is a single string, convert it to a list
            documents = [documents]
        elif isinstance(documents, pd.DataFrame):
            # If input is a DataFrame, extract the first column as a list of strings
            documents = documents.iloc[:, 0].tolist()
        
        # Generate embeddings for each document
        for doc in documents:
            response = ollama.embeddings(
                model=self.model_name,
                prompt=doc
            )
            embeddings.append(response["embedding"])
        
        return embeddings

    def _retrieve(self, query: Union[str, List[str]], n_results: int) -> List[str]:
        """Retrieve relevant documents based on a query using Ollama."""
        if self.collection is None:
            raise ValueError("Collection is not set. Please initialize the OllamaEmbeddingFunction with a valid collection.")

        # Handle different input types for query
        if isinstance(query, list):
            # If input is a list, join it into a single string
            query = ' '.join(query)

        response = ollama.embeddings(
            model=self.model_name,
            prompt=query
        )
        query_embedding = response["embedding"]

        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results
        )

        return results['documents'][:n_results]  # Return the top n_results documents in correct order







In [3]:
import chromadb
from typing import Optional



# Step 2: Create a ChromaDB HTTP Client and Collection

def initialize_chromadb_collection(host: str = 'localhost', port: int = 8000, reset: Optional[bool] = False, create_new_collection: bool = True, last_used: Optional[dict] = None) -> chromadb.Collection:
    """
    Initializes a ChromaDB HTTP client and creates or retrieves a collection.

    Args:
        host (str): The host where the ChromaDB server is running. Defaults to 'localhost'.
        port (int): The port on which the ChromaDB server is listening. Defaults to 8000.
        reset (Optional[bool]): If True, resets the ChromaDB client before creating or using a collection. Defaults to False.
        create_new_collection (bool): If True, creates a new collection with a serial numbered name. If False, uses the last used collection name. Defaults to True.
        last_used (Optional[dict]): A dictionary to store the last used collection name and number. Defaults to None.

    Returns:
        chromadb.Collection: The created or existing ChromaDB collection.
    """
    # Initialize last_used if it is None
    if last_used is None:
        last_used = {'collection_name': None, 'serial_number': 0}
    elif 'serial_number' not in last_used:
        last_used['serial_number'] = 0

    # Create a ChromaDB HTTP client
    client = chromadb.HttpClient(host=host, port=port)
    
    # Reset the client if requested
    if reset:
        client.reset()
    
    if create_new_collection:
        # Increment the serial number for the new collection name
        last_used['serial_number'] += 1
        collection_name = f"docs{last_used['serial_number']}"
        
        # Use get_or_create_collection to avoid UniqueConstraintError
        collection = client.get_or_create_collection(name=collection_name)
        
        # Store the collection name for future use
        last_used['collection_name'] = collection_name
    else:
        # Use the last used collection name
        collection_name = last_used.get('collection_name')
        
        if collection_name is None:
            raise ValueError("No previous collection name found. Set create_new_collection to True to create a new collection.")
        
        # Get or create the collection with the last used name
        collection = client.get_or_create_collection(name=collection_name)

    # Print the name of the created or used collection
    print(f"Using collection: {collection.name}")
    
    return collection


In [4]:
# Create a dictionary to store the last used collection name and serial number
last_used_info = {}

# Create a new collection
collection1 = initialize_chromadb_collection(last_used=last_used_info)

# Use the last created collection
collection2 = initialize_chromadb_collection(create_new_collection=False, last_used=last_used_info)

# Print the collection names
print(collection1.name)  # Should print the name of the newly created collection
print(collection2.name)  # Should print the same name as collection1


Using collection: docs1
Using collection: docs1
docs1
docs1


In [5]:
### test of collection


collection1 = initialize_chromadb_collection(last_used=last_used_info)
print(collection1.name)
# Step 3: Create an instance of OllamaEmbeddingFunction
embedding_function = OllamaEmbeddingFunction(model_name="mxbai-embed-large", collection=collection1)

# Step 4: Embed Documents and Add Them to the Collection
documents = documents

# Embed documents and add them to the collection with IDs
embeddings = embedding_function(documents)
collection1.add(
    ids=[str(i) for i in range(1, len(documents) + 1)],
    embeddings=embeddings,
    documents=documents
)

# Step 5: Retrieve Relevant Documents
query = questions[2]
retrieved_documents = embedding_function._retrieve(query, n_results=3)

# Print the retrieved documents
for doc in retrieved_documents:
    print(doc)


Using collection: docs2
docs2
['Lily shared how she often felt lonely at school, while Jack revealed that he had a sick sister who couldn’t leave their home.', 'What is it that you seek?”Lily and Jack exchanged glances, unsure of what to say.', 'Jack looked at Lily, tears of gratitude in his eyes.']


In [6]:
## test 2

collection2 = initialize_chromadb_collection(create_new_collection=False, last_used=last_used_info)
print(collection2.name)
# Step 3: Create an instance of OllamaEmbeddingFunction
embedding_function = OllamaEmbeddingFunction(model_name="mxbai-embed-large", collection=collection2)

# Step 4: Embed Documents and Add Them to the Collection
documents = documents

# Embed documents and add them to the collection with IDs
embeddings = embedding_function(documents)
collection2.add(
    ids=[str(i) for i in range(1, len(documents) + 1)],
    embeddings=embeddings,
    documents=documents
)

# Step 5: Retrieve Relevant Documents
query = questions[3]
retrieved_documents = embedding_function._retrieve(query, n_results=3)

# Print the retrieved documents
for doc in retrieved_documents:
    print(doc)


Using collection: docs2
docs2
['And the owl?', 'What is it that you seek?”Lily and Jack exchanged glances, unsure of what to say.', '“Welcome, young ones,” the owl said in a deep, soothing voice.']


# the code above works as a great RMC

In [7]:
### test for  DSPythonicRMClient

from typing import Optional
import dspy  # Assuming dspy is a module you have that includes the Retrieve and Prediction classes

class DSPythonicRMClient(dspy.Retrieve):
    def __init__(self, embedding_function: OllamaEmbeddingFunction, k: int = 3):
        """
        Initialize the DSPythonicRMClient.

        Args:
            embedding_function (OllamaEmbeddingFunction): The embedding function to use for retrieval.
            k (int): The number of top passages to retrieve. Defaults to 3.
        """
        super().__init__(k=k)
        self.embedding_function = embedding_function

    def retrieve_with_embedding(self, query: str, n_results: Optional[int] = None) -> dspy.Prediction:
        """
        Retrieve passages based on the embedded query.

        Args:
            query (str): The query string for which to retrieve passages.
            n_results (Optional[int]): The number of results to return. Defaults to k.

        Returns:
            dspy.Prediction: An object containing the retrieved passages.
        """
        n_results = n_results if n_results is not None else self.k
        retrieved_documents = self.embedding_function._retrieve(query, n_results=n_results)

        return dspy.Prediction(passages=retrieved_documents)

# Example Usage
if __name__ == "__main__":
    # Step 1: Initialize ChromaDB Collection
    #collection = collection

    """ # Step 2: Create an instance of OllamaEmbeddingFunction
    embedding_function = OllamaEmbeddingFunction(model_name="mxbai-embed-large", collection=collection)

    # Step 3: Embed Documents and Add Them to the Collection
    #documents = documents

    # Embed documents and add them to the collection
    embeddings = embedding_function(documents)
    collection.add(
        ids=[str(i) for i in range(1, len(documents) + 1)],
        embeddings=embeddings,
        documents=documents
    )
 """
    # Step 4: Create an instance of DSPythonicRMClient
    retriever_model = DSPythonicRMClient(embedding_function=embedding_function, k=5)

    # Step 5: Retrieve Relevant Documents
    query = questions[3]
    results = retriever_model.retrieve_with_embedding(query)
    print(results,"\n\n ------")

    # Print the retrieved passages
    for passage in results.passages:
        passage.reverse()
        print(passage)




Prediction(
    passages=[['And the owl?', 'What is it that you seek?”Lily and Jack exchanged glances, unsure of what to say.', '“Welcome, young ones,” the owl said in a deep, soothing voice.', '“I wish for Jack’s sister to get better.”The owl nodded slowly.', 'Some said it was filled with flowers that could sing, while others believed it was home to a wise old owl that could grant wishes.']]
) 

 ------
['Some said it was filled with flowers that could sing, while others believed it was home to a wise old owl that could grant wishes.', '“I wish for Jack’s sister to get better.”The owl nodded slowly.', '“Welcome, young ones,” the owl said in a deep, soothing voice.', 'What is it that you seek?”Lily and Jack exchanged glances, unsure of what to say.', 'And the owl?']
