In [1]:
import torch
import ollama

from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.embeddings.base import Embeddings

from typing import List

In [2]:
class OllamaEmbeddings(Embeddings):
    def __init__(self, model: str = 'mxbai-embed-large'):
        self.model = model

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [ollama.embeddings(model=self.model, prompt=text)['embedding'] for text in texts]

    def embed_query(self, text: str) -> List[float]:
        return ollama.embeddings(model=self.model, prompt=text)['embedding']
    
def get_relevant_context(query: str, vector_db: Chroma, top_k: int = 3, threshold: float = 0.8) -> str:
    
    query_embedding = torch.tensor(OllamaEmbeddings().embed_query(query))
    

    all_embeddings = vector_db._collection.get(include=['embeddings', 'documents'])
    embeddings = torch.tensor(all_embeddings['embeddings'])
    documents = all_embeddings['documents']

    cos_scores = torch.nn.functional.cosine_similarity(query_embedding.unsqueeze(0), embeddings)
    
    filtered_scores = cos_scores[cos_scores >= threshold]
    if len(filtered_scores) == 0:
        return "No relevant documents found."

    top_k = min(top_k, len(filtered_scores))
    
    
    top_indices = torch.topk(filtered_scores, k=top_k)[1].tolist()
    
    res = ""
    for i, idx in enumerate(top_indices):
        res += f"{i+1}. {documents[idx]},\n\n"
    
    return res

In [3]:
loader = UnstructuredPDFLoader(file_path="./Grammar_Cheatsheet.pdf")
data = loader.load()

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=200)
chunks = text_splitter.split_documents(data)

In [5]:
embedding_function = OllamaEmbeddings(model='mxbai-embed-large')

vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_function,
    collection_name="local-rag"
)

In [6]:
print(vector_db)

<langchain_community.vectorstores.chroma.Chroma object at 0x31b2af830>


In [7]:
query = "what differ between Poor vs Pore vs Pour"
relevant_docs = get_relevant_context(query, vector_db)
print(relevant_docs)

1. The Official Rules

“Data” is a Latin-based word and is the plural of “datum.”

“Datum” is the singular version of “data.”

Examples

The data show that the hypothesis was correct. • One outlying datum point is fairly standard to see in experiments.

Rules of Thumb,

2. With that in mind, some of these rules skirt the official, unyielding rules of those who believe grammar to be prescriptive. Rather, they reflect how grammar is working today (for an example, check out data vs datum).

Affect vs Effect,




In [7]:
def get_llm_response(prompt, system_prompt="You are a helpful assistant. who help me study english and answer in briefly."):
    searched=get_relevant_context(prompt,vector_db)
    response = ollama.chat(model='phi3', messages=[
        {
            'role': 'system',
            'content': system_prompt,
        },
        {
            'role': 'user',
            'content': prompt+f",Here the User's querry (unnecessary): {searched},",
        }
    ])
    return response['message']['content']

In [10]:
get_llm_response("what can you do for help me study english")

'As your English assistant:\n\n1. I can help ensure you write error-free sentences by checking for correct grammar and sentence structure when composing papers or essays in English. This will enhance the overall quality of your writing. However, it\'s essential to understand that while avoiding grammatical mistakes is beneficial, developing a deeper understanding of language mechanics can truly improve as you write consistently.\n\n2. I am equipped with guides and tools designed to clarify commonly confused words in English—like homophones or frequently misused terms. This information will assist you while typing on your keyboard and prevent errors related to word choice, ultimately improving the clarity of your writing. Remember that practicing using these words correctly is crucial for mastery over time.\n\n3. Regarding "advice" versus "advise," I can provide examples of their correct usage in sentences or offer exercises aimed at helping you distinguish between when to use each word

In [96]:
vector_db.delete_collection()