## Data loding and Embedding

In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import  PyPDFDirectoryLoader
from pathlib import Path


def load_directory_pdf(directory_path):
    loader = PyPDFDirectoryLoader(directory_path)
    docs = loader.load()
    return docs


def text_splitter(document):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=100,
        length_function=len,
        is_separator_regex=False,
    )
    texts = text_splitter.split_documents(document)

    return texts

import os
os.chdir("../")

# Load PDF Files
DATA_DIR = Path("./data")
data = load_directory_pdf(DATA_DIR)

document=text_splitter(data)



  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


In [4]:

import ollama
# client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")
def get_embedding(text, model='nomic-embed-text:latest'):    
    return ollama.embeddings(model=model, prompt=text)['embedding']

print(get_embedding("Once upon a time, there was a cat."))

[0.6478316783905029, 1.3252785205841064, -3.1200063228607178, -1.623626947402954, 1.1204919815063477, 0.7267512083053589, -1.7795132398605347, 0.8745321035385132, -0.493694543838501, 0.13032908737659454, -0.10548596829175949, 0.7464886903762817, 1.5896183252334595, 0.8342281579971313, -1.5233094692230225, -1.5566062927246094, 0.7202525734901428, -0.7342134714126587, 0.4372457265853882, 1.0877854824066162, 0.014299079775810242, -0.7033194303512573, 1.310410499572754, 0.5492509007453918, 0.781649112701416, 1.08658766746521, -1.0013372898101807, 0.9904807209968567, -0.2921236455440521, 0.21465399861335754, 0.38804811239242554, -0.5687799453735352, 0.2024337351322174, -0.874481201171875, -0.3569786548614502, -0.7931259870529175, 1.5731021165847778, 0.25834956765174866, 0.9956268072128296, 1.2419655323028564, 0.5690503716468811, 0.7958667874336243, -0.8756253719329834, -0.2181982547044754, 1.0355340242385864, 0.46038001775741577, 0.17863613367080688, -0.27561068534851074, -0.374811828136444

In [5]:
from dotenv import load_dotenv
load_dotenv()
from qdrant_client import QdrantClient,models


# Qdrant
QDRANT_HOST = "http://localhost:6333"  # Local Qdrant
QDRANT_COLLECTION = "law_docs"



qclient = QdrantClient(url=QDRANT_HOST)



def createDB(collectionName):
        if qclient.collection_exists(collection_name=f"{collectionName}"):
                print(f"Vector DB already exits :{collectionName}")
                return
        
        print(f"Vector DB Successfully created :{collectionName}")
        
        return qclient.create_collection(
                collection_name=f"{collectionName}",
                vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),)


createDB(QDRANT_COLLECTION)

Vector DB Successfully created :law_docs


True

In [6]:
import uuid
def embed_and_store(chunks,QDRANT_COLLECTION):

    """Embeds text chunks and stores them in Qdrant."""
    for i, chunk in enumerate(chunks):
        embedding = get_embedding(chunk.page_content)
        # print(f"{i} : {embedding}")
        qclient.upsert(
            collection_name=QDRANT_COLLECTION,
            points=[
                models.PointStruct(
                id= f'{uuid.uuid4()}', vector= embedding, payload= {"data":chunk.page_content,"metadata":chunk.metadata}),
            ],
        )


embed_and_store(document,QDRANT_COLLECTION)

In [7]:
def query_documents(query):
    """Searches Qdrant for relevant documents."""
    query_embedding = get_embedding(query)
    return  qclient.search(
        collection_name=QDRANT_COLLECTION,
        query_vector=query_embedding,
        limit=1,
    )[0]


# Example user query
user_query = "How Get a Social Security Number"
result = query_documents(user_query)
print("Relevant information:")

score=result.score
context=result.payload['data']
print("Score  :",score)
print("Data :\n",context)
print("--"*100)

Relevant information:
Score  : 0.60554457
Data :
 security account number issued to an alien no t authorized to work in the United States, the 
Commissioner of Social Security shall provide the Attorney General with information regarding the name 
and address of the alien, the name and address of the person reporting the earnings, and the amount of 
the earnings. The information shall be provided in an electronic form agreed upon by the Commissioner 
and the Attorney General.  
  
(d) A written certification signed by the Attorney Genera l or by any officer of the Service designated by the 
Attorney General to make such certif ication, that after diligent search no record or entry of a specified 
nature is found to exist in the records of the Service,  shall be admissible as evidence in any proceeding as 
evidence that the records of the Service contain no such record or entry, and shall have the same effect as 
the testimony of a witness given in open court.  
  
  
FOOTNOTES FOR SECT

In [9]:


prompt_Template=f""" You are Lawyer

here is the question : {user_query}

and addition context to support answer  {context}

"""




systemPrompt="You are helpful assistant that handle user query and give answer and only answer to user question and use it context rather than take information from outside system  "
# Point to the local server

response = ollama.chat(
    model='llama3:8b', 
    messages=[
        {"role": "system", "content": systemPrompt},
        {"role": "user", "content": user_query}
    ],
    # stream=True
)
response_json=response['message']['content']

# for chunk in response:
#   print(chunk['message']['content'], end='', flush=True)

In [11]:
print(response_json) 

I'd be happy to help you with that!

To get a Social Security number (SSN), follow these steps:

1. **Determine eligibility**: You can apply for an SSN if you're a:
	* U.S. citizen
	* Lawful permanent resident (with a Green Card)
	* Refugee or asylum seeker
2. **Gather required documents**:
	* Proof of identity: A valid government-issued ID, such as a driver's license, state ID, or passport.
	* Proof of age: A birth certificate or adoption papers.
	* Proof of immigration status (if applicable): A Green Card, I-94 arrival/departure record, or other relevant documents.
3. **Apply for an SSN online**:
	* Go to the Social Security Administration's (SSA) website ([www.ssa.gov](http://www.ssa.gov)) and fill out Form SSA-1, Application for a Social Security Card.
	* Upload supporting documentation.
	* Create an account or sign in if you already have one.
4. **Apply by phone**:
	* Call the SSA at 1-800-772-1213 (TTY 1-800-325-0778) and ask to apply for a Social Security number.
5. **Visit your

In [22]:
from ollama import Client
from ollama import ChatResponse

client=Client(host="http://localhost:11434")

response:ChatResponse=client.chat(model="llama3:8b",messages=[{
    "role":"user",
    "content":"hey",
}])


response

{'model': 'llama3:8b',
 'created_at': '2024-12-03T17:43:10.9917587Z',
 'message': {'role': 'assistant', 'content': "Hey! How's it going?"},
 'done_reason': 'stop',
 'done': True,
 'total_duration': 2816612800,
 'load_duration': 31942600,
 'prompt_eval_count': 11,
 'prompt_eval_duration': 1285708000,
 'eval_count': 8,
 'eval_duration': 1497236000}

In [24]:
response['message']['content']

"Hey! How's it going?"

## Evulating with deep Evaul

### Custom LLM Evualtor

In [32]:
from typing import Any, Coroutine
from deepeval.models import DeepEvalBaseLLM
from ollama import Client
from ollama import ChatResponse
import asyncio
from pydantic import BaseModel

class customLMStudio(DeepEvalBaseLLM):
    def __init__(self, url="http://localhost:11434",model="llama3:8b"):
        self.model = Client(host=url)
        self.model_name = model

    def load_model(self, *args, **kwargs)->Client:
        return self.model
    
    def generate(self, prompt: str) -> str:
        client = self.load_model()
        completion:ChatResponse = client.chat(
            model=self.model_name,
            messages=[
                {"role": "system", "content": "Your helpful AI for Evaluation"},
                {"role": "user", "content": prompt}
            ],
            # format="json"
        )
        return completion['message']["content"]
    
    async def a_generate(self, prompt: str) -> str:
        # Use asyncio.to_thread to run the blocking generate method in a separate thread
        return self.generate(prompt=prompt)

    def get_model_name(self):
        return self.model_name

c = customLMStudio()

In [33]:
c.generate("Hey")

"Hi! I'm happy to help with any questions or topics you'd like to discuss. What's on your mind today?"

In [34]:
c.get_model_name()

'llama3:8b'

In [35]:
c.a_generate("Hey")

<coroutine object customLMStudio.a_generate at 0x000001B62663D080>

### AnswerRelevancyMetric

In [36]:
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."

metric = AnswerRelevancyMetric(
    threshold=0.7,
    model=customLMStudio(),
    include_reason=True
)
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output,
    retrieval_context=["We offer a 40-day full refund at no extra cost"]

)

metric.measure(test_case)
print(metric.score)
print(metric.reason)



Output()

1.0
The score is 1.00 because the answer directly addresses the concern about shoe fit, providing a relevant and accurate response.


In [37]:
# or evaluate test cases in bulk
evaluate([test_case], [metric])

Output()

Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...




Metrics Summary

  - ❌ Answer Relevancy (score: 0.5, threshold: 0.7, strict: False, evaluation model: llama3:8b, reason: The score is 0.50 because the answer includes a statement that is completely off-topic and unrelated to addressing the issue of poorly fitting shoes., error: None)

For test case:

  - input: What if these shoes don't fit?
  - actual output: We offer a 30-day full refund at no extra cost.
  - expected output: None
  - context: None
  - retrieval context: ['We offer a 40-day full refund at no extra cost']


Overall Metric Pass Rates

Answer Relevancy: 0.00% pass rate




[TestResult(success=False, metrics_metadata=[MetricMetadata(metric='Answer Relevancy', threshold=0.7, success=False, score=0.5, reason='The score is 0.50 because the answer includes a statement that is completely off-topic and unrelated to addressing the issue of poorly fitting shoes.', strict_mode=False, evaluation_model='llama3:8b', error=None, evaluation_cost=None, verbose_logs='Statements:\n[\n    "We offer a 30-day full refund",\n    "at no extra cost"\n]\n\nVerdicts:\n[\n    {\n        "verdict": "no",\n        "reason": "The statement is irrelevant to the input, which asks about the situation where shoes don\'t fit."\n    },\n    {\n        "verdict": "yes",\n        "reason": null\n    }\n]')], input="What if these shoes don't fit?", actual_output='We offer a 30-day full refund at no extra cost.', expected_output=None, context=None, retrieval_context=['We offer a 40-day full refund at no extra cost'])]

### Contextual Precision

The contextual precision metric measures your RAG pipeline's retriever by evaluating whether nodes in your retrieval_context that are relevant to the given input are ranked higher than irrelevant ones. deepeval's contextual precision metric is a self-explaining LLM-Eval, meaning it outputs a reason for its metric score.

In [39]:
from deepeval import evaluate
from deepeval.metrics import ContextualPrecisionMetric
from deepeval.test_case import LLMTestCase

# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."

# Replace this with the expected output from your RAG generator
expected_output = "You are eligible for a 30 day full refund at no extra cost."

# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost.",""]

metric = ContextualPrecisionMetric(
    threshold=0.7,
    model=customLMStudio(),    
    include_reason=True
)
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output,
    expected_output=expected_output,
    retrieval_context=retrieval_context
)

metric.measure(test_case)
print(metric.score)
print(metric.reason)

# or evaluate test cases in bulk
evaluate([test_case], [metric])

Output()

Output()

1.0
The score is 1.00 because the relevant node ('yes' verdict) is correctly ranked as the top node, while the irrelevant nodes ('no' verdicts) are understandably ranked lower due to their lack of useful information in providing a suitable solution for the input 'What if these shoes don't fit?'.
Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...




Metrics Summary

  - ✅ Contextual Precision (score: 1.0, threshold: 0.7, strict: False, evaluation model: llama3:8b, reason: The score is 1.00 because the retrieval context with a 'yes' verdict clearly provides relevant information about the expected output, while the node ranked second ('no' verdict) is irrelevant and doesn't provide any useful details about refund process or eligibility., error: None)

For test case:

  - input: What if these shoes don't fit?
  - actual output: We offer a 30-day full refund at no extra cost.
  - expected output: You are eligible for a 30 day full refund at no extra cost.
  - context: None
  - retrieval context: ['All customers are eligible for a 30 day full refund at no extra cost.', '']


Overall Metric Pass Rates

Contextual Precision: 100.00% pass rate




[TestResult(success=True, metrics_metadata=[MetricMetadata(metric='Contextual Precision', threshold=0.7, success=True, score=1.0, reason="The score is 1.00 because the retrieval context with a 'yes' verdict clearly provides relevant information about the expected output, while the node ranked second ('no' verdict) is irrelevant and doesn't provide any useful details about refund process or eligibility.", strict_mode=False, evaluation_model='llama3:8b', error=None, evaluation_cost=None, verbose_logs='Verdicts:\n[\n    {\n        "verdict": "yes",\n        "reason": "The context clearly mentions that all customers are eligible for a 30 day full refund, which matches the expected output."\n    },\n    {\n        "verdict": "no",\n        "reason": "\'\' is empty and doesn\'t provide any useful information about the refund process or eligibility."\n    }\n]')], input="What if these shoes don't fit?", actual_output='We offer a 30-day full refund at no extra cost.', expected_output='You are 

## DeepEvual with OPENAI API

In [24]:
import os

os.environ["OPENAI_API_KEY"]="your api key"

In [33]:
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."

metric = AnswerRelevancyMetric(
    threshold=0.7,
    model="gpt-4",
    include_reason=True
)
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output,
    retrieval_context=["We offer a 40-day full refund at no extra cost"]

)

metric.measure(test_case)
print(metric.score)
print(metric.reason)

# or evaluate test cases in bulk
evaluate([test_case], [metric])

Output()

Output()

1.0
The score is 1.00 because the response perfectly addresses the concern raised in the input without any irrelevant statements.
Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...




Metrics Summary

  - ✅ Answer Relevancy (score: 1.0, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: The score is 1.00 because the response perfectly addressed the concern about the shoes not fitting., error: None)

For test case:

  - input: What if these shoes don't fit?
  - actual output: We offer a 30-day full refund at no extra cost.
  - expected output: None
  - context: None
  - retrieval context: ['We offer a 40-day full refund at no extra cost']


Overall Metric Pass Rates

Answer Relevancy: 100.00% pass rate




[TestResult(success=True, metrics_metadata=[MetricMetadata(metric='Answer Relevancy', threshold=0.7, success=True, score=1.0, reason='The score is 1.00 because the response perfectly addressed the concern about the shoes not fitting.', strict_mode=False, evaluation_model='gpt-4', error=None, evaluation_cost=0.026369999999999998, verbose_logs='Statements:\n[\n    "We offer a 30-day full refund at no extra cost."\n]\n\nVerdicts:\n[\n    {\n        "verdict": "yes",\n        "reason": null\n    }\n]')], input="What if these shoes don't fit?", actual_output='We offer a 30-day full refund at no extra cost.', expected_output=None, context=None, retrieval_context=['We offer a 40-day full refund at no extra cost'])]

In [34]:
from deepeval import evaluate
from deepeval.metrics import ContextualPrecisionMetric
from deepeval.test_case import LLMTestCase

# Replace this with the actual output from your LLM application
actual_output = "We offer a 300-day full refund at no extra cost."

# Replace this with the expected output from your RAG generator
expected_output = "You are eligible for a 30 day full refund at no extra cost."

# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]

metric = ContextualPrecisionMetric(
    threshold=0.7,
    model="gpt-4",
    include_reason=True
)
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output,
    expected_output=expected_output,
    retrieval_context=retrieval_context
)

metric.measure(test_case)
print(metric.score)
print(metric.reason)

# or evaluate test cases in bulk
evaluate([test_case], [metric])

Output()

Output()

1.0
The score is 1.00 because the only node in the retrieval context directly addresses the issue of the shoes not fitting.
Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...




Metrics Summary

  - ✅ Contextual Precision (score: 1.0, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: The score is 1.00 because the only node in the retrieval context directly addresses the user's concern about shoes not fitting, hence its high relevance and top ranking., error: None)

For test case:

  - input: What if these shoes don't fit?
  - actual output: We offer a 300-day full refund at no extra cost.
  - expected output: You are eligible for a 30 day full refund at no extra cost.
  - context: None
  - retrieval context: ['All customers are eligible for a 30 day full refund at no extra cost.']


Overall Metric Pass Rates

Contextual Precision: 100.00% pass rate




[TestResult(success=True, metrics_metadata=[MetricMetadata(metric='Contextual Precision', threshold=0.7, success=True, score=1.0, reason="The score is 1.00 because the only node in the retrieval context directly addresses the user's concern about shoes not fitting, hence its high relevance and top ranking.", strict_mode=False, evaluation_model='gpt-4', error=None, evaluation_cost=0.030809999999999997, verbose_logs='Verdicts:\n[\n    {\n        "verdict": "yes",\n        "reason": "The text \'All customers are eligible for a 30 day full refund at no extra cost.\' directly addresses the concern about shoes not fitting."\n    }\n]')], input="What if these shoes don't fit?", actual_output='We offer a 300-day full refund at no extra cost.', expected_output='You are eligible for a 30 day full refund at no extra cost.', context=None, retrieval_context=['All customers are eligible for a 30 day full refund at no extra cost.'])]