## Data Loading and Embedding

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import  PyPDFDirectoryLoader
from pathlib import Path


def load_directory_pdf(directory_path):
    """Load the documents of a directory through ``PyPDFDirectoryLoader``.

    Args:
        directory_path: Directory handed to ``PyPDFDirectoryLoader``.

    Returns:
        Whatever ``PyPDFDirectoryLoader(directory_path).load()`` returns.
    """
    return PyPDFDirectoryLoader(directory_path).load()


def text_splitter(document, chunk_size=2000, chunk_overlap=100):
    """Split loaded documents into overlapping chunks.

    Args:
        document: Documents to split; passed to ``split_documents``.
        chunk_size: Chunk size given to the splitter, measured with ``len``.
            Defaults to the previously hard-coded 2000.
        chunk_overlap: Overlap given to the splitter. Defaults to the
            previously hard-coded 100.

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    # Named ``splitter`` so the local does not shadow this function's own name.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    return splitter.split_documents(document)

import os

DATA_DIR = Path("./data")

# Step up one directory only while the data directory is not visible from the
# current working directory. An unconditional chdir would climb one more level
# every time this cell is re-run, so the relative path would stop resolving.
if not DATA_DIR.is_dir():
    os.chdir("../")

# Load PDF Files
data = load_directory_pdf(DATA_DIR)

# Split the loaded documents into chunks for embedding.
document = text_splitter(data)



  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


In [3]:

# Make sure to `pip install openai` first
from openai import OpenAI

# OpenAI-compatible client pointed at a server on localhost:1234.
client = OpenAI(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio",
)

def get_embedding(text, model="nomic-ai/nomic-embed-text-v1.5-GGUF"):
    """Return the embedding vector for ``text``.

    Newlines are replaced by spaces before the text is sent to the
    embeddings endpoint of the module-level ``client``.

    Args:
        text: String to embed.
        model: Embedding model identifier passed to the endpoint.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    return response.data[0].embedding

In [8]:
    
# NOTE(review): this cell re-creates `client` and redefines `get_embedding`
# the same way an earlier cell does; the definitions here are the ones in
# effect afterwards. `uuid` is imported here and used by a later cell.
import uuid

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio",
)


def get_embedding(text, model="nomic-ai/nomic-embed-text-v1.5-GGUF"):
    """Return the embedding vector for ``text`` with newlines turned into spaces."""
    flattened = text.replace("\n", " ")
    reply = client.embeddings.create(input=[flattened], model=model)
    return reply.data[0].embedding




In [9]:
from dotenv import load_dotenv

# Load variables from a .env file (if present) into the process environment.
load_dotenv()

from qdrant_client import QdrantClient, models

# Qdrant
QDRANT_HOST = "http://localhost:6333"  # Local Qdrant
QDRANT_COLLECTION = "law_docs"

# Client shared by the cells below.
qclient = QdrantClient(url=QDRANT_HOST)



def createDB(collectionName, vector_size=768):
    """Create a Qdrant collection unless it already exists.

    Args:
        collectionName: Name of the collection to create.
        vector_size: Dimension of the stored vectors. Defaults to the
            previously hard-coded 768.

    Returns:
        The result of ``qclient.create_collection`` when a collection is
        created, or ``None`` when the collection already exists.
    """
    # The original formatted the name through an f-string; keep that coercion.
    name = str(collectionName)

    if qclient.collection_exists(collection_name=name):
        print(f"Vector DB already exists :{collectionName}")
        return None

    created = qclient.create_collection(
        collection_name=name,
        vectors_config=models.VectorParams(
            size=vector_size,
            distance=models.Distance.COSINE,
        ),
    )
    # Report success only after the create call has returned without raising.
    print(f"Vector DB Successfully created :{collectionName}")
    return created


# Create the collection, or just report it when it already exists.
createDB(QDRANT_COLLECTION)

Vector DB Successfully created :law_docs


True

In [10]:

def embed_and_store(chunks, QDRANT_COLLECTION, batch_size=64):
    """Embed text chunks and store them in Qdrant.

    Each chunk becomes one point whose payload holds the chunk text under
    ``"data"`` and its metadata under ``"metadata"``. Points are sent in
    batches rather than one request per chunk.

    Args:
        chunks: Iterable of objects exposing ``page_content`` and ``metadata``.
        QDRANT_COLLECTION: Name of the collection to upsert into.
        batch_size: Maximum number of points sent per ``upsert`` call.

    Returns:
        None.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Imported locally so the function does not depend on another cell having
    # imported ``uuid`` first.
    import uuid

    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    pending = []
    for chunk in chunks:
        pending.append(
            models.PointStruct(
                id=str(uuid.uuid4()),
                vector=get_embedding(chunk.page_content),
                payload={"data": chunk.page_content, "metadata": chunk.metadata},
            )
        )
        if len(pending) >= batch_size:
            qclient.upsert(collection_name=QDRANT_COLLECTION, points=pending)
            pending = []

    # Flush whatever is left after the last full batch.
    if pending:
        qclient.upsert(collection_name=QDRANT_COLLECTION, points=pending)


# Embed every chunk produced by the splitter and store it in the collection.
embed_and_store(document,QDRANT_COLLECTION)

In [11]:
def query_documents(query):
    """Search Qdrant for the single best match for ``query``.

    Args:
        query: Text to embed and search with.

    Returns:
        The first hit returned by ``qclient.search`` on ``QDRANT_COLLECTION``.

    Raises:
        IndexError: If the search returns no hits.
    """
    hits = qclient.search(
        collection_name=QDRANT_COLLECTION,
        query_vector=get_embedding(query),
        limit=1,
    )
    if not hits:
        # Same exception type that indexing the empty result raised before,
        # but with a message that says what went wrong.
        raise IndexError(
            f"No documents found in collection {QDRANT_COLLECTION!r} "
            f"for query {query!r}"
        )
    return hits[0]


# Example user query
user_query = "How Get a Social Security Number"
result = query_documents(user_query)
print("Relevant information:")

# Keep the similarity score and the stored chunk text of the top hit;
# `context` is reused when the prompt is built in a later cell.
score = result.score
context = result.payload['data']
print("Score  :", score)
print("Data :\n", context)
print("--" * 100)

Relevant information:
Score  : 0.7758323
Data :
 28 Get a Social Security Number
As a permanent resident, you are eligible for a Social Security number, 
which is a number assigned to you by the U.S. government. It helps the government keep track of your earnings and the benefits you can receive. Y our Social Security number is also used by financial institutions and other agencies, such as schools, to identify you. You may be asked for your Social Security number when you rent an apartment or buy a home.
Social Security is a U.S. government program that 
provides benefits for certain retired workers and their families, certain disabled workers and their families, and certain family members of deceased workers. The government department in charge of Social Security is called the Social Security Administration (SSA). 
Find the Social Security office closest to you by:
● Looking on the SSA website, www.socialsecurity.gov. For Spanish, visit 
www.segurosocial.gov/espanol. The website also

In [12]:


# User message: persona line, the question, and the retrieved context.
prompt_Template = f""" You are Lawyer

here is the question : {user_query}

and addition context to support answer  {context}

"""

systemPrompt = "You are helpful assistant that handle user query and give answer and only answer to user question and use it context rather than take information from outside system  "

# Point to the local server
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

completion = client.chat.completions.create(
    model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
    messages=[
        {"role": "system", "content": systemPrompt},
        {"role": "user", "content": prompt_Template},
    ],
    temperature=0.7,
    stream=True,
)

# Accumulate the streamed text while echoing each piece as it arrives.
llm_response = ""
for chunk in completion:
    piece = chunk.choices[0].delta.content
    if piece is None:
        # Chunks without text content contribute nothing to the answer.
        continue
    llm_response += piece
    print(piece, end='')

As a lawyer, I'd be happy to help you navigate the process of obtaining a Social Security number as a permanent resident.

To get a Social Security number, you can follow these steps:

1. Find a Social Security office near you: You can find the closest Social Security office by visiting the SSA website (www.socialsecurity.gov) or calling 1-800-772-1213.
2. Make an appointment: It's recommended to make an appointment before visiting the office, especially if you're not fluent in English. This will ensure that an interpreter is available to assist you during your visit.
3. Gather required documents: You'll need to bring proof of identity and immigration status to the Social Security office. Acceptable documents include:
	* A valid passport
	* An employment authorization document (EAD)
	* A visa (if applicable)
4. Fill out Form SS-5: When you visit the Social Security office, you'll be given a copy of Form SS-5, Application for a Social Security Card. Fill it out completely and accurately

## Evaluation

In [44]:
from typing import Any, Coroutine
from deepeval.models import DeepEvalBaseLLM
from openai import OpenAI
import asyncio
from pydantic import BaseModel

class customLMStudio(DeepEvalBaseLLM):
    """DeepEval model wrapper around an OpenAI-compatible server on localhost:1234.

    Args:
        model: Model identifier sent with every chat completion request.
        temperature: Sampling temperature for ``generate``. Defaults to the
            previously hard-coded 0.7; a lower value (e.g. 0) makes repeated
            evaluations of the same test case more consistent.
    """

    def __init__(
        self,
        model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
        temperature=0.7,
    ):
        # OpenAI-compatible client for the local server.
        self.model = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")
        self.model_name = model
        self.temperature = temperature

    def load_model(self, *args, **kwargs):
        """Return the client created in ``__init__``."""
        return self.model

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` as a user message and return the reply text."""
        client = self.load_model()
        completion = client.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": "system", "content": "Your helpful AI for Evaluation"},
                {"role": "user", "content": prompt}
            ],
            temperature=self.temperature,
        )
        return completion.choices[0].message.content

    async def a_generate(self, prompt: str) -> str:
        """Async variant of ``generate``.

        The blocking ``generate`` call runs in a worker thread so the event
        loop is not blocked while waiting for the server.
        """
        return await asyncio.to_thread(self.generate, prompt)

    def get_model_name(self):
        """Return the model identifier this wrapper was created with."""
        return self.model_name

# Instance with the default model, used by the quick checks in the next cells.
c = customLMStudio()

In [45]:
# Quick check of the synchronous generation path.
c.generate("Hey")

"Hey! It looks like you accidentally sent an empty message. Is there something on your mind that you'd like to talk about or ask? I'm here to help with any questions or topics you'd like to discuss!"

In [37]:
# Show the model identifier the wrapper reports.
c.get_model_name()

'lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF'

In [46]:
# NOTE(review): calling the async method without awaiting it only creates a
# coroutine object and never runs the request; in a notebook cell use
# `await c.a_generate("Hey")` to get the reply text.
c.a_generate("Hey")

<coroutine object customLMStudio.a_generate at 0x0000029E2D57D560>

In [47]:
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."

# One test case: the question, the answer under test, and the retrieved context.
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output,
    retrieval_context=["We offer a 40-day full refund at no extra cost"],
)

# Answer relevancy judged by the custom LM Studio wrapper.
metric = AnswerRelevancyMetric(
    threshold=0.7,
    model=customLMStudio(),
    include_reason=True,
)

# Score the single case directly and show the score with its explanation.
metric.measure(test_case)
print(metric.score)
print(metric.reason)

# or evaluate test cases in bulk
evaluate([test_case], [metric])

Output()

Output()

0.6666666666666666
The score is 0.67 because although the output addresses some concerns, it also includes an irrelevant statement ('We offer') that does not directly answer the original question about fitting shoes.
Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...




Metrics Summary

  - ❌ Answer Relevancy (score: 0.0, threshold: 0.7, strict: False, evaluation model: lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF, reason: The score is 0.00 because the actual output provided statements that were completely unrelated to the input question, discussing shoe refunds instead of addressing the issue of poorly fitting shoes., error: None)

For test case:

  - input: What if these shoes don't fit?
  - actual output: We offer a 30-day full refund at no extra cost.
  - expected output: None
  - context: None
  - retrieval context: ['We offer a 40-day full refund at no extra cost']


Overall Metric Pass Rates

Answer Relevancy: 0.00% pass rate




[TestResult(success=False, metrics_metadata=[MetricMetadata(metric='Answer Relevancy', threshold=0.7, success=False, score=0.0, reason='The score is 0.00 because the actual output provided statements that were completely unrelated to the input question, discussing shoe refunds instead of addressing the issue of poorly fitting shoes.', strict_mode=False, evaluation_model='lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF', error=None, evaluation_cost=None, verbose_logs='Statements:\n[\n    "We offer a 30-day full refund",\n    "at no extra cost"\n]\n\nVerdicts:\n[\n    {\n        "verdict": "no",\n        "reason": "The statements provided are about shoe refunds and are not related to the input question, which asks about what to do if shoes don\'t fit."\n    }\n]')], input="What if these shoes don't fit?", actual_output='We offer a 30-day full refund at no extra cost.', expected_output=None, context=None, retrieval_context=['We offer a 40-day full refund at no extra cost'])]

In [24]:
import os
from getpass import getpass

# Never hard-code API keys in a notebook. Use the key already present in the
# environment and only prompt for it, without echoing, when it is missing.
# NOTE(review): the key that used to be written here in plain text has been
# exposed and should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY: ")

In [33]:
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."

# One test case: the question, the answer under test, and the retrieved context.
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output,
    retrieval_context=["We offer a 40-day full refund at no extra cost"],
)

# Same metric as the local-model run, judged by "gpt-4" instead.
metric = AnswerRelevancyMetric(
    threshold=0.7,
    model="gpt-4",
    include_reason=True,
)

# Score the single case directly and show the score with its explanation.
metric.measure(test_case)
print(metric.score)
print(metric.reason)

# or evaluate test cases in bulk
evaluate([test_case], [metric])

Output()

Output()

1.0
The score is 1.00 because the response perfectly addresses the concern raised in the input without any irrelevant statements.
Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...




Metrics Summary

  - ✅ Answer Relevancy (score: 1.0, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: The score is 1.00 because the response perfectly addressed the concern about the shoes not fitting., error: None)

For test case:

  - input: What if these shoes don't fit?
  - actual output: We offer a 30-day full refund at no extra cost.
  - expected output: None
  - context: None
  - retrieval context: ['We offer a 40-day full refund at no extra cost']


Overall Metric Pass Rates

Answer Relevancy: 100.00% pass rate




[TestResult(success=True, metrics_metadata=[MetricMetadata(metric='Answer Relevancy', threshold=0.7, success=True, score=1.0, reason='The score is 1.00 because the response perfectly addressed the concern about the shoes not fitting.', strict_mode=False, evaluation_model='gpt-4', error=None, evaluation_cost=0.026369999999999998, verbose_logs='Statements:\n[\n    "We offer a 30-day full refund at no extra cost."\n]\n\nVerdicts:\n[\n    {\n        "verdict": "yes",\n        "reason": null\n    }\n]')], input="What if these shoes don't fit?", actual_output='We offer a 30-day full refund at no extra cost.', expected_output=None, context=None, retrieval_context=['We offer a 40-day full refund at no extra cost'])]

In [34]:
from deepeval import evaluate
from deepeval.metrics import ContextualPrecisionMetric
from deepeval.test_case import LLMTestCase

# Replace this with the actual output from your LLM application
actual_output = "We offer a 300-day full refund at no extra cost."

# Replace this with the expected output from your RAG generator
expected_output = "You are eligible for a 30 day full refund at no extra cost."

# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]

# One test case carrying the answer, the reference answer and the context.
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output,
    expected_output=expected_output,
    retrieval_context=retrieval_context,
)

# Contextual precision judged by "gpt-4".
metric = ContextualPrecisionMetric(
    threshold=0.7,
    model="gpt-4",
    include_reason=True,
)

# Score the single case directly and show the score with its explanation.
metric.measure(test_case)
print(metric.score)
print(metric.reason)

# or evaluate test cases in bulk
evaluate([test_case], [metric])

Output()

Output()

1.0
The score is 1.00 because the only node in the retrieval context directly addresses the issue of the shoes not fitting.
Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...




Metrics Summary

  - ✅ Contextual Precision (score: 1.0, threshold: 0.7, strict: False, evaluation model: gpt-4, reason: The score is 1.00 because the only node in the retrieval context directly addresses the user's concern about shoes not fitting, hence its high relevance and top ranking., error: None)

For test case:

  - input: What if these shoes don't fit?
  - actual output: We offer a 300-day full refund at no extra cost.
  - expected output: You are eligible for a 30 day full refund at no extra cost.
  - context: None
  - retrieval context: ['All customers are eligible for a 30 day full refund at no extra cost.']


Overall Metric Pass Rates

Contextual Precision: 100.00% pass rate




[TestResult(success=True, metrics_metadata=[MetricMetadata(metric='Contextual Precision', threshold=0.7, success=True, score=1.0, reason="The score is 1.00 because the only node in the retrieval context directly addresses the user's concern about shoes not fitting, hence its high relevance and top ranking.", strict_mode=False, evaluation_model='gpt-4', error=None, evaluation_cost=0.030809999999999997, verbose_logs='Verdicts:\n[\n    {\n        "verdict": "yes",\n        "reason": "The text \'All customers are eligible for a 30 day full refund at no extra cost.\' directly addresses the concern about shoes not fitting."\n    }\n]')], input="What if these shoes don't fit?", actual_output='We offer a 300-day full refund at no extra cost.', expected_output='You are eligible for a 30 day full refund at no extra cost.', context=None, retrieval_context=['All customers are eligible for a 30 day full refund at no extra cost.'])]