In [None]:
import torch

# Prefer Apple's Metal Performance Shaders (MPS) backend when PyTorch reports it
# as available; otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# # Example Tensor
# x = torch.randn(3, 3).to(device)
# print(x)


tensor([[ 1.2558e+00, -8.5910e-05, -5.0274e-01],
        [ 5.6675e-01, -9.8424e-01, -4.6679e-01],
        [-9.9066e-01,  8.3942e-01, -3.3284e-01]], device='mps:0')


In [2]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
import ollama
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
import psutil

In [3]:
from langchain.docstore.document import Document
# Location of the plain-text source document (relative to the notebook's working directory).
file_path = "document/raw_data.txt"

# Read the whole file as UTF-8 text; the context manager closes the handle afterwards.
with open(file_path, mode='r', encoding='utf-8') as raw_file:
    doc_text = raw_file.read()

# Wrap the raw text in a single LangChain Document so it can be handed to a text splitter.
doc = Document(page_content=doc_text)

In [4]:
# Preview only the beginning of the loaded text: echoing the entire Document
# would flood the notebook output with the full file contents.
doc.page_content[:500]



In [5]:
# Split the document on newlines into chunks targeting 1,000 characters, with a
# 30-character overlap between neighbouring chunks. The splitter logs a message
# whenever it has to emit a chunk longer than the requested size.
text_spliter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=30,
)
docs = text_spliter.split_documents([doc])

Created a chunk of size 1122, which is longer than the specified 1000
Created a chunk of size 1398, which is longer than the specified 1000
Created a chunk of size 1141, which is longer than the specified 1000
Created a chunk of size 1092, which is longer than the specified 1000


In [6]:
# all-MiniLM-L6-v2 maps sentences and paragraphs to a 384-dimensional dense vector space.
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"

# Run the embedding model on the torch device held in `device`.
model_kwargs = dict(device=device)

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model,
    model_kwargs=model_kwargs,
)

  embeddings = HuggingFaceEmbeddings(model_name=embedding_model, model_kwargs=model_kwargs) #model_kwargs=model_kwargs


In [7]:
# Embed every chunk and build a FAISS index over the resulting vectors.
vectorstore = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

In [8]:
# Persist the index to the local "faiss_index" folder so it can be reloaded later.
vectorstore.save_local(folder_path="faiss_index")

In [9]:
# Reload the index from the "faiss_index" folder using the same embedding model.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading the
# serialized index files; only do this for index folders you trust (here the
# folder is the one written by this notebook) — confirm before pointing it elsewhere.
embedding_vectorestore = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [10]:
# Expose the reloaded index as a retriever that uses similarity search.
retriever = embedding_vectorestore.as_retriever(
    search_type="similarity",
)

In [11]:
from langchain_ollama import OllamaLLM
# LLM handle for the "llama3.2" model served through Ollama.
llm = OllamaLLM(
    model="llama3.2",
)

In [12]:
# Retrieval-augmented QA chain: the retriever supplies context chunks and the
# "stuff" chain type passes them to the LLM together with the question.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [13]:
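# Prerequisite (run in a terminal, not in this notebook): install Ollama,
# then start its server and pull the llama3.2 model used by the QA chain.
# curl -fsSL https://ollama.ai/install.sh | sh

# ollama serve & ollama pull llama3.2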

In [14]:
# Spot-check the chain with a question about fines; the chain returns a dict
# holding the original 'query' and the generated 'result'.
question = "What is the maximum fine for contravening the provisions of the Act?"
response = qa.invoke(question)
response

{'query': 'What is the maximum fine for contravening the provisions of the Act?',
 'result': "The maximum fine for contravening the provisions of the Act is $20,000. This is stated in section (8) and section (9), which outline the fines for certain specific contraventions, but it's also mentioned that for other sections and regulations, the maximum fine is not specified here but the previous laws state a range from  $200 to $20,000"}

In [30]:
# Spot-check: a yes/no question about trail permits.
question = "Is Trail permit required?"
response = qa.invoke(question)
response

{'query': 'Is Trail permit required?',
 'result': 'Yes, according to section 2.1(1), no person shall drive a motorized snow vehicle upon a prescribed trail except under the authority of, and in accordance with, a trail permit for the motorized snow vehicle issued under subsection (2).'}

In [31]:
# Spot-check: a question about population records, sent with its original wording.
question = "Does the municiple maintains any record of population?"
response = qa.invoke(question)
response

{'query': 'Does the municiple maintains any record of population?',
 'result': "I don't know the answer to your question based on the provided context. The text appears to be related to records kept by the Ministry responsible for motor vehicle regulations in Ontario, Canada, but it does not mention population records or municipal records."}

In [17]:
# Spot-check: penalties for careless driving.
question = "What are the penalties for careless driving?"
response = qa.invoke(question)
response

{'query': 'What are the penalties for careless driving?',
 'result': 'The penalties for careless driving under the context provided are:\n\n* A fine of not less than $400 and not more than $2,000 or to imprisonment for a term of not more than six months, or to both (section 17).\n \nAdditionally, if bodily harm or death is caused, the driver may be liable to a higher penalty of:\n* A fine of not less than $2,000 and not more than $50,000 or to imprisonment for a term of not more than two years, or to both.'}

In [18]:
# Same careless-driving question asked a second time; comparing this output with
# the previous cell's shows how much the generated wording varies between runs.
question = "What are the penalties for careless driving?"
response = qa.invoke(question)
response

{'query': 'What are the penalties for careless driving?',
 'result': "The penalties for careless driving under the provided context are as follows:\n\n* A fine of not less than $400 and not more than $2,000 or imprisonment for a term of not more than six months.\n* In addition to the above, a person's driver’s licence or permit may be suspended for a period of not more than two years.\n\nNote that there is also a separate section (5) in the provided context regarding commercial motor vehicles and careless driving, but it does not specify different penalties."}

In [19]:
import pandas as pd

# Load the evaluation spreadsheet; the evaluation loop reads its 'Question' and
# 'Answer' columns.
df = pd.read_excel(
    'TrainingDataset.xlsx',
)


In [20]:
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model used to compare generated answers with reference answers.
sentenceTransformer = SentenceTransformer(
    model_name_or_path='all-MiniLM-L6-v2',
)


In [21]:
def compute_rouge_l(correct_answer, model_answer):
    """Return the ROUGE-L F1 score of ``model_answer`` against ``correct_answer``.

    Args:
        correct_answer: Reference text, passed to the scorer as its first argument.
        model_answer: Generated text, passed to the scorer as its second argument.

    Returns:
        The ``fmeasure`` attribute of the scorer's ``"rougeL"`` result.
    """
    metric = "rougeL"
    # A scorer with stemming enabled is created for each call.
    rouge = rouge_scorer.RougeScorer([metric], use_stemmer=True)
    return rouge.score(correct_answer, model_answer)[metric].fmeasure


In [25]:
import psutil
import time
from rouge_score import rouge_scorer
from memory_profiler import memory_usage

# Per-question result accumulators. The evaluation loop appends to them and the
# values are later attached to the DataFrame as columns.
responses, similarities, rouge_l_scores = [], [], []
times, memory_usage_MB = [], []

# Defined here but not populated by any cell visible in this notebook.
answer_cosine_similarities = []

# Handle on the current Python process, used when sampling memory.
process = psutil.Process()


In [27]:
def getAnswer(question):
    """Run ``question`` through the retrieval QA chain and return the chain's output.

    Args:
        question: The question text forwarded to ``qa.invoke``.

    Returns:
        Whatever ``qa.invoke`` returns for the question, unmodified.
    """
    chain_output = qa.invoke(question)
    return chain_output

In [28]:
# Evaluate the QA chain on every row of `df`. For each question this records the
# generated answer, its cosine similarity and ROUGE-L score against the reference
# answer, the answer latency in seconds, and the observed memory delta in MiB.

# Start from empty accumulators so that re-running this cell does not append a
# second set of results, which would no longer match len(df) when the lists are
# attached to the DataFrame as columns.
for accumulator in (responses, similarities, rouge_l_scores, times, memory_usage_MB):
    accumulator.clear()

for index, row in df.iterrows():
    question, true_answer = row['Question'], row['Answer']

    # Baseline memory of this Python process. With interval=0.1 and timeout=1 the
    # sampling itself takes about a second, so it must happen BEFORE the timer
    # starts; otherwise every recorded latency includes the profiling overhead.
    mem_usage_before = memory_usage(process.pid, interval=0.1, timeout=1)

    # perf_counter() is a monotonic clock intended for measuring short intervals.
    start_time = time.perf_counter()
    chain_output = getAnswer(question)
    end_time = time.perf_counter()
    elapsed_seconds = end_time - start_time

    # NOTE(review): only this Python process is sampled. If the model runs in a
    # separate server process, its memory is not reflected here — confirm.
    mem_usage_after = memory_usage(process.pid, interval=0.1, timeout=1)
    memory_used = max(mem_usage_after) - min(mem_usage_before)

    # Default message if 'result' is not in the dictionary
    generated_response = chain_output.get('result', "No response generated")

    # Normalise the answer to a single string: both the ROUGE scorer and the
    # scalar cosine similarity below need exactly one text per question.
    if isinstance(generated_response, list):
        generated_response = " ".join(str(part) for part in generated_response)
    elif not isinstance(generated_response, str):
        print("Unexpected response type:", type(generated_response))
        # Record placeholders instead of silently skipping the row, so that all
        # result lists keep one entry per DataFrame row.
        responses.append(None)
        similarities.append(float("nan"))
        rouge_l_scores.append(float("nan"))
        times.append(elapsed_seconds)
        memory_usage_MB.append(memory_used)
        continue

    rouge_l_score = compute_rouge_l(true_answer, generated_response)

    # Encode both the generated response and the true answer
    response_embedding = sentenceTransformer.encode([generated_response])
    answer_embedding = sentenceTransformer.encode([true_answer])

    # Cosine similarity of the two embeddings, reduced to a Python scalar
    similarity = util.cos_sim(response_embedding, answer_embedding).item()

    # Append everything together so the lists can never drift out of step.
    responses.append(generated_response)
    similarities.append(similarity)
    rouge_l_scores.append(rouge_l_score)
    times.append(elapsed_seconds)
    memory_usage_MB.append(memory_used)

In [29]:
# Attach each per-question metric list to the DataFrame as its own column.
metric_columns = {
    'Model_Response': responses,
    'Cosine_Similarity': similarities,
    'ROUGE_L_Score': rouge_l_scores,
    'Time_Taken_Sec': times,
    'Memory_Usage_MB': memory_usage_MB,
}
for column_name, column_values in metric_columns.items():
    df[column_name] = column_values

# Write the questions, reference answers and metrics to an Excel file (without the index column).
df.to_excel('Metrics_llama3.2.xlsx', index=False)