In [1]:
#installing pdfquery, library that helps extract data from PDF files
!pip install pdfquery



In [2]:
#importing all the neccessary libraries
import pdfquery                     
from collections import Counter     
import math
import requests
import json
import re

In [3]:
#defining a funciton to calculate the cosine similarity between the query entered by used and relevant document from the corpus
def cosine_similarity(query, document):
    #tokenization
    query_tokens = query.lower().split(" ")
    document_tokens = document.lower().split(" ")
    
    #counter for query and document
    query_counter = Counter(query_tokens)
    document_counter = Counter(document_tokens)
    
    #calculating dot prod
    dot_product = sum(query_counter[token] * document_counter[token] for token in query_counter.keys())
    
    #calculating magnitudes
    query_magnitude = math.sqrt(sum(query_counter[token]**2 for token in query_counter))
    document_magnitude = math.sqrt(sum(document_counter[token]**2 for token in document_counter))
    
    #calculating similarity
    #incase (query_magnitude*document_magnitude)=0, the value of similarity becomes 0 else as calculated
    similarity = dot_product/(query_magnitude*document_magnitude) if query_magnitude*document_magnitude!=0 else 0
    
    return similarity

In [4]:
#function to return a document similar to the query from the corpus
def return_response(query, corpus):
    similarities = []
    for doc in corpus:
        similarity = cosine_similarity(query, doc)      #calls the cosine_similarity function
        similarities.append(similarity)                 #adds the calculated similarity score to the similarities list
        
    return corpus[similarities.index(max(similarities))]

In [5]:
#creating a infinite loop until the user wants to terminate the program
while(True):
    print("Choose the RAG model you want to interact with: ")
    print("1. Data Structures RAG Model")
    print("2. Programming Paradigms RAG Model")
    print("3. Computer Networks RAG Model")
    print("4. Terminate the program")
    choice = int(input("Enter your choice: "))        #accepting the model choice of the user from the menu displayed
    
    pdf = []
    if(choice==1):
        pdf = pdfquery.PDFQuery("Data Structures Corpus.pdf")
    elif(choice==2):
        pdf = pdfquery.PDFQuery("Programming Paradigms.pdf")
    elif(choice==3):
        pdf = pdfquery.PDFQuery("Computer Networks.pdf")
    else:
        print("Closing Program.. Thank you!!")
        break
        
    
    pdf.load()
    # Extract all text elements
    text_elements = pdf.pq('LTTextLineHorizontal')
    text = " ".join([t.text for t in text_elements])  # Combine all text

    # Split text into sentences using regex
    corpus = re.split(r'(?<=[.!?]) +', text)
   
    while(True):
        print("Enter Quit to exit from the model")
        user_query = input(">>>")
        
        relevant_document = return_response(user_query,corpus)

        if user_query.lower() == 'quit':
            print("\n\n")
            print(f"{'=' * 30}")
            break
        full_response = []

        prompt="""
        You are a bot that makes recommendations for activities and lifestyle changes.
        You answer only in 70 words making sure you encourage the user.
        This is the recommended activity: {relevant_document}
        The user input is: {user_input}
        Compile a recommendation to the user based on the recommended activity and the user input.
        """

        url = "http://localhost:11434/api/generate"

        #now reuqesting above URl which is running locally to execute below command
        #we are using json format for making the request

        data = {
            "model": "llama3",
            "prompt": prompt.format(user_input=user_query, relevant_document=relevant_document)
        }

        headers = {'Content-Type': 'application/json'}

        response = requests.post(url, data=json.dumps(data), headers=headers, stream=True)


        try:
            for line in response.iter_lines():
            #filter out keep-alive new lines
                if line:
                    decoded_line = json.loads(line.decode('utf-8'))
                    full_response.append(decoded_line['response'])
        finally:
            response.close()

        print(''.join(full_response))
        print(f"{'*' * 30}")

Choose the RAG model you want to interact with: 
1. Data Structures RAG Model
2. Programming Paradigms RAG Model
3. Computer Networks RAG Model
4. Terminate the program
Enter your choice: 1
Enter Quit to exit from the model
>>>what are strings?
I'm excited to help you explore the world of coding! Strings, my friend, are the building blocks of programming. They're sequences of characters used for text manipulation. To get started, I recommend practicing with some fun string exercises, such as reversing a sentence or finding specific words within a text. This will help you develop your problem-solving skills and get comfortable working with strings. You got this!
******************************
Enter Quit to exit from the model
>>>which structure consists of vertices and edges?
I love that you're curious about graphs! Since you asked, "which structure consists of vertices and edges?", I'm excited to recommend an activity for you.

Why not try creating your own graph? You can use online to