In [1]:
from dotenv import load_dotenv
import os

# Pull the variables defined in a local .env file into the environment.
load_dotenv()

# Read the Groq credential and stop right away when it is absent or empty.
groq_api_key = os.environ.get('GROQ_API_KEY')
if groq_api_key is None or groq_api_key == "":
    raise ValueError("Groq API key not found in .env file")

In [5]:
import os
import pickle
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq

# Name of the checkpoint passed to HuggingFaceEmbeddings below.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Shared embedding model instance used by the cells further down.
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from tqdm.autonotebook import tqdm, trange


In [3]:
# Load documents from a text file and add metadata
def load_documents(file_path):
    """Read a text file into documents and tag each one with its origin.

    Args:
        file_path: Path handed to ``TextLoader``.

    Returns:
        The documents produced by ``TextLoader(file_path).load()``, each with
        ``metadata["source"]`` set to ``file_path``.
    """
    loaded = TextLoader(file_path).load()

    # Record on every document which file it was read from.
    for document in loaded:
        document.metadata["source"] = file_path

    return loaded


In [12]:

# Split documents into chunks
def split_documents(documents, chunk_size=1000, chunk_overlap=100):
    """Split documents into overlapping chunks.

    Args:
        documents: Documents to split; passed straight to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 1000, the value this notebook has always used.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 100, the value this notebook has always used.

    Returns:
        The chunked documents returned by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

In [5]:
# Create embeddings and handle storage
def embed_documents(split_docs, embedding_model):
    """Embed the chunks, reusing an on-disk cache only when it still matches.

    The vectors are cached in ``embeddings/emb01.pkl``. A fingerprint of the
    chunk texts (and of ``embedding_model.model_name`` when that attribute
    exists) is stored next to it in ``embeddings/emb01.pkl.sha256``. The cache
    is reused only if the fingerprint matches and the number of cached vectors
    equals the number of chunks; otherwise the embeddings are regenerated, so
    a changed document set no longer silently returns stale vectors.

    A cache file written before the fingerprint existed (no sidecar file) is
    accepted when its length matches the number of chunks.

    Args:
        split_docs: Iterable of objects exposing ``page_content``.
        embedding_model: Object exposing ``embed_documents(texts)``.

    Returns:
        The embeddings, either loaded from the cache or freshly computed.
    """
    import hashlib  # local import: only this function needs it

    EMBEDDINGS_FOLDER = "embeddings"
    EMBEDDINGS_FILE = os.path.join(EMBEDDINGS_FOLDER, "emb01.pkl")
    FINGERPRINT_FILE = EMBEDDINGS_FILE + ".sha256"

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(EMBEDDINGS_FOLDER, exist_ok=True)

    texts = [doc.page_content for doc in split_docs]

    # Fingerprint the inputs. Each piece is length-prefixed so that moving
    # text across a chunk boundary produces a different digest.
    digest = hashlib.sha256()
    pieces = [str(getattr(embedding_model, "model_name", ""))] + texts
    for piece in pieces:
        encoded = piece.encode("utf-8", "surrogatepass")
        digest.update(len(encoded).to_bytes(8, "big"))
        digest.update(encoded)
    fingerprint = digest.hexdigest()

    if os.path.exists(EMBEDDINGS_FILE):
        stored_fingerprint = None
        if os.path.exists(FINGERPRINT_FILE):
            with open(FINGERPRINT_FILE, "r", encoding="utf-8") as f:
                stored_fingerprint = f.read().strip()

        # No sidecar means a legacy cache: fall back to the length check only.
        if stored_fingerprint is None or stored_fingerprint == fingerprint:
            print(f"Loading existing embeddings from {EMBEDDINGS_FILE}...")
            try:
                # SECURITY: pickle.load can execute arbitrary code. Only point
                # this at cache files this notebook wrote itself.
                with open(EMBEDDINGS_FILE, 'rb') as f:
                    embedded_docs = pickle.load(f)
            except (pickle.UnpicklingError, EOFError):
                # A truncated or corrupt cache is treated as stale.
                embedded_docs = None

            if isinstance(embedded_docs, list) and len(embedded_docs) == len(texts):
                return embedded_docs

        print("Cached embeddings do not match the current documents; regenerating...")

    print("Creating new embeddings...")
    embedded_docs = embedding_model.embed_documents(texts)

    with open(EMBEDDINGS_FILE, 'wb') as f:
        pickle.dump(embedded_docs, f)
    with open(FINGERPRINT_FILE, "w", encoding="utf-8") as f:
        f.write(fingerprint)

    return embedded_docs


In [6]:
# Store embeddings in Chroma vector store
def store_embeddings(split_docs, embedding_model):
    """Build a Chroma vector store from the chunks.

    Args:
        split_docs: Chunked documents passed to ``Chroma.from_documents``.
        embedding_model: Embedding model passed to ``Chroma.from_documents``.

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    return Chroma.from_documents(split_docs, embedding_model)

In [7]:
# Build the RAG pipeline
def build_rag_pipeline(vector_store):
    """Return the retriever obtained from ``vector_store.as_retriever()``.

    Args:
        vector_store: Object exposing ``as_retriever()``.

    Returns:
        Whatever ``vector_store.as_retriever()`` returns.
    """
    return vector_store.as_retriever()


In [8]:

# Initialize the LLM
def initialize_llm(model="llama-3.1-70b-versatile", temperature=0.2):
    """Create the Groq chat model used to answer queries.

    Args:
        model: Groq model id. Defaults to the id this notebook has always
            used, so calling ``initialize_llm()`` behaves as before.
        temperature: Sampling temperature. The default is kept low for more
            consistent answers.

    Returns:
        A ``ChatGroq`` instance.
    """
    # No API key is passed explicitly here.
    # NOTE(review): presumably ChatGroq picks GROQ_API_KEY up from the
    # environment populated by load_dotenv() -- confirm.
    # NOTE(review): hosted model ids get retired over time; confirm the
    # default id is still served before relying on it.
    return ChatGroq(
        model=model,
        temperature=temperature,
    )

In [9]:
# Querying the retriever and LLM
class _PlainTextResponse(str):
    """A string that also exposes ``.content`` like a chat-model message.

    Lets callers read ``response.content`` uniformly, while the value still
    compares equal to the plain string that used to be returned.
    """

    @property
    def content(self):
        return str(self)


def query_llm(llm, retriever, query):
    """Answer ``query`` with the LLM, using retrieved chunks as context.

    Args:
        llm: Object exposing ``invoke(prompt)``.
        retriever: Object exposing ``invoke(query)`` (preferred) or the older
            ``get_relevant_documents(query)``. Must return an iterable of
            objects with a ``page_content`` attribute.
        query: The student's question.

    Returns:
        A ``(response, relevant_texts)`` tuple.

        * When at least one chunk is retrieved, ``response`` is whatever
          ``llm.invoke`` returns and ``relevant_texts`` is the list of chunk
          texts placed in the prompt.
        * When nothing is retrieved, ``response`` is a string equal to
          ``"No relevant documents found."`` that also has a ``.content``
          attribute, and ``relevant_texts`` is ``[]``. The LLM is not called.
    """
    # Prefer `invoke`; `get_relevant_documents` is deprecated in recent
    # langchain-core. Fall back so retrievers exposing only the old method
    # keep working.
    fetch = getattr(retriever, "invoke", None)
    if fetch is None:
        fetch = retriever.get_relevant_documents
    results = fetch(query)

    if not results:
        return _PlainTextResponse("No relevant documents found."), []

    # The actual text chunks handed to the LLM; also returned to the caller.
    relevant_texts = [doc.page_content for doc in results]
    context = "\n".join(relevant_texts)

    # The prompt text, including its indentation, is kept exactly as before.
    prompt = f"""Please consider information from 9th to 12th-grade textbooks. If the provided context is relevant to the student's query, use it to give a precise answer. Otherwise, answer based on your own knowledge, considering the material covered in books for these grades. Don't mention in response that the context is not relevant. You can simply say "I cannot find relevant data from your book but I will explain you the general concept about" and so on. At the end, encourage student to ask conversational questions related to the topics in the book based on the query.
        
        Context:
        {context}
        Student Query:
        {query}
        """
    response = llm.invoke(prompt)

    return response, relevant_texts


In [10]:
# Main execution flow
if __name__ == "__main__":
    # Build the retrieval pipeline step by step. Every name assigned here is
    # a module-level variable, and later cells read `llm` and `retriever`.

    # Load your document
    documents = load_documents('resources/9thComputerScience_updated.txt')
    split_docs = split_documents(documents)
    # NOTE(review): `embedded_docs` is not referenced again in the visible
    # cells; store_embeddings() below receives the chunks and the embedding
    # model, not these vectors.
    embedded_docs = embed_documents(split_docs, embedding_model)
    vector_store = store_embeddings(split_docs, embedding_model)
    retriever = build_rag_pipeline(vector_store)
    
    # Initialize the LLM
    llm = initialize_llm()


Loading existing embeddings from embeddings\emb01.pkl...


In [11]:
# Example query
query = "explain boolean proposition"
response, relevant_texts = query_llm(llm, retriever, query)

# Output response and relevant text chunks.
# query_llm may hand back a plain string instead of a message object when
# nothing was retrieved, so fall back to the value itself if it has no
# `.content` attribute rather than raising AttributeError.
print(getattr(response, "content", response))
print("==============================================")
print("\nRelevant text chunks used in the response:")
for text in relevant_texts:
    print("Chunk: ==============================")
    # Show only the first 300 characters of each chunk.
    print(text[:300])

  results = retriever.get_relevant_documents(query)


A Boolean proposition is a statement that can be either true or false. In the context of Boolean algebra, a proposition is a statement that can be assigned a truth value, which is either true (T) or false (F).

In the context of your textbook, a proposition is defined as a statement that can be assigned a truth value based on its truthfulness or falsity. For example, the statement "Islamabad is the capital of Pakistan" is a proposition that can be assigned a truth value of true.

A proposition can be simple or compound. A simple proposition is a single statement that can be assigned a truth value, whereas a compound proposition is a statement that is made up of two or more simple propositions combined using logical operators such as AND, OR, and NOT.

For example, the statement "It is raining and today is Sunday" is a compound proposition that is made up of two simple propositions: "It is raining" and "Today is Sunday". The truth value of this compound proposition depends on the truth 

In [12]:
# Dump every retrieved chunk in full, each one preceded by a separator line.
for text in relevant_texts:
    print("=====================================", text, sep="\n")

operator is shown in Table 2-11.
Truth Table for complex Boolean expressions: We can make truth table for any combination of these operators. For NOT(P) Q NOT(P)AND Q
example, if we need to make a truth table - of “It is not raining and today is Sunday”.
It means the proposition NOT(P) AND Q.
P T T F The truth table for this compound Table 2-12 proposition is shown Table 2-12.
2.5.5 Laws of Boolean Algebra The laws of Boolean Algebra help us to simplify complex Boolean expressions. Some laws are discussed in the following.
e Commutative Law Commutative Law states that the order of application of two separate propositions is not important. So,
a) A.B=B8B.A (The order in which two variables are AND’ed makes no difference.)
b) A+B=B+A _ (The order in which two variables are OR’ed makes no difference.)
We can use truth tables (Table 2-13a, Table 2-13b) to verify this law for AND and OR operations respectively.
Table 2-13a Table 2-13b
Do you know?
True and False are called Boolean values. T

In [13]:
# Example query
query = "who gave the idea of boolean values and on what date"
response, relevant_texts = query_llm(llm, retriever, query)

# Output response and relevant text chunks.
# query_llm may hand back a plain string instead of a message object when
# nothing was retrieved, so fall back to the value itself if it has no
# `.content` attribute rather than raising AttributeError.
print(getattr(response, "content", response))
print("==============================================")
print("\nRelevant text chunks used in the response:")
for text in relevant_texts:
    print("Chunk: ==============================")
    # Show only the first 300 characters of each chunk.
    print(text[:300])

The idea of Boolean values was given by George Boole in his book "The Laws of Thought". George Boole was born on 2 November 1815 and died on 8 December 1864.

Now, can you think of a scenario where you would use Boolean values in real life? For example, how would you use Boolean values to represent the status of a light switch?

Relevant text chunks used in the response:
Do you know?
True and False are called Boolean values. The idea was given by George Boole (2 November 1815 —8 December 1864) in his book “The Laws of Thought”.
2.5.2 Truth Values
Every proposition takes one of two values true or false, and these values are called the truth values. Truth value is giv
operator is shown in Table 2-11.
Truth Table for complex Boolean expressions: We can make truth table for any combination of these operators. For NOT(P) Q NOT(P)AND Q
example, if we need to make a truth table - of “It is not raining and today is Sunday”.
It means the proposition NOT(P) AND Q.
P T T 
A network of networks




In [14]:
# Example query
query = "how ip4 and ip6 works"
response, relevant_texts = query_llm(llm, retriever, query)

# Output response and relevant text chunks.
# query_llm may hand back a plain string instead of a message object when
# nothing was retrieved, so fall back to the value itself if it has no
# `.content` attribute rather than raising AttributeError.
print(getattr(response, "content", response))
print("==============================================")
print("\nRelevant text chunks used in the response:")
for text in relevant_texts:
    print("Chunk: ==============================")
    # Show only the first 300 characters of each chunk.
    print(text[:300])

I'll explain how IPv4 and IPv6 work.

IPv4 (Internet Protocol Version 4) is the original standard for IP addressing. It uses 32 bits to store the whole IP address, which is divided into four groups separated by dots (e.g., 172.16.54.1). Each group can contain a decimal value from 0 to 255. Since each group requires 8 bits, the total 32 bits are required to store the whole IP address in IPv4 standard.

Here's how IPv4 works:

1. When a device connects to a network, it is assigned an IP address by a Dynamic Host Configuration Protocol (DHCP) server.
2. The IP address is used to identify the device on the network.
3. When data is sent from one device to another, the IP address is used to route the data packets to the correct destination.

IPv6 (Internet Protocol Version 6) is a newer standard that was introduced to accommodate the increasing number of devices connecting to the internet. It uses 128 bits to store the whole IP address, which is divided into eight groups separated by colons 