In [1]:
# Set up the Groq chat model: imports, environment loading, and a quick smoke test
import os

from dotenv import load_dotenv
from langchain_groq import ChatGroq

# Load environment variables (python-dotenv) before the API key is looked up
load_dotenv()

# Build the chat model; the API key is taken from the GROQ_API_KEY environment variable
llm = ChatGroq(
    api_key=os.getenv("GROQ_API_KEY"),
    model='qwen-2.5-32b',
)

# Send a short greeting to confirm the model responds
llm.invoke("hi")

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 30, 'total_tokens': 40, 'completion_time': 0.05, 'prompt_time': 0.003674423, 'queue_time': 0.049660696, 'total_time': 0.053674423}, 'model_name': 'qwen-2.5-32b', 'system_fingerprint': 'fp_c527211fd1', 'finish_reason': 'stop', 'logprobs': None}, id='run-dfce8e15-ac1f-476b-ae4a-449c1ecc6e70-0', usage_metadata={'input_tokens': 30, 'output_tokens': 10, 'total_tokens': 40})

In [2]:
# Import the PyPDFLoader from langchain_community.document_loaders
from langchain_community.document_loaders import PyPDFLoader

In [3]:
# Read the chapter PDF from the working directory into a list of documents
loader = PyPDFLoader(file_path="Ch.01_Introduction_ to_computers.pdf")
docs = loader.load()

In [4]:
# Display the loaded documents: a bare name as the last expression of a cell
# makes the notebook render its repr as the cell output.
# NOTE(review): this echoes every loaded Document in full; consider showing a
# small slice instead before sharing the notebook.
docs

[Document(metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2015-02-25T19:41:04+02:00', 'author': 'just', 'moddate': '2015-02-25T19:41:04+02:00', 'source': 'Ch.01_Introduction_ to_computers.pdf', 'total_pages': 10, 'page': 0, 'page_label': '1'}, page_content='1 \nChapter One \nIntroduction to Computer \n \nComputer \nA computer is an electronic device, operating under the control of instructions stored \nin its own memory that can accept data (input), process the data according to specified \nrules, produce information (output), and store the information for future use1. \n \nFunctionalities of a computer2  \nAny digital computer carries out five functions in gross terms:  \n \n \n \n \n \n \n \n \n \nComputer Components \nAny kind of computers consists of HARDWARE AND SOFTWARE. \n \nHardware: \nComputer hardware is the collection of  physical elements that constitutes a computer \nsystem. Computer hardware refers to the physical parts or

In [5]:
# Import the RecursiveCharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into chunks using a chunk size of 2000
splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
)
sp_doc = splitter.split_documents(documents=docs)

In [6]:
# Display the chunks produced by the splitter (bare name as the last expression
# of the cell, so its repr becomes the cell output).
# NOTE(review): this echoes every chunk in full; consider showing a small slice
# instead before sharing the notebook.
sp_doc

[Document(metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2015-02-25T19:41:04+02:00', 'author': 'just', 'moddate': '2015-02-25T19:41:04+02:00', 'source': 'Ch.01_Introduction_ to_computers.pdf', 'total_pages': 10, 'page': 0, 'page_label': '1'}, page_content='1 \nChapter One \nIntroduction to Computer \n \nComputer \nA computer is an electronic device, operating under the control of instructions stored \nin its own memory that can accept data (input), process the data according to specified \nrules, produce information (output), and store the information for future use1. \n \nFunctionalities of a computer2  \nAny digital computer carries out five functions in gross terms:  \n \n \n \n \n \n \n \n \n \nComputer Components \nAny kind of computers consists of HARDWARE AND SOFTWARE. \n \nHardware: \nComputer hardware is the collection of  physical elements that constitutes a computer \nsystem. Computer hardware refers to the physical parts or

In [7]:
# Import GoogleGenerativeAIEmbeddings and initialize it with the API key from the environment
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# SECURITY: never hardcode the key in the notebook. It is read from the
# GOOGLE_API_KEY environment variable (for example via the .env file loaded by
# load_dotenv() in the first cell); os.environ[...] raises a KeyError naming the
# variable if it is missing, instead of failing later with an opaque API error.
# NOTE(review): the literal key that used to be in this cell was exposed in the
# notebook source and should be revoked and rotated.
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004", google_api_key=os.environ["GOOGLE_API_KEY"])

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Import necessary libraries for FAISS and initialize the vector store
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

from uuid import uuid4

# Embed a probe string once to find out how long the embedding vectors are
embedding_dim = len(embeddings.embed_query("hello world"))

# L2-distance FAISS index sized to that vector length
index = faiss.IndexFlatL2(embedding_dim)

# Assemble the vector store around the empty index and an in-memory docstore
vector_store = FAISS(
    index=index,
    embedding_function=embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

# Add every chunk to the store; the value returned by add_documents is the cell output
vector_store.add_documents(documents=sp_doc)

['1e789e3f-e593-4ccd-a2f9-f52ade049e92',
 '068ac446-be59-4d72-9138-a9b1f9fe3400',
 'bef3bada-698c-4f35-b690-ba5044bf5c53',
 '0141692a-0b24-40b2-859a-608116d5eeb8',
 'b8efa845-53c1-4afb-969f-48758a3c4e5b',
 '393d90fe-b541-4f49-90da-274861e82e75',
 '1f435b5f-6169-4c40-b8b4-acb9b1c1f461',
 '2bb5cb68-79c6-495a-b39c-ed8bce09c204',
 '4af11864-e823-4f88-95de-3dc519669665',
 'fa1ac815-dcb8-4f03-a9b8-f2af4961b00e',
 'b494e6bb-234b-4bd3-875a-97c628a59a89',
 'c42f8eaf-b5b2-43e9-80ca-c35bf9996df5',
 'd4b2b318-b920-4eb1-b55f-8b4fefc43f96',
 '22255c3c-202d-4c06-907f-38a4d85c5b4a',
 'c01dbca4-54eb-439a-8093-ff5bdc7340cc',
 'ad2180cc-6819-47fc-9ac4-0458a857dcff']

In [9]:
# Wrap the vector store in a retriever so it can be queried with retriever.invoke(...).
# as_retriever() is called with no arguments, so the library's default search
# settings apply.
retriever = vector_store.as_retriever()

In [10]:
# Query the retriever for chunks about software types.
# The result is not assigned to a name; it is only shown as the cell output.
retriever.invoke("what are type of Software Types")

[Document(id='393d90fe-b541-4f49-90da-274861e82e75', metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2015-02-25T19:41:04+02:00', 'author': 'just', 'moddate': '2015-02-25T19:41:04+02:00', 'source': 'Ch.01_Introduction_ to_computers.pdf', 'total_pages': 10, 'page': 4, 'page_label': '5'}, page_content='5 \nSoftware \nSoftware is a generic term for organized collections of computer data and instructions, \noften broken into two major  categories: system software that provides the basic non -\ntask-specific functions of the computer, and application software which is used  by \nusers to accomplish specific tasks.  \n \nSoftware Types \nA. System software  is responsible for controlling, integrating, and  managing the \nindividual hardware components of a computer system so  that other software and \nthe users of the system see it as a functional unit  without having to be concerned \nwith the low -level details such as  transferring data fro

In [11]:
# Define the question once so the same text is used both for retrieval here and
# for the LLM prompt built in the following cell.
question = "give the website url link in the document"

# Retrieve the chunks matching the question; `data` is read by the later llm.invoke cell.
data = retriever.invoke(question)
# Echo the retrieved documents for inspection
data

[Document(id='fa1ac815-dcb8-4f03-a9b8-f2af4961b00e', metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2015-02-25T19:41:04+02:00', 'author': 'just', 'moddate': '2015-02-25T19:41:04+02:00', 'source': 'Ch.01_Introduction_ to_computers.pdf', 'total_pages': 10, 'page': 6, 'page_label': '7'}, page_content='Some mobile devices are more powerful, and they allow you to do many  of \n                                                           \n ***http://www.acobas.net/teaching/survival/handouts/pcwebopedia.pdf'),
 Document(id='22255c3c-202d-4c06-907f-38a4d85c5b4a', metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2015-02-25T19:41:04+02:00', 'author': 'just', 'moddate': '2015-02-25T19:41:04+02:00', 'source': 'Ch.01_Introduction_ to_computers.pdf', 'total_pages': 10, 'page': 8, 'page_label': '9'}, page_content='• Versatility: Computer is versatile in nature. It can perform different types of  tasks \

In [12]:
# Use the LLM to get the final result based on the question and retrieved data.
# Only the text of each retrieved chunk is sent: interpolating the Document list
# directly would put its Python repr (ids, PDF metadata, escaped newlines) into
# the prompt, which wastes tokens and adds noise for the model.
context = "\n\n".join(doc.page_content for doc in data)
result = llm.invoke(
    "Answer the question below. The answer is contained in the document excerpts that follow.\n\n"
    f"Question: {question}\n\n"
    f"Document excerpts:\n{context}"
)

In [13]:
# Print only the text of the model's reply (result.content) rather than the
# whole message object; print() also renders the newlines in the reply.
print(result.content)

From the snippets of the documents provided, the URLs contained within the documents are:

1. [http://www.acobas.net/teaching/survival/handouts/pcwebopedia.pdf](http://www.acobas.net/teaching/survival/handouts/pcwebopedia.pdf)
2. [http://computer.howstuffworks.com/virus.htm](http://computer.howstuffworks.com/virus.htm)
3. [http://www.us-cert.gov/publications/virus-basics](http://www.us-cert.gov/publications/virus-basics)

These URLs appear to point to external documents or webpages that may contain more detailed information about computer terminology, viruses, and virus protection basics, respectively.


In [14]:
# Empty cell