In [12]:
from langchain.vectorstores import Weaviate
import weaviate
import os
from dotenv import load_dotenv

# Load settings from a local .env file into the process environment.
load_dotenv()

# Connection settings for the Weaviate cluster (None when a variable is unset).
WEAVIATE_URL = os.environ.get("WEAVIATE_CLUSTER")
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY")

# v3-style client, authenticated with an API key.
client = weaviate.Client(
    url=WEAVIATE_URL,
    auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
)

Python client v3 `weaviate.Client(...)` connections and methods are deprecated and will
            be removed by 2024-11-30.

            Upgrade your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.
                - For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
                - For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration

            If you have to use v3 code, install the v3 client and pin the v3 dependency in your requirements file: `weaviate-client>=3.26.7;<4.0.0`
  client = weaviate.Client(


In [13]:
# embedding

from langchain.embeddings import HuggingFaceEmbeddings

# all-mpnet-base-v2 is a general sentence-transformers model, not a BGE model.
# The BGE wrapper prepends a BGE-specific instruction to every query before
# embedding it, which skews query vectors for models that were not trained with
# that instruction, so the generic Hugging Face wrapper is used here instead.
embedding_model_name = "Sentence-Transformers/all-mpnet-base-v2"


# model_kwargs = {"device" : "cuda"}

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    # model_kwargs = model_kwargs
)

  from tqdm.autonotebook import tqdm, trange


In [8]:
# loading pdf

from langchain.document_loaders import PyPDFLoader

# PDF that feeds the rest of the pipeline.
path = 'CNet.pdf'

# extract_images=True is forwarded to the loader so embedded images are handled too.
loader = PyPDFLoader(
    path,
    extract_images=True,
)
pages = loader.load()

invalid pdf header: b'<!DOC'
incorrect startxref pointer(1)


In [9]:
# Show the loaded documents as this cell's output.
pages

[Document(metadata={'source': 'CNet.pdf', 'page': 0}, page_content='Course Handout  (2023-24 ODD SEMESTER)\n \n \nSubject Name/Code :  Computer Networks (BTCS-T-PC-013) Branch/Sem/Batch :\nName of Faculty:  Kasturi Dhal , Amarjeet Mohanty , Ranjit Kumar Behera , SASMITA PARIDA , KAILASH\nCHANDRA MISHRA , MILAN SAMANTARAY\nScope & Objective -:\n  \nThe objective of this course is to study the fundamental concepts of computer networks and develop an understanding of modern\nnetwork architectures from design & performance perspective.\nPre-Requisite -:\n  \nDetailed Syllabus:\nModule# CO Topics Hours\nModule-1 CO1Introduction: Overview of Data Communication Networks,\nProtocols and standards, OSI Reference model, TCP/IP\nProtocol; Physical Layer: Analog Signals, Digital Signals, Data\nRate Limits, Transmission Impairment; Digital Transmission:\nDigital-to-Digital & Analog-to- Digital conversion, Transmission\nModes; Analog Transmission: Digital-to-Analog & Analogto-\nAnalog conversion; Mu

In [14]:
# chunking


from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded pages using a chunk size of 1000 and an overlap of 20.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
docs = text_splitter.split_documents(documents=pages)

In [16]:
# docs

In [19]:
# Build the vector store from the chunks; by_text=False is passed alongside the
# embedding model and the existing client.
vector_db = Weaviate.from_documents(
    documents=docs,
    embedding=embeddings,
    client=client,
    by_text=False,
)

In [24]:
# Quick retrieval check: request three matches and print the text of the first.
print(
    vector_db.similarity_search(query="what is the syllabus?", k=3)[0].page_content
)

5. http://intronetworks.cs.luc.edu/current/ComputerNetworks.pdf: eBook by Prof. P. L. Dordal, Loyola University, Chicago, USA
Course Outcome:
CO1Describe the basics of computer networks, topology, TCP/IP, and OSI reference models and various
techniques and modes of transmission (Analog and Digital).
CO2Compare various Data Link protocols, Error detecting mechanisms, Multi-Channel Access protocols
and IEEE 802.xx standards for LAN.
CO3Describe IPv4 & IPv6 addressing schemes, subnets, routing principles and algorithms used in the
network layer.
CO4Explain the protocols of Transport & Application layers and understand the working principles of
Internet & theWorld WideWeb.
CO5Explain the principles of DNS hierarchy and working principles of various Application layer
protocols.
Program Outcomes Relevent to the Course:
PO1Engineering knowledge: Apply the knowledge of mathematics, science, engineering fundamentals,


In [25]:
# Generation using LLM and retrieved context

In [26]:
from langchain.prompts import ChatPromptTemplate

# Prompt text with two placeholders: {question} and {context}.
# NOTE(review): the prompt contains typos ("assistent", "retrived", "maximux").
# They are left untouched here because this is runtime text, and a later cell
# slices the model output at a fixed character offset that presumably depends
# on the prompt length -- confirm before editing the wording.
template = """
You are an assistent for question-answering task .
Use the following piece of retrived context to answer the question .
If you don't know ,just say idk.
Use 10 sentence at maximux and keep the answer concise.
Question : {question}
Context : {context}
Answer : 
"""



In [27]:
# Wrap the raw template string in a chat prompt object.
prompt = ChatPromptTemplate.from_template(template=template)

In [38]:
# importing LLM from hugging face

from langchain import HuggingFaceHub

# Re-read the .env file, then pick up the Hugging Face token (None when unset).
load_dotenv()
huggingfacehub_api_token = os.environ.get("HUGGINGFACE_TOKEN")

In [39]:
# Hosted Mistral-7B-Instruct model accessed through the Hugging Face Hub wrapper.
# NOTE(review): confirm that "max_length" is the generation parameter this
# endpoint actually honours for limiting the answer size.
model = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    huggingfacehub_api_token=huggingfacehub_api_token,
    model_kwargs={"temperature": 1, "max_length": 180},
)

In [40]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

In [41]:
# Retriever view over the vector store, plus the parser applied to the model output.
retriever = vector_db.as_retriever()
output_parser = StrOutputParser()

In [42]:
# RAG pipeline: the retriever fills 'context', the incoming question is passed
# through unchanged, then prompt -> model -> output parser.
rag_chain = (
    {
        'context': retriever,
        'question': RunnablePassthrough(),
    }
    | prompt
    | model
    | output_parser
)

In [46]:
# Run the full chain for a single question and keep the resulting text.
output = rag_chain.invoke(input="What is this context about ?")

In [53]:
# The returned text appears to echo the whole prompt ahead of the completion.
# A fixed character offset only works for one exact prompt length, so locate the
# prompt's closing "Answer :" marker instead; if the marker is absent, print the
# entire output rather than a wrongly sliced fragment.
answer_start = output.rfind("Answer :")
print(output[answer_start:] if answer_start != -1 else output)

Answer : 
The context appears to be related to the field of engineering and the skills and knowledge required for effective communication, project management, and problem-solving in engineering activities. The context includes information on the use of engineering and IT tools, as well as the importance of understanding the limitations of these tools. The context also emphasizes the need for ethical principles and the application of these principles to professional engineering practice. The context includes information on the impact of engineering solutions on societal and environmental contexts, and the
