In [2]:
from dotenv import load_dotenv
# Load environment variables before anything else runs: later cells read
# GOOGLE_API_KEY, PINECONE_API_KEY and PDF_FILE through os.getenv.
# Presumably these come from a local .env file — confirm it exists next to the notebook.
load_dotenv()

True

In [3]:
import os
from pinecone import Pinecone, ServerlessSpec  # Updated Pinecone initialization
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Pinecone as LangchainPinecone  # Updated import
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate 
import google.generativeai as genai

In [4]:
# --- Google Generative AI -------------------------------------------------
# The API key is taken from the environment rather than hardcoded.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# --- Pinecone client ------------------------------------------------------
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Name of the index used by both the ingestion and the query functions.
PINECONE_INDEX = "ragmodel-similarity-test"

# Create the serverless index only when it does not exist yet, so that
# re-running this cell does not attempt to create it a second time.
if PINECONE_INDEX not in pc.list_indexes().names():
    # Presumably the vector size produced by the embedding model used for
    # ingestion and querying — TODO confirm against the model's documentation.
    embedding_dimension = 768
    pc.create_index(
        name=PINECONE_INDEX,
        dimension=embedding_dimension,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [5]:
def get_pdf_text(pdf_docs):
    """Return the extractable text of all pages of all given PDFs as one string.

    Args:
        pdf_docs: Iterable of PDF sources; each item is passed to ``PdfReader``.

    Returns:
        The page texts concatenated in document order, then page order, with
        no separator between pages. Pages whose ``extract_text()`` result is
        falsy (``None`` or an empty string) contribute nothing.
    """
    extracted_pages = (
        page.extract_text()
        for source in pdf_docs
        for page in PdfReader(source).pages
    )
    # Drop pages that yielded no text before joining.
    return "".join(fragment for fragment in extracted_pages if fragment)

In [6]:
# Function to split text into chunks
def get_text_chunks(text):
    """Split ``text`` into chunks with ``RecursiveCharacterTextSplitter``.

    The splitter is configured with ``chunk_size=10000`` and
    ``chunk_overlap=1000``.

    Args:
        text: The full document text to split.

    Returns:
        Whatever ``split_text`` returns for ``text`` (the list of chunks).
    """
    splitter_options = {"chunk_size": 10000, "chunk_overlap": 1000}
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)

In [7]:
def get_vector_store(text_chunks):
    """Embed ``text_chunks`` and upload them to the ``PINECONE_INDEX`` index.

    Args:
        text_chunks: The text strings to embed and store.

    Returns:
        The vector store object produced by ``LangchainPinecone.from_texts``.
        Returning it lets a caller query the freshly populated store directly;
        callers that ignore the result are unaffected.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = LangchainPinecone.from_texts(
        text_chunks, embeddings, index_name=PINECONE_INDEX
    )
    return vector_store


In [8]:
def get_conversational_chain():
    """Build the question-answering chain used to answer from retrieved context.

    The chain is created by ``load_qa_chain`` with ``chain_type="stuff"``, a
    ``ChatGoogleGenerativeAI`` model (``gemini-pro``, temperature 0.3) and a
    prompt with two input variables, ``context`` and ``question``. The prompt
    tells the model to answer only from the context and to say so when the
    answer is not there.

    Returns:
        The chain object returned by ``load_qa_chain``.
    """
    prompt_template = """
    Answer the questions as detailed as possible from the provided context. Make sure to provide all the details. 
    If the answer is not in the provided context, just say, "Answer is not available in the document." 
    Don't provide the wrong answer.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_template,
    )
    llm = ChatGoogleGenerativeAI(temperature=0.3, model="gemini-pro")
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)


In [9]:
def user_input(user_question):
    """Answer ``user_question`` from the documents stored in ``PINECONE_INDEX``.

    Steps: connect to the existing Pinecone index, run a similarity search
    for the question, and pass the matching documents plus the question to
    the chain built by ``get_conversational_chain``.

    Args:
        user_question: The question text to search for and answer.

    Returns:
        The ``"output_text"`` entry of the chain's response.
    """
    # Same embedding model name as used at ingestion time in get_vector_store.
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # Connect to the existing Pinecone index
    vector_store = LangchainPinecone.from_existing_index(PINECONE_INDEX, embeddings)
    # Retrieve the documents most similar to the question
    docs = vector_store.similarity_search(user_question)
    chain = get_conversational_chain()
    # Calling the chain object directly is deprecated and emits a warning on
    # every query; invoke() takes the same inputs.
    response = chain.invoke(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    return response["output_text"]

In [9]:

# Path to the PDF document, taken from the PDF_FILE environment variable
pdf = os.getenv("PDF_FILE")
if not pdf:
    # Fail here with an actionable message instead of handing None to PdfReader.
    raise RuntimeError(
        "PDF_FILE environment variable is not set; point it at the PDF to ingest."
    )

# Extract text from PDF
text = get_pdf_text([pdf])

# Split text into chunks
doc_chunks = get_text_chunks(text)
if not doc_chunks:
    # get_pdf_text skips pages with no extractable text, so a PDF without any
    # would otherwise upload nothing without any indication.
    raise RuntimeError(f"No extractable text found in {pdf!r}; nothing to upload.")

# Create and upload vectors to Pinecone
# NOTE(review): re-running this cell uploads the chunks again; presumably that
# leaves duplicate vectors in the index — confirm before re-executing.
get_vector_store(doc_chunks)



In [10]:
# Ask a question against the indexed PDF; user_input returns the answer
# string, which is displayed as this cell's output.
prompt="what does data link layer do"
user_input(prompt)

stuff: https://python.langchain.com/v0.2/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/v0.2/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/v0.2/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/v0.2/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag
  chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
  response = chain(


'The data link layer transforms the physical layer, a raw transmission facility, to a reliable link. It makes the physical layer appear error-free to the upper layer (network layer).'

In [11]:
# Second sample query; the answer string returned by user_input is the cell output.
prompt2="what is data encapsulation?"
user_input(prompt2)

'Encapsulation is the process of adding a header and trailer to a packet at each layer of the OSI model. The header contains information about the source and destination of the packet, as well as the type of data being sent. The trailer contains information about the length of the packet and the checksum.'

In [13]:
# Broader sample query about the OSI model.
# NOTE(review): "explaination" is misspelled; the string is left untouched
# because it is passed verbatim to the similarity search and the model.
prompt3="give a brief explaination about structure of osi model and each layer does"
user_input(prompt3)

'The OSI model is a layered framework for the design of network systems that allows communication between all types of computer systems. It consists of seven separate but related layers, each of which defines a part of the process of moving information across a network.\n\n1. Physical Layer: The physical layer is responsible for the physical transmission of data over a network. It defines the physical characteristics of the network, such as the type of cable, the connectors used, and the signaling method.\n\n2. Data Link Layer: The data link layer is responsible for the reliable transmission of data over a network. It defines the protocols used to establish and maintain a connection between two devices, and it provides error detection and correction mechanisms.\n\n3. Network Layer: The network layer is responsible for the routing of data over a network. It defines the protocols used to determine the best path for data to take from one device to another, and it provides mechanisms for a

In [10]:
# Sample query asking about several items in one question.
# NOTE(review): "UDp" has inconsistent casing; left as-is because the string
# is passed verbatim to the similarity search and the model.
prompt4="what is UDp, TCP, and SCTP ?"
user_input(prompt4)

stuff: https://python.langchain.com/v0.2/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/v0.2/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/v0.2/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/v0.2/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag
  chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
  response = chain(


'UDP (User Datagram Protocol) is a connectionless, unreliable transport protocol. It provides process-to-process communication using port numbers. UDP is suitable for applications that require simple request-response communication with little concern for flow and error control.\n\nTCP (Transmission Control Protocol) is a connection-oriented, reliable transport protocol. It adds connection-oriented and reliability features to the services of IP. TCP is suitable for applications that require reliable data transfer, such as file transfer and email.\n\nSCTP (Stream Control Transmission Protocol) is a reliable, message-oriented transport protocol. It combines the best features of UDP and TCP. SCTP is suitable for applications that require reliable data transfer and message-oriented communication, such as telephony and video conferencing.'

In [15]:
# Sample query containing misspellings ("cylic redundany").
# NOTE(review): the typos are left untouched because the string is passed
# verbatim to the similarity search and the model; correcting them would change the query.
prompt5="what is cylic redundany check? name some error detection methods"
user_input(prompt5)

'Cyclic Redundancy Check (CRC) is a type of error detection method used in data transmission to detect errors in data. It involves adding a fixed-length checksum to the end of a data block, which is calculated based on the contents of the data block. The receiver of the data can then use the checksum to verify the integrity of the data and detect any errors that may have occurred during transmission.\n\nSome common error detection methods include:\n\n1. Parity check: This method involves adding an extra bit to each byte of data, which indicates whether the number of 1 bits in the byte is even or odd. If the parity bit does not match the actual number of 1 bits in the byte, an error is detected.\n\n2. Checksum: This method involves adding up all the bytes in a data block and taking the remainder when the sum is divided by a fixed number. The checksum is then appended to the end of the data block. The receiver of the data can then calculate the checksum of the received data and compare i