In [None]:
from langchain_groq import ChatGroq
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone

In [18]:
from dotenv import load_dotenv
load_dotenv()

True

In [19]:
import os

In [20]:
## Lets Read the document
def read_doc(directory):
    file_loader=PyPDFDirectoryLoader(directory)
    documents=file_loader.load()
    return documents

In [None]:
doc=read_doc('documents/')
len(doc)
first_doc = doc[0]  
print(first_doc.page_content)  
print(first_doc.metadata) 

Artificial Intelligence vs. Human
Intelligence: A Comprehensive Comparison
Author Name
June 19, 2025
Abstract
This article explores the multifaceted comparison between artificial intelligence
(AI) and human intelligence, delving into cognitive abilities, collaboration, ethi-
cal considerations, creativity, and future implications. By examining strengths,
limitations, and synergies, we aim to provide a balanced perspective on how AI
and humans can coexist and complement each other in an evolving technologi-
cal landscape.
Contents
1 Introduction . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .3
2 Cognitive Abilities . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .3
2.1 Processing Speed and Accuracy. . . . . . . . . . . . . . . . . . . . . . . 3
2.2 Memory and Recall . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
2.3 Learning and Adaptation. . . . . . . . . . . . . . . . . . . . . . . . . . . 4
2.4 Problem-Solving . . . . . . . . .

In [22]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunked_docs = text_splitter.split_documents(doc)
print(f"Number of chunks: {len(chunked_docs)}")

Number of chunks: 118


In [13]:

from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


  from .autonotebook import tqdm as notebook_tqdm


In [23]:
vectors=embeddings.embed_query("How are you?")
len(vectors)

384

In [None]:
from pinecone import Pinecone
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    raise ValueError("PINECONE_API_KEY not found in .env file")

pc = Pinecone(api_key=api_key)
index_name = "ragapp"
index = pc.Index(index_name)

In [28]:
# Embed chunked documents
texts = [doc.page_content for doc in chunked_docs]
vectors = embeddings.embed_documents(texts)

In [31]:
# Prepare data for upsert
data = [
    (
        str(i),                    # Unique ID for each vector
        vectors[i],                # Embedding vector
        {"text": texts[i], **chunked_docs[i].metadata}  # Metadata (text and original metadata)
    )
    for i in range(len(vectors))
]

# Upsert to Pinecone
index.upsert(vectors=data)

{'upserted_count': 118}

In [32]:
# Verify upsert
stats = index.describe_index_stats()
print(f"Index stats: {stats}")

Index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 118}},
 'total_vector_count': 118,
 'vector_type': 'dense'}


In [None]:
def pinecone_similarity_search(query, k=2):
    query_embedding = embeddings.embed_query(query)
    response = index.query(
        vector=query_embedding,
        top_k=k,
        include_metadata=True
    )
    return response

In [34]:
from langchain.chains.question_answering import load_qa_chain

In [35]:
api_key_groq = os.getenv("GROQ_API_KEY")
llm = ChatGroq(
    api_key=api_key_groq, 
    model="allam-2-7b",
    temperature=0.7
)
chain=load_qa_chain(llm,chain_type="stuff")

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain=load_qa_chain(llm,chain_type="stuff")


In [37]:
# Initialize Pinecone vector store for langchain
from langchain_pinecone import PineconeVectorStore
api_key = os.getenv("PINECONE_API_KEY")
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings, pinecone_api_key=api_key)

In [38]:
# Set up RetrievalQA chain
from langchain.chains import RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2})
)

In [39]:
# Search answers
def retrieve_answers(query):
    if not query or not isinstance(query, str):
        raise ValueError("Query must be a non-empty string")
    try:
        response = qa_chain.run(query)
        return response
    except Exception as e:
        raise RuntimeError(f"Failed to retrieve answer: {str(e)}")


In [40]:
# Test
query = "What is the main topic of the documents?"
answer = retrieve_answers(query)
print(f"Answer: {answer}")

  response = qa_chain.run(query)


Answer: Based on the provided information, the main topics of the documents seem to be:

1. 8.2 Case Study: AI in Journalism
2. 8.3 Case Study: Autonomous Surgery
3. 9 Challenges in Integration
4. 9.1 Technical Limitations
5. 9.2 Cultural Resistance

These topics are discussed in the context and appear to be related to the integration of artificial intelligence (AI) and its challenges in various fields such as journalism, surgery, and cultural resistance. However, the specific document you are referring to is not mentioned, so I cannot determine its main topic without further information. 
