In [2]:
from langchain_groq import ChatGroq
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone

In [30]:
from dotenv import load_dotenv
# Read key/value pairs from a local .env file into the process environment so
# that the os.getenv(...) lookups in later cells can find the API keys.
load_dotenv()

True

In [31]:
import os

In [5]:
## Let's read the documents
def read_doc(directory):
    """Load the PDF files found under ``directory``.

    Args:
        directory: Path of the folder handed to ``PyPDFDirectoryLoader``.

    Returns:
        Whatever ``PyPDFDirectoryLoader(directory).load()`` returns, i.e. the
        loaded documents.
    """
    return PyPDFDirectoryLoader(directory).load()

In [6]:
# Load every PDF under documents/ and report how many documents came back.
# A bare `len(doc)` in the middle of a cell is evaluated and then discarded
# (only the last expression of a cell is displayed), so print it explicitly.
doc = read_doc('documents/')
print(f"Number of documents loaded: {len(doc)}")

# Fail with a clear message instead of an IndexError on doc[0] when the
# directory is empty or contains no readable PDFs.
if not doc:
    raise ValueError("No documents were loaded from 'documents/'")

# Peek at the first loaded document to sanity-check the text extraction.
first_doc = doc[0]
print(first_doc.page_content)
print(first_doc.metadata)

Artificial Intelligence vs. Human
Intelligence: A Comprehensive Comparison
Author Name
June 19, 2025
Abstract
This article explores the multifaceted comparison between artificial intelligence
(AI) and human intelligence, delving into cognitive abilities, collaboration, ethi-
cal considerations, creativity, and future implications. By examining strengths,
limitations, and synergies, we aim to provide a balanced perspective on how AI
and humans can coexist and complement each other in an evolving technologi-
cal landscape.
Contents
1 Introduction . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .3
2 Cognitive Abilities . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .3
2.1 Processing Speed and Accuracy. . . . . . . . . . . . . . . . . . . . . . . 3
2.2 Memory and Recall . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
2.3 Learning and Adaptation. . . . . . . . . . . . . . . . . . . . . . . . . . . 4
2.4 Problem-Solving . . . . . . . . .

In [7]:
# Chunking parameters handed to the splitter: size of each chunk and the
# overlap shared by consecutive chunks.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded documents into chunks and report how many were produced.
chunked_docs = text_splitter.split_documents(doc)
print(f"Number of chunks: {len(chunked_docs)}")

Number of chunks: 118


In [8]:

from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model shared by document ingestion and query-time search.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)


  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Embed a sample sentence to check the embedding size (the recorded output is 384).
# NOTE: `vectors` is reassigned to the list of chunk embeddings in a later cell.
vectors=embeddings.embed_query("How are you?")
len(vectors)

384

In [10]:
# NOTE: this rebinds the name `Pinecone`, which the first cell also imported
# from langchain.vectorstores; from here on it refers to the Pinecone SDK client.
from pinecone import Pinecone
# Fail fast with a clear message when the key is missing or empty.
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    raise ValueError("PINECONE_API_KEY not found in .env file")

# Connect to Pinecone and get a handle on the index used by the later cells.
# The index is not created here, so it presumably already exists (TODO confirm).
pc = Pinecone(api_key=api_key)
index_name = "ragapp"
index = pc.Index(index_name)

In [11]:
# Pull the raw text out of every chunk, then embed all chunks in one call.
texts = [chunk.page_content for chunk in chunked_docs]
vectors = embeddings.embed_documents(texts)

In [12]:
# Build one (id, values, metadata) tuple per chunk. The id is the chunk's
# position as a string; the metadata keeps the chunk text next to the loader's
# own metadata so that query results can return it.
data = [
    (str(position), vector, {"text": text, **chunk.metadata})
    for position, (vector, text, chunk) in enumerate(zip(vectors, texts, chunked_docs))
]

# Send all records to the Pinecone index in a single request.
index.upsert(vectors=data)

{'upserted_count': 118}

In [13]:
# Verify the upsert: fetch the index statistics and print them. The recorded
# output reports the vector count after the upsert.
stats = index.describe_index_stats()
print(f"Index stats: {stats}")

Index stats: {'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 118}},
 'total_vector_count': 118,
 'vector_type': 'dense'}


In [15]:
def pinecone_similarity_search(query, k=2):
    """Query the Pinecone index for the entries most similar to ``query``.

    The query text is embedded with the notebook-level ``embeddings`` model and
    the resulting vector is sent to the notebook-level ``index``.

    Args:
        query: Text to search for.
        k: Number of matches to request (sent as ``top_k``). Defaults to 2.

    Returns:
        The raw response of ``index.query``, requested with metadata included.
    """
    search_params = {
        "vector": embeddings.embed_query(query),
        "top_k": k,
        "include_metadata": True,
    }
    return index.query(**search_params)
# Smoke-test the search helper with a section heading taken from the sample PDF.
test = pinecone_similarity_search("Memory and Recall")
print(test)

{'matches': [{'id': '12',
              'metadata': {'creationdate': '2025-06-19T13:48:20+00:00',
                           'creator': 'LaTeX with hyperref',
                           'page': 2.0,
                           'page_label': '3',
                           'producer': 'xdvipdfmx (20220710)',
                           'source': 'documents\\ai_vs_human.pdf',
                           'text': 'imal errors. Humans, however, rely on '
                                   'slower neural processing but excel in\n'
                                   'contextual analysis, often making '
                                   'intuitive decisions where data is '
                                   'incomplete.\n'
                                   '2.2 Memory and Recall\n'
                                   'AI has virtually unlimited storage '
                                   'capacity, retaining data with perfect '
                                   'fidelity.\n'
                  

In [16]:
from langchain.chains.question_answering import load_qa_chain

In [17]:
# Read the Groq key the same way the Pinecone key is read, and fail fast with
# a clear message when it is missing or empty.
api_key_groq = os.getenv("GROQ_API_KEY")
if not api_key_groq:
    raise ValueError("GROQ_API_KEY not found in .env file")

# Chat model used for answer generation.
llm = ChatGroq(
    api_key=api_key_groq,
    model="allam-2-7b",
    temperature=0.7
)

# NOTE(review): the recorded output for this line is a warning that points to
# LangChain's chain-migration guides, and `chain` is not referenced again in
# the visible cells. It is kept so the notebook namespace stays unchanged.
chain=load_qa_chain(llm,chain_type="stuff")

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain=load_qa_chain(llm,chain_type="stuff")


In [18]:
# Initialize the Pinecone vector store wrapper for LangChain.
from langchain_pinecone import PineconeVectorStore
# Reads the same environment variable as the earlier Pinecone client cell.
api_key = os.getenv("PINECONE_API_KEY")
# Point the store at the index named in `index_name`, using the same embedding
# model that produced the upserted vectors.
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings, pinecone_api_key=api_key)

# Experiment starts here

In [19]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from supabase import create_client, Client
import uuid

In [20]:
# Initialize conversation memory. The history is exposed under the
# "chat_history" key, and return_messages=True asks for message objects
# instead of one concatenated string.
# NOTE(review): the recorded output shows a warning for this line.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [21]:
# Retriever over the Pinecone vector store, configured to fetch 2 chunks per question.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

# Conversational retrieval chain: combines the LLM, the retriever and the
# conversation memory, using the "stuff" chain type.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm, retriever=retriever, memory=memory, chain_type="stuff",
)


In [None]:
# Initialize Supabase client
from dotenv import load_dotenv
load_dotenv()
import os

# Read the project URL and the anon (public) key from the environment.
url = os.getenv("SUPABASE_URL")
key = os.getenv("ANON_PUBLIC_KEY")

# Fail fast with a clear message, matching the Pinecone key check, instead of
# passing None into create_client.
if not url:
    raise ValueError("SUPABASE_URL not found in .env file")
if not key:
    raise ValueError("ANON_PUBLIC_KEY not found in .env file")

supabase: Client = create_client(url, key)

In [49]:
# Function to store chat in Supabase
def store_chat_in_supabase(user_id: str, query: str, answer: str, session_id: str = None):
    """Insert one question/answer exchange into the ``chat_history`` table.

    Args:
        user_id: Identifier of the user who asked the question.
        query: The question text.
        answer: The answer text to store alongside the question.
        session_id: Conversation identifier. When it is ``None`` or empty, a
            fresh random UUID string is generated for this row.

    Returns:
        The ``data`` attribute of the Supabase insert response.

    Raises:
        RuntimeError: If anything fails while building or inserting the row.
            The original exception is attached as ``__cause__``.
    """
    try:
        row = {
            "user_id": user_id,
            "query": query,
            "answer": answer,
            "session_id": session_id or str(uuid.uuid4()),
        }
        response = supabase.table("chat_history").insert(row).execute()
        return response.data
    except Exception as e:
        # Chain explicitly so the traceback marks the underlying failure as the
        # direct cause of this error.
        raise RuntimeError(f"Failed to store chat in Supabase: {str(e)}") from e

In [50]:
# Function to retrieve chat history from Supabase
def get_chat_history(user_id: str, session_id: str = None):
    """Fetch stored chat rows for a user from the ``chat_history`` table.

    Args:
        user_id: Only rows whose ``user_id`` equals this value are returned.
        session_id: When truthy, additionally restricts the rows to this
            session. When ``None`` or empty, rows from all of the user's
            sessions are returned.

    Returns:
        The ``data`` attribute of the Supabase response, ordered by the
        ``timestamp`` column in descending order (newest first).

    Raises:
        RuntimeError: If the query fails for any reason. The original
            exception is attached as ``__cause__``.
    """
    try:
        query = supabase.table("chat_history").select("*").eq("user_id", user_id)
        if session_id:
            query = query.eq("session_id", session_id)
        response = query.order("timestamp", desc=True).execute()
        return response.data
    except Exception as e:
        # Chain explicitly so the traceback marks the underlying failure as the
        # direct cause of this error.
        raise RuntimeError(f"Failed to retrieve chat history: {str(e)}") from e

In [51]:
# Function to load Supabase chat history into memory
def load_supabase_history_to_memory(user_id: str, session_id: str = None):
    """Replace the conversation memory with the history stored in Supabase.

    The notebook-level ``memory`` is cleared and then refilled with one user
    message and one AI message per stored row, in chronological order.

    Args:
        user_id: User whose history is loaded.
        session_id: Optional session filter, forwarded to ``get_chat_history``.

    Returns:
        None. The function works by mutating ``memory``.
    """
    # Fetch first: if the lookup fails, the existing memory is left untouched.
    history = list(get_chat_history(user_id, session_id) or [])
    memory.clear()  # Clear existing memory to avoid duplication
    # get_chat_history returns rows newest-first (timestamp descending). Replay
    # them oldest-first so the memory holds the conversation in the order it
    # actually happened; otherwise the chain sees the dialogue reversed.
    for entry in reversed(history):
        memory.chat_memory.add_user_message(entry["query"])
        memory.chat_memory.add_ai_message(entry["answer"])


In [52]:
# Function to retrieve answers with history from Supabase
def retrieve_answers_with_history(query, user_id="default_user", session_id=None):
    """Answer ``query`` with the RAG chain, using and updating stored history.

    Steps: reload the user's stored history into the conversation memory, run
    the question through ``qa_chain``, store the new exchange in Supabase, and
    read the history back.

    Args:
        query: The question to answer. Must be a string containing at least
            one non-whitespace character.
        user_id: User whose history is loaded and extended.
        session_id: Optional conversation identifier. When ``None``, history is
            read across all of the user's sessions, and the new exchange is
            stored under a freshly generated session id.

    Returns:
        A ``(answer, chat_history)`` tuple: the chain's answer text and the
        rows returned by ``get_chat_history`` after the new exchange is stored.

    Raises:
        ValueError: If ``query`` is not a string or is empty or whitespace-only.
        RuntimeError: If loading history, running the chain, or storing the
            exchange fails. The original exception is attached as ``__cause__``.
    """
    # Reject whitespace-only questions too; they would otherwise be sent to
    # the LLM and stored as an empty-looking exchange.
    if not isinstance(query, str) or not query.strip():
        raise ValueError("Query must be a non-empty string")
    try:
        # Load chat history from Supabase into memory
        load_supabase_history_to_memory(user_id, session_id)

        # Process the query with the RAG chain. `invoke` is the supported
        # entry point; calling the chain object directly is deprecated.
        response = qa_chain.invoke({"question": query})
        answer = response["answer"]

        # Store the new query and answer in Supabase
        store_chat_in_supabase(user_id, query, answer, session_id)

        # Retrieve updated chat history from Supabase
        chat_history = get_chat_history(user_id, session_id)
        return answer, chat_history
    except Exception as e:
        # Chain explicitly so the traceback marks the underlying failure as the
        # direct cause of this error.
        raise RuntimeError(f"Failed to retrieve answer: {str(e)}") from e

In [54]:
# Test the chain and Supabase integration
# End-to-end demo: ask a question, then a follow-up, against the same stored
# conversation, printing the answer and the Supabase history after each call.
if __name__ == "__main__":
    # Fixed identifiers so both calls below read and write the same conversation.
    user_id = "user_123"
    session_id = "session_001"
    
    # First turn: the answer is stored under (user_id, session_id).
    query = "What is the main topic of the documents?"
    answer, chat_history = retrieve_answers_with_history(query, user_id, session_id)
    print(f"Answer: {answer}")
    print(f"Chat History from Supabase: {chat_history}")
    
    # Second turn: the history stored for this session is reloaded into memory
    # before the chain runs, so the follow-up can build on the first exchange.
    follow_up_query = "Can you tell me more about AI in journalism?"
    answer, chat_history = retrieve_answers_with_history(follow_up_query, user_id, session_id)
    print(f"Follow-up Answer: {answer}")
    print(f"Chat History from Supabase: {chat_history}")

Answer: The main topics discussed in the provided documents are:

1. AI in Journalism (document 8.2)
2. Autonomous Surgery (document 8.3)
3. Challenges in Integration (document 9)
4. Technical Limitations (document 9, section 9.1)
5. Cultural Resistance (document 9, section 9.2) 
Chat History from Supabase: [{'id': '3ccb2579-0773-4bf7-98f0-e2108752078b', 'user_id': 'user_123', 'query': 'What is the main topic of the documents?', 'answer': 'The main topics discussed in the provided documents are:\n\n1. AI in Journalism (document 8.2)\n2. Autonomous Surgery (document 8.3)\n3. Challenges in Integration (document 9)\n4. Technical Limitations (document 9, section 9.1)\n5. Cultural Resistance (document 9, section 9.2) ', 'timestamp': '2025-06-20T10:28:48.312052+00:00', 'session_id': 'session_001'}, {'id': 'd3962f43-4958-4719-aae3-0845ded7b7c4', 'user_id': 'user_123', 'query': 'what is my last query?', 'answer': 'The topic of the document 8.2 is AI in Journalism. ', 'timestamp': '2025-06-20T1

# Experiment ends here