In [9]:
import pickle
import time
import pandas as pd
import langchain
from datetime import datetime

from langchain import HuggingFaceHub
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from sentence_transformers import SentenceTransformer
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from transformers import pipeline
import torch
import os
import streamlit as st
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_core.messages import HumanMessage

In [10]:
class RAGChatSystem:
    def __init__(self):
        """Initialize the RAG Chat System with context memory"""
        self.setup_environment()
        self.setup_models()
        self.chat_history = []
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer"
        )
        self.vector_index = None
        self.conversation_chain = None
        
    def setup_environment(self):
        """Setup API keys and environment variables"""
        # Replace with your actual API keys
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = "AIzaSyC9LjGSjUBs2497iOgH6NApM5TrG1CSFWk"
        os.environ["GOOGLE_API_KEY"] = "AIzaSyC9LjGSjUBs2497iOgH6NApM5TrG1CSFWk"
        
    def setup_models(self):
        """Initialize LLM and embeddings"""
        self.llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.7)
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        
    def load_and_process_documents(self, urls=None):
        """Load documents from URLs and create vector store"""
        if urls is None:
            urls = [
                "https://en.wikipedia.org/wiki/Alien",
                "https://en.wikipedia.org/wiki/Pakistan",
                "https://en.wikipedia.org/wiki/India"
            ]
        
        print("Loading documents...")
        loader = UnstructuredURLLoader(urls=urls)
        docs = loader.load()
        
        print("Splitting documents...")
        text_splitter = RecursiveCharacterTextSplitter(
            separators=["\n\n", "\n", ".", " "],
            chunk_size=500,  # Increased chunk size for better context
            chunk_overlap=50
        )
        docs = text_splitter.split_documents(docs)
        
        print("Creating vector index...")
        self.vector_index = FAISS.from_documents(docs, self.embeddings)
        
        # Save vector index
        self.save_vector_index()
        self.setup_conversation_chain()
        print("Setup complete!")
        
    def save_vector_index(self, file_path="vector_index.pkl"):
        """Save vector index to file"""
        with open(file_path, 'wb') as f:
            pickle.dump(self.vector_index, f)
            
    def load_vector_index(self, file_path="vector_index.pkl"):
        """Load vector index from file"""
        if os.path.exists(file_path):
            with open(file_path, 'rb') as f:
                self.vector_index = pickle.load(f)
                self.setup_conversation_chain()
                print("Vector index loaded successfully!")
                return True
        return False
        
    def setup_conversation_chain(self):
        """Setup the conversational retrieval chain with custom prompt"""
        if self.vector_index is None:
            raise ValueError("Vector index not loaded. Please load documents first.")
# Custom prompt template that includes chat history
        custom_prompt = PromptTemplate(
            template="""You are a helpful AI assistant that answers questions based on the provided context and chat history.
            
Context from documents: {context}

Chat History: {chat_history}

Current Question: {question}

Please provide a comprehensive answer based on the context and previous conversation. If you can't find the answer in the context, say so clearly.

Answer: """,
            input_variables=["context", "chat_history", "question"]
        )
        
        self.conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=self.vector_index.as_retriever(
                search_kwargs={"k": 5}
            ),
            memory=self.memory,
            return_source_documents=True,
            verbose=True,
            combine_docs_chain_kwargs={"prompt": custom_prompt}
        )
        
    def chat(self, question):
        """Main chat function that maintains context"""
        if self.conversation_chain is None:
            return "Please load documents first using load_documents() method."
            
        try:
            # Get response from the chain
            response = self.conversation_chain({"question": question})
            
            # Extract answer and sources
            answer = response["answer"]
            source_docs = response.get("source_documents", [])
            
            # Format sources
            sources = []
            for doc in source_docs[:3]:  # Limit to top 3 sources
                if hasattr(doc, 'metadata') and 'source' in doc.metadata:
                    sources.append(doc.metadata['source'])
                    
            # Store in chat history for display
            chat_entry = {
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "question": question,
                "answer": answer,
                "sources": sources
            }
            self.chat_history.append(chat_entry)
            
            return self.format_response(answer, sources)
            
        except Exception as e:
            return f"Error processing question: {str(e)}"
            
    def format_response(self, answer, sources):
        """Format the response with sources"""
        response = f"Answer: {answer}\n"
        if sources:
            response += f"\nSources:\n"
            for i, source in enumerate(sources, 1):
                response += f"{i}. {source}\n"
        return response
        
    def get_chat_history(self):
        """Return formatted chat history"""
        if not self.chat_history:
            return "No chat history available."
            
        history = "\n" + "="*50 + "\n"
        history += "CHAT HISTORY\n"
        history += "="*50 + "\n"
        
        for i, entry in enumerate(self.chat_history, 1):
            history += f"\n[{entry['timestamp']}] Question {i}:\n"
            history += f"Q: {entry['question']}\n"
            history += f"A: {entry['answer']}\n"
            if entry['sources']:
                history += f"Sources: {', '.join(entry['sources'])}\n"
            history += "-" * 30 + "\n"
            
        return history
        
    def clear_history(self):
        """Clear chat history and memory"""
        self.chat_history = []
        self.memory.clear()
        print("Chat history cleared!")
        
    def save_chat_history(self, filename=None):
        """Save chat history to file"""
        if filename is None:
            filename = f"chat_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
            
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(self.get_chat_history())
        print(f"Chat history saved to {filename}")
   

In [18]:
if __name__ == "__main__":
    # Initialize the chat system
    chat_system = RAGChatSystem()
    
    # Try to load existing vector index, if not found, create new one
    if not chat_system.load_vector_index():
        print("No existing vector index found. Creating new one...")
        chat_system.load_and_process_documents()
    
    print("\n" + "="*60)
    print("RAG CHAT SYSTEM INITIALIZED")
    print("="*60)
    print("Commands:")
    print("- Type your questions normally")
    print("- Type 'history' to see chat history")
    print("- Type 'clear' to clear chat history")
    print("- Type 'save' to save chat history")
    print("- Type 'quit' to exit")
    print("="*60 + "\n")
    
    # Interactive chat loop
    while True:
        try:
            user_input = input("\nYou: ").strip()
            
            if user_input.lower() == 'quit':
                print("Goodbye!")
                break
            elif user_input.lower() == 'history':
                print(chat_system.get_chat_history())
            elif user_input.lower() == 'clear':
                chat_system.clear_history()
            elif user_input.lower() == 'save':
                chat_system.save_chat_history()
            elif user_input:
                print("\nAI:", chat_system.chat(user_input))
            else:
                print("Please enter a question or command.")
                
        except KeyboardInterrupt:
            print("\n\nGoodbye!")
            break
        except Exception as e:
            print(f"Error: {str(e)}")

# Alternative usage for Jupyter notebooks
def create_chat_session():
    """Helper function to create a chat session in Jupyter"""
    chat_system = RAGChatSystem()
    
    # Try to load existing vector index
    if not chat_system.load_vector_index():
        print("Creating new vector index...")
        chat_system.load_and_process_documents()
    
    return chat_system

Vector index loaded successfully!

RAG CHAT SYSTEM INITIALIZED
Commands:
- Type your questions normally
- Type 'history' to see chat history
- Type 'clear' to clear chat history
- Type 'save' to save chat history
- Type 'quit' to exit




You:  hi i am asif what do u know about pakistan 




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful AI assistant that answers questions based on the provided context and chat history.

Context from documents: See also: Bibliography of Pakistan

Overview

^ a b "Country Profile: Pakistan" (PDF). Library of Congress Country Studies on Pakistan. Library of Congress. February 2005. Retrieved 1 September 2010

^ "Pakistan Overview". worldbank.org.

countries and Pakistan

^ a b c "Pakistan". World Factbook. CIA. Retrieved 13 February 2008.

Chat History: 

Current Question: hi i am asif what do u know about pakistan

Please provide a comprehensive answer based on the context and previous conversation. If you can't find the answer in the context, say so clearly.

Answer: [0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

AI: Answer: Hi Asif!  Based on the provided text, I know that Pakistan is a country.  The Library of Congress a


You:  can u solve the issues and all the problems of pakistan 




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: hi i am asif what do u know about pakistan
Assistant: Hi Asif!  Based on the provided text, I know that Pakistan is a country.  The Library of Congress and the World Bank have published information about it (though the specifics of that information are not included here).  The CIA World Factbook also contains a profile of Pakistan.  Beyond that, I don't have any further details about Pakistan from this limited context.
Follow Up Input: can u solve the issues and all the problems of pakistan
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful AI assistant that answers questions based on the 


You:  quit


Goodbye!


In [17]:
chat = create_chat_session()

Vector index loaded successfully!


In [13]:
response1 = chat.chat("What is the relation between India and Pakistan?")
print(response1)

response2 = chat.chat("Tell me more about the partition mentioned earlier")
print(response2)

response3 = chat.chat("What were the main causes of this partition?")
print(response3)

# View chat history
print(chat.get_chat_history())

# Clear history if needed
# chat.clear_history()



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful AI assistant that answers questions based on the provided context and chat history.

Context from documents: countries and Pakistan

. The region has been the subject of a bitter dispute between India and Pakistan since they became independent in 1947

Main article: China–Pakistan relations

... has been the subject of dispute between India and Pakistan since the partition of the Indian subcontinent in 1947

See also: Bibliography of Pakistan

Overview

Chat History: 

Current Question: What is the relation between India and Pakistan?

Please provide a comprehensive answer based on the context and previous conversation. If you can't find the answer in the context, say so clearly.

Answer: [0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m
Answer: Based on the provided text, the relationship between India and Pakistan has been c

In [19]:
response1 = chat.chat("waht was the role in this for usa ")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful AI assistant that answers questions based on the provided context and chat history.

Context from documents: .[312] During the 1970s, Pakistan acted as an intermediary in U.S.-China rapprochement,[313] facilitating US President Richard Nixon's historic visit to China

^ "ISC News". International School Consultancy Group. Archived from the original on 4 March 2016.

in its foreign policy

^ Haqqani 2010, p. 400.

^ Asia Watch Committee (U.S.) 1992.

United States Senate Committee on Foreign Relations (2008). U.S

Chat History: 

Current Question: waht was the role in this for usa 

Please provide a comprehensive answer based on the context and previous conversation. If you can't find the answer in the context, say so clearly.

Answer: [0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


In [20]:
print(chat.get_chat_history())



CHAT HISTORY

[2025-08-25 14:45:25] Question 1:
Q: waht was the role in this for usa 
A: Based on the provided text, the U.S.'s role was to seek rapprochement with China.  Pakistan acted as an intermediary, facilitating U.S. President Richard Nixon's visit to China, a key step in achieving this goal.
Sources: https://en.wikipedia.org/wiki/Pakistan, https://en.wikipedia.org/wiki/Pakistan, https://en.wikipedia.org/wiki/Pakistan
------------------------------

