In [None]:
langchain
langchain-community
chromadb
tiktoken
bs4
langchain-groq 
sentence-transformers
python-dotenv
streamlit



In [None]:
import streamlit as st
import os
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.prompts import PromptTemplate
from langchain.tools import tool
from langchain.tools.render import render_text_description_and_args
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents import AgentExecutor
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory
from langchain.schema import BaseMessage, HumanMessage, AIMessage
from dotenv import load_dotenv
import time
import json
import pickle
from datetime import datetime
from pathlib import Path

In [None]:
# Load environment variables from a local .env file into the process
# environment (API keys and similar settings are expected to live there).
load_dotenv()
# NOTE(review): presumably read by the web loader / HTTP layer to identify this
# app; it is set after the imports above have already executed, so confirm
# that is early enough for whichever library consumes it.
os.environ["USER_AGENT"] = "ai-llm-agentic-rag/1.0"

# Directory (relative to the current working directory) that holds the
# per-session chat memory files; created if missing, left alone if present.
MEMORY_DIR = Path("./chat_memory")
MEMORY_DIR.mkdir(exist_ok=True)

# Page configuration: browser-tab title and icon, wide layout, sidebar open.
# NOTE(review): Streamlit expects set_page_config to run before other st.*
# calls, so keep this statement ahead of any UI code.
st.set_page_config(
    page_title="AI/LLM Expert Chat Bot with Memory",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

In [None]:
# Stylesheet for the page header, chat bubbles, sidebar panels and the memory
# status box. Kept in a named constant so the injection call below stays short.
_CUSTOM_CSS = """
<style>
    .main-header {
        text-align: center;
        padding: 1rem 0;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        margin-bottom: 2rem;
    }
    .chat-message {
        padding: 1rem;
        border-radius: 10px;
        margin-bottom: 1rem;
        border-left: 4px solid #667eea;
    }
    .user-message {
        background-color: #f0f2f6;
        border-left-color: #667eea;
    }
    .assistant-message {
        background-color: #e8f4fd;
        border-left-color: #1f77b4;
    }
    .sidebar-content {
        background-color: #f8f9fa;
        padding: 1rem;
        border-radius: 10px;
        margin-bottom: 1rem;
    }
    .memory-info {
        background-color: #e8f5e8;
        padding: 0.5rem;
        border-radius: 5px;
        border-left: 3px solid #28a745;
        margin: 0.5rem 0;
    }
</style>
"""

# unsafe_allow_html lets the raw <style> element through instead of having
# Streamlit render it as escaped text.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)


In [None]:
# Seed pages for the knowledge base: Wikipedia articles on AI / LLM topics.
# Every entry shares the same article prefix, so only the titles are listed.
_WIKIPEDIA_ARTICLE_PREFIX = "https://en.wikipedia.org/wiki/"
_AI_LLM_ARTICLE_TITLES = (
    "Large_language_model",
    "Transformer_(machine_learning_model)",
    "GPT-3",
    "ChatGPT",
    "Artificial_intelligence",
    "Machine_learning",
    "Deep_learning",
    "Natural_language_processing",
    "BERT_(language_model)",
    "Attention_(machine_learning)",
)
AI_LLM_URLS = [_WIKIPEDIA_ARTICLE_PREFIX + title for title in _AI_LLM_ARTICLE_TITLES]


In [None]:
class ChatMemoryManager:
    """Conversation memory for a single chat session, mirrored to disk.

    Wraps a LangChain conversation-memory object and, after every recorded
    turn, writes the message list to two files under ``MEMORY_DIR``:

    * ``<session_id>_memory.pkl``   - pickle of the message objects; this is
      the file read back when a manager is constructed.
    * ``<session_id>_history.json`` - human-readable export; written only,
      never loaded by this class.

    NOTE(review): the pickle file is deserialised with ``pickle.load``, which
    can execute arbitrary code. Only safe while ``MEMORY_DIR`` is writable by
    trusted parties alone.
    NOTE(review): turns are appended to ``memory.chat_memory`` directly rather
    than through the memory object's own save path - confirm whether the
    "summary" backend still enforces ``max_token_limit`` in that case.
    """

    def __init__(self, session_id=None, memory_type="buffer", max_token_limit=2000):
        """Create the memory backend and restore any previously saved messages.

        Args:
            session_id: Identifier embedded in the on-disk file names. When
                falsy, one is generated from the current Unix time (seconds).
            memory_type: ``"summary"`` selects ConversationSummaryBufferMemory
                (using the LLM stored under ``st.session_state['llm']``); any
                other value selects ConversationBufferMemory.
            max_token_limit: Token budget passed to the summary backend. It is
                stored on the instance but not used by the buffer backend.
        """
        if not session_id:
            session_id = f"session_{int(time.time())}"
        self.session_id = session_id
        self.memory_type = memory_type
        self.max_token_limit = max_token_limit
        self.memory_file = MEMORY_DIR / f"{self.session_id}_memory.pkl"
        self.chat_history_file = MEMORY_DIR / f"{self.session_id}_history.json"

        # Options common to both backends.
        shared_options = dict(return_messages=True, memory_key="chat_history")
        if memory_type != "summary":
            self.memory = ConversationBufferMemory(**shared_options)
        else:
            self.memory = ConversationSummaryBufferMemory(
                llm=st.session_state.get('llm'),
                max_token_limit=max_token_limit,
                **shared_options,
            )

        # Pick up where a previous run of this session left off, if it saved anything.
        self.load_memory()

    def save_memory(self):
        """Write the current messages to the pickle file and the JSON export.

        Any failure is reported through ``st.error`` instead of being raised.
        """
        try:
            with self.memory_file.open('wb') as pickle_sink:
                pickle.dump(self.memory.chat_memory.messages, pickle_sink)

            # Human-readable mirror. The per-message "timestamp" is the time of
            # this save, not the time the message was originally added.
            export = {
                "session_id": self.session_id,
                "timestamp": datetime.now().isoformat(),
                "messages": [],
            }
            for entry in self.memory.chat_memory.messages:
                export["messages"].append(
                    {
                        "type": type(entry).__name__,
                        "content": entry.content,
                        "timestamp": datetime.now().isoformat(),
                    }
                )

            with self.chat_history_file.open('w') as json_sink:
                json.dump(export, json_sink, indent=2)

        except Exception as e:
            st.error(f"Error saving memory: {str(e)}")

    def load_memory(self):
        """Append messages from the pickle file, if it exists, to the memory.

        Any failure is reported through ``st.warning`` instead of being raised.
        """
        try:
            if not self.memory_file.exists():
                return
            with self.memory_file.open('rb') as pickle_source:
                # See the class-level note: unpickling is only safe for trusted files.
                for restored in pickle.load(pickle_source):
                    self.memory.chat_memory.add_message(restored)

        except Exception as e:
            st.warning(f"Could not load previous memory: {str(e)}")

    def add_message(self, human_message, ai_message):
        """Record one user/assistant exchange and persist the updated history."""
        history = self.memory.chat_memory
        history.add_user_message(human_message)
        history.add_ai_message(ai_message)
        self.save_memory()

    def get_memory_summary(self):
        """Return a one-line count of stored messages, split by human and AI."""
        stored = self.memory.chat_memory.messages
        if not stored:
            return "No conversation history"

        total_count = len(stored)
        human_count = sum(1 for entry in stored if isinstance(entry, HumanMessage))
        ai_count = sum(1 for entry in stored if isinstance(entry, AIMessage))

        return f"Memory: {total_count} total messages ({human_count} human, {ai_count} AI)"

    def clear_memory(self):
        """Empty the in-memory history and delete both on-disk files if present."""
        self.memory.clear()
        for stale_file in (self.memory_file, self.chat_history_file):
            if stale_file.exists():
                stale_file.unlink()

    def get_context_for_agent(self):
        """Render the six most recent messages as a plain-text transcript.

        Returns an empty string when nothing has been recorded. Messages that
        are neither HumanMessage nor AIMessage are left out of the transcript.
        """
        stored = self.memory.chat_memory.messages
        if not stored:
            return ""

        # Six messages correspond to three exchanges when turns alternate.
        transcript = ["Recent conversation context:\n"]
        for entry in stored[-6:]:
            if isinstance(entry, HumanMessage):
                transcript.append(f"Human: {entry.content}\n")
            elif isinstance(entry, AIMessage):
                transcript.append(f"Assistant: {entry.content}\n")

        return "".join(transcript)

In [None]:
@st.cache_resource
def initialize_llm():
    """Build the Groq chat model; Streamlit's resource cache reuses the instance."""
    # NOTE(review): max_tokens caps every reply at 250 tokens, and the stop
    # strings presumably exist to cut generation at agent-transcript markers.
    # Both also apply to any other call made through this model - confirm that
    # is intended.
    groq_settings = {
        "model_name": "llama3-70b-8192",
        "temperature": 0.6,
        "max_tokens": 250,
        "stop": ["Human:", "Observation"],
    }
    return ChatGroq(**groq_settings)

@st.cache_resource
def create_ai_knowledge_base(urls, persist_directory="./ai_llm_chroma_db"):
    """Fetch the given pages, chunk them, embed them and store them in Chroma.

    Args:
        urls: Web page URLs handed to ``WebBaseLoader``.
        persist_directory: Directory passed to Chroma for on-disk persistence.

    Returns:
        ``(vectorstore, chunk_count)`` on success. On any exception the error
        is shown with ``st.error`` and ``(None, 0)`` is returned.

    NOTE(review): the function is wrapped in ``st.cache_resource``, so the
    ``(None, 0)`` failure result is presumably cached too - confirm callers
    clear the cache before retrying.
    NOTE(review): every uncached run adds the chunks to ``persist_directory``
    again; verify an existing store does not accumulate duplicates.
    """
    try:
        # Download and parse every page.
        pages = WebBaseLoader(urls).load()

        # Chunk sizes are in characters because length_function is len;
        # add_start_index records each chunk's offset in its metadata.
        chunks = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
            add_start_index=True,
        ).split_documents(pages)

        # Must stay in sync with load_existing_ai_knowledge_base so queries are
        # embedded the same way the stored chunks were.
        embedder = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True}
        )

        store = Chroma.from_documents(
            documents=chunks,
            embedding=embedder,
            persist_directory=persist_directory
        )

        return store, len(chunks)
    except Exception as e:
        st.error(f"Error creating knowledge base: {str(e)}")
        return None, 0

@st.cache_resource
def load_existing_ai_knowledge_base(persist_directory="./ai_llm_chroma_db"):
    """Open the Chroma store persisted at ``persist_directory``.

    Args:
        persist_directory: Directory of a previously built knowledge base.

    Returns:
        The Chroma vector store, or ``None`` after reporting the exception
        through ``st.error``.
    """
    try:
        # Same embedding settings as create_ai_knowledge_base; they have to
        # match for queries to line up with the stored vectors.
        return Chroma(
            persist_directory=persist_directory,
            embedding_function=HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2",
                model_kwargs={'device': 'cpu'},
                encode_kwargs={'normalize_embeddings': True}
            ),
        )
    except Exception as e:
        st.error(f"Error loading knowledge base: {str(e)}")
        return None

In [None]:
def setup_retriever(vectorstore, search_type="similarity", k=5):
    """Build a retriever over ``vectorstore`` using one of three strategies.

    Args:
        vectorstore: Object exposing ``as_retriever(search_type=..., search_kwargs=...)``.
        search_type: One of
            ``"similarity"``       - plain top-``k`` similarity search;
            ``"mmr"``              - maximal marginal relevance, choosing ``k``
                                     results from 20 fetched candidates;
            ``"similarity_score"`` - similarity search that keeps results
                                     scoring at least 0.5.
        k: Number of documents the retriever should return.

    Returns:
        Whatever ``vectorstore.as_retriever`` returns for the chosen strategy.

    Raises:
        ValueError: If ``search_type`` is not one of the supported names.
            (Previously this fell through to ``return retriever`` and failed
            with an UnboundLocalError that did not name the bad value.)
    """
    # Public strategy name -> (search_type understood by the store, search_kwargs).
    strategies = {
        "similarity": ("similarity", {"k": k}),
        "mmr": ("mmr", {"k": k, "fetch_k": 20}),
        "similarity_score": (
            "similarity_score_threshold",
            {"score_threshold": 0.5, "k": k},
        ),
    }

    try:
        store_search_type, search_kwargs = strategies[search_type]
    except (KeyError, TypeError):
        # TypeError covers unhashable values such as a list passed by mistake.
        supported = ", ".join(repr(name) for name in strategies)
        raise ValueError(
            f"Unsupported search_type {search_type!r}; expected one of: {supported}"
        ) from None

    return vectorstore.as_retriever(
        search_type=store_search_type,
        search_kwargs=search_kwargs,
    )

In [None]:
# Module-level handles read by the @tool functions defined later in this file
# (retrieve_ai_documents uses `retriever`; answer_ai_question_with_memory uses
# `llm` and `memory_manager`). All start unset; application setup code is
# expected to assign them before the tools are invoked.
retriever = llm = memory_manager = None

In [None]:
@tool
def retrieve_ai_documents(query: str) -> str:
    """Retrieve relevant AI/LLM documents from the knowledge base for a given query."""
    # The docstring above is what @tool exposes as the tool description, so it
    # is deliberately left short; maintainer notes live in these comments.
    #
    # Reads the module-level `retriever`. Returns a plain string in every case:
    # the formatted documents, a "no documents" notice, or an error message
    # (exceptions are never propagated).
    max_chars = 800  # longer page contents are cut here and suffixed with "..."

    if retriever is None:
        # Name the real problem instead of surfacing a NoneType AttributeError.
        return "Error retrieving documents: knowledge base retriever is not initialized."

    try:
        # `invoke` is the supported entry point; `get_relevant_documents` is
        # deprecated in langchain-core.
        docs = retriever.invoke(query)

        if not docs:
            return "No relevant AI/LLM documents found for the query."

        formatted_docs = []
        for i, doc in enumerate(docs, 1):
            source = doc.metadata.get('source', 'Unknown')
            content = doc.page_content
            if len(content) > max_chars:
                content = content[:max_chars] + "..."
            formatted_docs.append(f"Document {i} (Source: {source}):\n{content}")

        return "\n\n".join(formatted_docs)

    except Exception as e:
        return f"Error retrieving documents: {str(e)}"

@tool
def answer_ai_question_with_memory(query: str) -> str:
    """Answer AI/LLM related questions using retrieved context and conversation memory."""
    # The docstring above is what @tool exposes as the tool description, so it
    # is deliberately left unchanged; maintainer notes live in these comments.
    #
    # Reads the module-level `llm` and `memory_manager`. Returns the model's
    # answer text, or an error message string (exceptions are never propagated).
    if llm is None:
        # Name the real problem instead of surfacing a NoneType AttributeError.
        return "Error answering question: language model is not initialized."

    try:
        # retrieve_ai_documents is a tool object, not a plain function: run it
        # through `invoke` rather than the deprecated direct call.
        context = retrieve_ai_documents.invoke({"query": query})

        # Recent turns as text; empty when no memory manager has been set.
        memory_context = memory_manager.get_context_for_agent() if memory_manager else ""

        answer_prompt = f"""
Based on the following context about AI and Large Language Models, and considering our previous conversation, please provide a comprehensive and accurate answer to the question.

{memory_context}

Knowledge Base Context:
{context}

Current Question: {query}

Please provide a detailed answer that:
1. Considers our previous conversation when relevant
2. Uses information from the provided knowledge base context
3. Explains technical concepts clearly
4. Mentions specific AI/LLM technologies when relevant
5. References previous topics we discussed if they relate to the current question

Answer:"""

        response = llm.invoke(answer_prompt)
        return response.content

    except Exception as e:
        return f"Error answering question: {str(e)}"