In [1]:
# COMPLETE RAG SYSTEM FOR "THE VERDICT" PDF
import os
import re

from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. LOAD AND SPLIT THE PDF
print("📖 LOADING 'THE VERDICT' PDF")
print("=" * 50)

# Load the PDF file; the path is relative to the notebook's working directory.
loader = PyPDFLoader("The_verdict.pdf")
documents = loader.load()

# Split into overlapping chunks. Sizes are measured with len(), i.e. in
# characters: chunk_size=1000 with chunk_overlap=200 between neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)
splits = text_splitter.split_documents(documents)

# `splits` is the module-level chunk list that ask_question() searches.
print(f"✅ Loaded {len(splits)} chunks from the PDF")

# 2. INITIALIZE CHAT MODEL
# The OpenRouter key is read from the environment instead of being embedded in
# the notebook, so it is never saved with the file or its outputs.
# NOTE: a key was previously committed on this line; treat it as compromised
# and rotate it in the OpenRouter dashboard.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    raise RuntimeError(
        "Set the OPENROUTER_API_KEY environment variable before running this notebook."
    )

llm = ChatOpenAI(
    model="openai/gpt-3.5-turbo",
    temperature=0.7,
    openai_api_key=OPENROUTER_API_KEY,
    openai_api_base="https://openrouter.ai/api/v1"
)

# 3. SIMPLE RETRIEVER FUNCTION
def retrieve_chunks(query, chunks, top_k=3):
    """Return the chunks most relevant to `query`, best match first.

    Relevance is a simple keyword score computed on lower-cased text:
    one point per distinct query word found in the chunk, plus a bonus when
    the whole query appears verbatim so an exact phrase hit outranks any
    word-only hit. Chunks with equal scores keep their document order.

    Args:
        query: Free-text question. Punctuation is ignored when extracting
            the words to match ("about?" is matched as "about").
        chunks: Iterable of objects exposing a `page_content` string.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most `top_k` chunks with a non-zero score. Empty when the
        query is blank, `top_k` is not positive, or nothing matches.
    """
    normalized_query = query.lower().strip()
    if not normalized_query or top_k <= 0:
        # A blank query is a substring of every chunk; returning nothing is
        # more useful than returning the first top_k chunks.
        return []

    terms = set(re.findall(r"\w+", normalized_query))

    scored = []
    for position, chunk in enumerate(chunks):
        content = chunk.page_content.lower()
        # Substring matching is kept so "paint" still matches "painting".
        score = sum(1 for term in terms if term in content)
        if normalized_query in content:
            score += len(terms) + 1
        if score:
            scored.append((score, position, chunk))

    # Highest score first; `position` keeps ties in document order.
    scored.sort(key=lambda item: (-item[0], item[1]))
    return [chunk for _score, _position, chunk in scored[:top_k]]

# 4. PROMPT TEMPLATE
# Plain str.format template: ask_question() fills the {chat_history},
# {context} and {question} placeholders and sends the resulting text to the LLM.
prompt_template = """
Answer the question based on the document content below.

CHAT HISTORY:
{chat_history}

DOCUMENT CONTEXT:
{context}

QUESTION: {question}

Answer based only on the document content provided.
"""

# 5. CHAT HISTORY MANAGEMENT
# Module-level conversation log; ask_question() appends to it.
chat_history = []

def format_chat_history(history):
    """Render a list of LangChain messages as "Human: ..." / "AI: ..." lines.

    Messages that are neither HumanMessage nor AIMessage are skipped.
    Returns one newline-terminated line per rendered message, or an empty
    string when there is nothing to render.
    """
    rendered = []
    for entry in history:
        if isinstance(entry, HumanMessage):
            speaker = "Human"
        elif isinstance(entry, AIMessage):
            speaker = "AI"
        else:
            continue
        rendered.append(f"{speaker}: {entry.content}\n")
    return "".join(rendered)

# 6. MAIN RAG FUNCTION
def ask_question(question):
    """Answer `question` from the loaded document and record the exchange.

    Retrieves matching chunks from the module-level `splits`; when any are
    found, fills `prompt_template` with the chat history so far, the joined
    chunk text and the question, and asks `llm`. When none are found a fixed
    fallback sentence is used and the LLM is not called. Either way the
    question and answer are appended to the module-level `chat_history`.

    Returns:
        (answer, relevant_chunks) — the answer text and the chunks used
        (an empty list when nothing matched).
    """
    global chat_history

    matched = retrieve_chunks(question, splits)

    if matched:
        # Chunks are separated by a blank line inside the prompt.
        context_text = "\n\n".join(doc.page_content for doc in matched)
        filled_prompt = prompt_template.format(
            chat_history=format_chat_history(chat_history),
            context=context_text,
            question=question,
        )
        reply = llm.invoke(filled_prompt).content
    else:
        reply = "I couldn't find relevant information in the document."

    # Record the turn after answering so the prompt only sees earlier turns.
    chat_history.append(HumanMessage(content=question))
    chat_history.append(AIMessage(content=reply))

    return reply, matched

# 7. TEST THE SYSTEM
print("🚀 RAG SYSTEM READY FOR 'THE VERDICT'")
print("=" * 50)


def _source_page_label(chunk):
    """Return the chunk's page number as display text, or 'N/A' if unknown.

    The stored 'page' value is shown plus one (it is treated as a 0-based
    index). A missing or non-integer value yields 'N/A' instead of raising
    TypeError on the addition.
    """
    page = chunk.metadata.get('page')
    return str(page + 1) if isinstance(page, int) else 'N/A'


# Ask questions about the document
questions = [
    "What is this document about?",
    "Who are the main characters?",
    "What is the story about?"
]

for question in questions:
    print(f"\n👤 HUMAN: {question}")
    answer, used_chunks = ask_question(question)
    print(f"🤖 AI: {answer}")
    if used_chunks:
        print(f"   📍 From page: {_source_page_label(used_chunks[0])}")

# 8. INTERACTIVE CHAT
print("\n💬 ASK QUESTIONS ABOUT THE DOCUMENT (type 'quit' to exit)")
print("=" * 60)

while True:
    user_input = input("\n👤 Your question: ").strip()

    if user_input.lower() in ['quit', 'exit', 'bye']:
        break

    # Blank input is ignored and the prompt is shown again.
    if user_input:
        answer, used_chunks = ask_question(user_input)
        print(f"🤖 {answer}")
        if used_chunks:
            print(f"   📍 From page: {_source_page_label(used_chunks[0])}")

print(f"\n✅ Conversation ended. Chat history: {len(chat_history)} messages")

📖 LOADING 'THE VERDICT' PDF
✅ Loaded 35 chunks from the PDF
🚀 RAG SYSTEM READY FOR 'THE VERDICT'

👤 HUMAN: What is this document about?
🤖 AI: The document is about the artist Jack Gisburn who has given up painting, married a rich widow, and moved to the Riviera, much to the disappointment of his female admirers.
   📍 From page: 1

👤 HUMAN: Who are the main characters?
🤖 AI: The main character mentioned in the document is Jack Gisburn, an artist who has given up painting, married a rich widow, and moved to the Riviera. Other characters mentioned include Mrs. Gideon Thwing, Hermia Croft, Mr. Rickham, and Claude Nutley.
   📍 From page: 1

👤 HUMAN: What is the story about?
🤖 AI: The story is about artist Jack Gisburn who gives up painting, marries a rich widow, and moves to the Riviera, much to the disappointment of his female admirers and fellow artists.
   📍 From page: 1

💬 ASK QUESTIONS ABOUT THE DOCUMENT (type 'quit' to exit)



👤 Your question:  bye



✅ Conversation ended. Chat history: 6 messages


In [2]:
import sqlite3
from datetime import datetime
import uuid

# SQLite file used by every helper below; relative to the working directory.
DB_NAME = "rag_app.db"

def get_db_connection():
    """Open the SQLite database at DB_NAME.

    The returned connection uses sqlite3.Row as its row factory, so columns
    can be read by name (row['session_id']). The caller must close it.
    """
    connection = sqlite3.connect(DB_NAME)
    connection.row_factory = sqlite3.Row
    return connection

def create_application_logs():
    """Create the application_logs table if it doesn't exist.

    Columns: id (autoincrement primary key), session_id, user_query,
    gpt_response, model, and created_at (defaults to CURRENT_TIMESTAMP).
    The connection is closed even if the statement or commit fails.
    """
    conn = get_db_connection()
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS application_logs
        (id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT,
        user_query TEXT,
        gpt_response TEXT,
        model TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
        conn.commit()
    finally:
        conn.close()

def insert_application_logs(session_id, user_query, gpt_response, model):
    """Insert one question/answer exchange into application_logs.

    Args:
        session_id: Identifier grouping the rows of one conversation.
        user_query: The user's question text.
        gpt_response: The model's answer text.
        model: Name of the model that produced the answer.

    Values are bound as parameters, and the connection is closed even if the
    insert or commit fails.
    """
    conn = get_db_connection()
    try:
        conn.execute(
            'INSERT INTO application_logs (session_id, user_query, gpt_response, model) VALUES (?, ?, ?, ?)',
            (session_id, user_query, gpt_response, model),
        )
        conn.commit()
    finally:
        conn.close()

def get_chat_history(session_id):
    """Retrieve chat history for a specific session.

    Returns a flat list of {"role", "content"} dicts, two per stored row:
    the "human" question followed by the "ai" answer, oldest row first.

    created_at comes from CURRENT_TIMESTAMP, which has one-second resolution,
    so rows written within the same second are ordered by id as a tie-break.
    The connection is closed even if the query fails.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT user_query, gpt_response FROM application_logs '
            'WHERE session_id = ? ORDER BY created_at, id',
            (session_id,),
        )
        messages = []
        for row in cursor.fetchall():
            messages.extend([
                {"role": "human", "content": row['user_query']},
                {"role": "ai", "content": row['gpt_response']}
            ])
        return messages
    finally:
        conn.close()

def get_all_sessions():
    """Get all unique session IDs, most recently active session first.

    Rows are grouped per session and ordered by each session's latest
    created_at (then latest id, for same-second activity). The previous
    `SELECT DISTINCT ... ORDER BY created_at` sorted on a column that is not
    part of the DISTINCT projection, so the resulting order was not
    well-defined. The connection is closed even if the query fails.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT session_id FROM application_logs '
            'GROUP BY session_id '
            'ORDER BY MAX(created_at) DESC, MAX(id) DESC'
        )
        return [row['session_id'] for row in cursor.fetchall()]
    finally:
        conn.close()

def get_session_messages(session_id):
    """Get all messages for a specific session in chronological order.

    Returns one dict per stored row with keys "timestamp" (created_at),
    "human" (user_query) and "ai" (gpt_response).

    created_at has one-second resolution, so rows written within the same
    second are ordered by id as a tie-break. The connection is closed even if
    the query fails.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT user_query, gpt_response, created_at
            FROM application_logs
            WHERE session_id = ?
            ORDER BY created_at, id
        ''', (session_id,))

        return [
            {
                "timestamp": row['created_at'],
                "human": row['user_query'],
                "ai": row['gpt_response']
            }
            for row in cursor.fetchall()
        ]
    finally:
        conn.close()

def clear_session_history(session_id):
    """Delete all messages for a specific session.

    Rows belonging to other sessions are left untouched. The connection is
    closed even if the delete or commit fails.
    """
    conn = get_db_connection()
    try:
        conn.execute('DELETE FROM application_logs WHERE session_id = ?', (session_id,))
        conn.commit()
    finally:
        conn.close()

# Initialize the database (safe to re-run: the table is only created if missing)
create_application_logs()

# Example usage
# NOTE: each run stores the sample conversation under a fresh session id, so
# re-running this cell adds new rows to the database.
if __name__ == "__main__":
    # Create a new session
    session_id = str(uuid.uuid4())

    # Example conversation
    questions_answers = [
        ("What is GreenGrow Innovations?", "GreenGrow Innovations is a company focused on sustainable agriculture technology."),
        ("What was their first product?", "Their first product was an AI-powered irrigation system."),
        ("When were they founded?", "They were founded in 2018.")
    ]

    # Store the conversation
    for question, answer in questions_answers:
        insert_application_logs(session_id, question, answer, "gpt-3.5-turbo")
        print(f"Human: {question}")
        print(f"AI: {answer}\n")

    # Retrieve and display chat history
    print("📋 CHAT HISTORY:")
    print("=" * 40)
    history = get_chat_history(session_id)
    for message in history:
        # Any role other than "human" is rendered with the AI icon.
        icon = "👤" if message["role"] == "human" else "🤖"
        print(f"{icon} {message['content']}")
        print("-" * 30)

    # Show all sessions
    print("\n📊 ALL SESSIONS:")
    sessions = get_all_sessions()
    for session in sessions:
        print(f"Session ID: {session}")

    # Show detailed session messages
    print(f"\n📝 DETAILED HISTORY FOR SESSION {session_id}:")
    session_messages = get_session_messages(session_id)
    for msg in session_messages:
        print(f"[{msg['timestamp']}] 👤: {msg['human']}")
        print(f"[{msg['timestamp']}] 🤖: {msg['ai']}")
        print()

Human: What is GreenGrow Innovations?
AI: GreenGrow Innovations is a company focused on sustainable agriculture technology.

Human: What was their first product?
AI: Their first product was an AI-powered irrigation system.

Human: When were they founded?
AI: They were founded in 2018.

📋 CHAT HISTORY:
👤 What is GreenGrow Innovations?
------------------------------
🤖 GreenGrow Innovations is a company focused on sustainable agriculture technology.
------------------------------
👤 What was their first product?
------------------------------
🤖 Their first product was an AI-powered irrigation system.
------------------------------
👤 When were they founded?
------------------------------
🤖 They were founded in 2018.
------------------------------

📊 ALL SESSIONS:
Session ID: 5abf926d-77de-4b1c-8725-610fc21bff1a

📝 DETAILED HISTORY FOR SESSION 5abf926d-77de-4b1c-8725-610fc21bff1a:
[2025-09-03 15:44:09] 👤: What is GreenGrow Innovations?
[2025-09-03 15:44:09] 🤖: GreenGrow Innovations is a comp

In [3]:
def view_database_content():
    """Print every table name and every application_logs row to stdout.

    Rows are listed oldest first; created_at has one-second resolution, so
    rows written within the same second are ordered by id as a tie-break.
    The connection is closed even if a query or print fails.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        # View all tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
        print("📊 TABLES IN DATABASE:")
        for table in tables:
            print(f" - {table['name']}")

        # View all data in application_logs
        print("\n📝 ALL CHAT RECORDS:")
        cursor.execute("SELECT * FROM application_logs ORDER BY created_at, id")
        rows = cursor.fetchall()

        for row in rows:
            print(f"\nSession: {row['session_id']}")
            print(f"Question: {row['user_query']}")
            print(f"Answer: {row['gpt_response']}")
            print(f"Model: {row['model']}")
            print(f"Time: {row['created_at']}")
            print("-" * 50)
    finally:
        conn.close()

# Print the tables and every stored chat record; relies on get_db_connection()
# from the previous cell being defined in the running kernel.
view_database_content()

📊 TABLES IN DATABASE:
 - application_logs
 - sqlite_sequence

📝 ALL CHAT RECORDS:

Session: 5abf926d-77de-4b1c-8725-610fc21bff1a
Question: What is GreenGrow Innovations?
Answer: GreenGrow Innovations is a company focused on sustainable agriculture technology.
Model: gpt-3.5-turbo
Time: 2025-09-03 15:44:09
--------------------------------------------------

Session: 5abf926d-77de-4b1c-8725-610fc21bff1a
Question: What was their first product?
Answer: Their first product was an AI-powered irrigation system.
Model: gpt-3.5-turbo
Time: 2025-09-03 15:44:09
--------------------------------------------------

Session: 5abf926d-77de-4b1c-8725-610fc21bff1a
Question: When were they founded?
Answer: They were founded in 2018.
Model: gpt-3.5-turbo
Time: 2025-09-03 15:44:09
--------------------------------------------------
