# Document Chat Bot

This notebook demonstrates a multi-document chatbot for querying PDF documents.


## 1. Install Required Packages


In [None]:
# Install the LLM client package imported in section 2; -U upgrades it if it is already installed.
# (%pip targets the environment of the running kernel. Keep comments on their own lines so they
# are not passed to pip as arguments.)
%pip install goldmansachs.awm_genai -U
# Supporting packages: pandas (chat-history table), ipywidgets (upload/model widgets),
# pdfplumber (PDF text extraction).
# NOTE(review): python-dotenv is installed here but no cell visible in this notebook imports it — confirm it is still needed.
%pip install python-dotenv pandas ipywidgets pdfplumber


## 2. Import Libraries and Configuration


In [None]:
import io
import os
import tempfile
import traceback
from datetime import datetime
from typing import List, Dict

import ipywidgets as widgets
import pandas as pd
import pdfplumber
from goldmansachs.awm_genai import LLM, LLMConfig
from IPython.display import display, HTML


In [None]:
# --- Application settings ---------------------------------------------------
# These values are handed to LLMConfig when the LLM is initialised in section 3.
app_id = "trai"
env = "uat"
temperature = 0
log_level = "DEBUG"

# --- Model choice -----------------------------------------------------------
# Models offered in the dropdown below
available_models = ["gemini-2.5-pro", "gemini-2.5-flash-lite"]

# Dropdown pre-selected on the flash-lite model; its current value is read in section 3
model_selector = widgets.Dropdown(
    description='Model:',
    options=available_models,
    value="gemini-2.5-flash-lite",
    style={'description_width': 'initial'},
)

# Heading, short guidance on the two models, then the selector itself
display(HTML("<h3>Select Model</h3>"))
display(HTML("<p><b>gemini-2.5-pro:</b> More capable, better for complex questions<br><b>gemini-2.5-flash-lite:</b> Faster responses, good for simple queries</p>"))
display(model_selector)

# Echo the fixed settings so they are visible in the cell output
print(f"\nApp ID: {app_id}")
print(f"Environment: {env}")


## 3. Initialize LLM


In [None]:
# Initialize LLM with selected model
# The dropdown is read once, when this cell runs: changing the selection afterwards
# has no effect on `llm` until this cell is executed again.
model_name = model_selector.value

# Bundle the settings from the configuration cell (app_id, env, temperature, log_level)
# together with the chosen model name.
llm_config = LLMConfig(
    app_id=app_id,
    env=env,
    model_name=model_name,
    temperature=temperature,
    log_level=log_level,
)

# `llm` is the client object used by ask_question() in section 5 (via llm.invoke).
llm = LLM.init(config=llm_config)
print(f"✅ LLM initialized successfully with {model_name}")


## 4. Upload Documents

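Use the file upload widget below to select your PDF documents, then click **Extract Text**. Extraction only runs when the button is clicked, so run the cells in sections 5–8 after it has finished.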


In [None]:
# Notebook-level state written by the "Extract Text" click handler:
# the combined text of all documents, and the names of the uploaded files.
extracted_text = None
document_names = []

# Output area that captures the handler's status messages
output = widgets.Output()

# Button that starts the extraction
process_button = widgets.Button(
    icon='check',
    button_style='primary',
    description='Extract Text',
)

# File picker limited to PDFs; several files may be selected at once
upload_widget = widgets.FileUpload(
    description='Select PDFs',
    multiple=True,
    accept='.pdf',
)

def _iter_uploaded_files(upload_value):
    """Yield ``(filename, content)`` pairs from a ``FileUpload.value``.

    Two layouts are handled: a sequence of dicts carrying ``name`` and
    ``content`` keys (ipywidgets 8), and a dict keyed by filename whose values
    carry ``content`` (ipywidgets 7).
    """
    if isinstance(upload_value, dict):
        for filename, file_info in upload_value.items():
            yield filename, file_info['content']
    else:
        for file_info in upload_value:
            yield file_info['name'], file_info['content']


def _extract_pdf_text(filename, content):
    """Return ``(doc_text, page_count, text_page_count)`` for one uploaded PDF.

    ``doc_text`` starts with a ``--- Document: <name> ---`` header followed by
    one ``[Page N]`` section per page that produced text.
    """
    parts = [f"\n\n--- Document: {filename} ---\n\n"]
    # pdfplumber.open accepts a file-like object, so the upload is parsed from
    # memory and never written to disk.
    with pdfplumber.open(io.BytesIO(content)) as pdf:
        page_count = len(pdf.pages)
        for page_num, page in enumerate(pdf.pages, 1):
            text = page.extract_text()
            if text:  # pages without extractable text are skipped
                parts.append(f"\n[Page {page_num}]\n{text}\n")
    return "".join(parts), page_count, len(parts) - 1


def on_process_button_clicked(b):
    """Click handler: extract text from every uploaded PDF.

    Reads the files from ``upload_widget``, reports progress inside ``output``
    and, only if every file was processed, publishes the results to the
    notebook-level ``extracted_text`` and ``document_names``. If any file
    fails, both globals keep their previous values so they never disagree.

    Args:
        b: The button instance passed by ipywidgets; unused.
    """
    global extracted_text, document_names

    with output:
        output.clear_output()

        if not upload_widget.value:
            print("⚠️ Please select PDF files first")
            return

        try:
            uploads = list(_iter_uploaded_files(upload_widget.value))
            print(f"Extracting text from {len(uploads)} files...\n")

            # Collect into locals first; the globals are assigned together below.
            all_text = []
            names = []
            for filename, content in uploads:
                doc_text, page_count, text_pages = _extract_pdf_text(filename, content)
                all_text.append(doc_text)
                names.append(filename)
                print(f"  ✓ {filename} - {page_count} pages extracted")
                if text_pages == 0:
                    print(f"  ⚠️ {filename} - no extractable text found (scanned or image-only PDF?)")

            # Combine all extracted text and publish both results at once
            extracted_text = "\n\n".join(all_text)
            document_names = names

            print(f"\n✅ Successfully extracted text from {len(uploads)} documents")
            print(f"Total characters: {len(extracted_text):,}")

        except Exception as e:
            # Report the failure in the output area instead of losing it in the widget callback
            print(f"❌ Error: {str(e)}")
            traceback.print_exc()

# Run the extraction handler whenever the button is clicked
process_button.on_click(on_process_button_clicked)

# Render the heading followed by the picker, the button and the status area, in that order
display(
    HTML("<h3>📁 Upload and Extract PDF Documents</h3>"),
    upload_widget,
    process_button,
    output,
)

print("👆 Use the widget above to select PDFs and extract their text")


## 5. Ask Questions


In [None]:
# Function to ask questions.
# Defined unconditionally so that it exists even when this cell runs before the
# PDFs have been extracted; readiness is checked each time it is called.
def ask_question(question: str) -> str:
    """Ask a question about the extracted documents.

    The notebook-level ``extracted_text`` is read at call time, so the answer
    always uses the most recent extraction.

    Args:
        question: The question to put to the LLM.

    Returns:
        The value returned by ``llm.invoke`` for the assembled prompt
        (annotated as ``str`` — TODO confirm the actual return type).

    Raises:
        RuntimeError: If no document text has been extracted yet.
        ValueError: If ``question`` is empty or only whitespace.
    """
    if extracted_text is None:
        raise RuntimeError("Please extract text from PDFs first (see section 4)")
    if not question or not question.strip():
        raise ValueError("question must be a non-empty string")

    # Create prompt with context: the full document text followed by the question
    full_prompt = f"""Based on the following document content, please answer the question.

Document Content:
{extracted_text}

Question: {question}

Please provide a detailed answer based only on the information in the documents above. If the information is not in the documents, please say so."""

    # Get response
    return llm.invoke(full_prompt)


# Report whether documents are already loaded
if extracted_text is None:
    print("⚠️ Please extract text from PDFs first (see section 4)")
else:
    print("✅ Ready to answer questions!")
    print(f"Documents loaded: {', '.join(document_names)}")
    print("\nUse ask_question('your question') to ask questions.")


## 6. Example Question


In [None]:
# Run one sample question, but only when documents are extracted and ask_question exists
if extracted_text is not None and 'ask_question' in globals():
    # Ask a question about the documents
    question = "What are the key findings in the documents?"

    print(f"Question: {question}\n")
    print("Generating response...\n")

    response = ask_question(question)

    # Frame the answer between two 80-character rules
    print("Response:", "-" * 80, sep="\n")
    print(response)
    print("-" * 80)
elif extracted_text is None:
    print("⚠️ Please extract text from PDFs first (see section 4)")
else:
    print("⚠️ Please run section 5 first")


## 7. Interactive Chat Interface

Chat with your documents interactively.


In [None]:
# Check if everything is ready
if extracted_text is None:
    print("⚠️ Please extract text from PDFs first (see section 4)")
elif 'ask_question' not in globals():
    # chat_with_documents delegates to ask_question, so it must exist first
    print("⚠️ Please run section 5 first")
else:
    # Chat history: one dict per exchange. Re-running this cell starts a fresh history.
    chat_history = []

    def chat_with_documents(question: str) -> str:
        """Ask a question about the documents and record the exchange.

        Delegates to ``ask_question`` and appends the timestamp, question and
        response to ``chat_history``. Each question is answered independently:
        earlier exchanges are logged but are not sent to the LLM.

        Args:
            question: The question to ask.

        Returns:
            The response produced by ``ask_question``.
        """
        response = ask_question(question)

        # Store in chat history
        chat_history.append({
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "question": question,
            "response": response
        })

        return response

    print("✅ Chat interface ready!")
    print("\nUse chat_with_documents('your question') to ask questions.")
    print("\nExample:")
    print("  chat_with_documents('What are the key findings?')")


## 8. Example: Ask Questions


In [None]:
# Example: Ask your first question
if 'chat_with_documents' in globals():
    response = chat_with_documents("What are the main topics in the documents?")
    print(response)
else:
    print("⚠️ Please complete the setup first")


## 9. View Chat History


In [None]:
# Show the recorded exchanges as a table: one row per question, with timestamp and response.
# A missing or empty chat_history falls through to the hint below.
if not globals().get('chat_history'):
    print("No chat history yet. Start asking questions!")
else:
    df_history = pd.DataFrame(chat_history)
    display(df_history)
