# Document Chat Bot

This notebook demonstrates a multi-document chatbot for querying PDF documents.


## 1. Install Required Packages


In [None]:
%pip install goldmansachs.awm_genai -U
%pip install python-dotenv pandas ipywidgets


## 2. Import Libraries and Configuration


In [None]:
from goldmansachs.awm_genai import DocUtils, LLM, LLMConfig
import os
from typing import List, Dict
import pandas as pd
from datetime import datetime
import tempfile
from IPython.display import display, HTML
import ipywidgets as widgets


In [None]:
# --- Deployment target ---------------------------------------------------
# Both values are handed to DocUtils and LLMConfig in later cells.
app_id, env = "trai", "uat"

# --- Model settings --------------------------------------------------------
# Forwarded unchanged to LLMConfig when the LLM is initialised.
model_name = "gemini-2.0-flash"
temperature = 0
log_level = "DEBUG"

# Echo the active settings so the notebook output records what was used.
print(f"App ID: {app_id}")
print(f"Environment: {env}")
print(f"Model: {model_name}")


## 3. Initialize Document Utils


In [None]:
# Build the document helper for the app/environment chosen in the
# configuration cell; the upload handler calls `doc_utils.upload(...)`.
doc_utils = DocUtils(
    app_id=app_id,
    env=env,
)
print("Document utilities initialized successfully")


## 4. Upload Documents

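Use the file upload widget below to select your PDF documents, then click **Upload Documents** to upload them. Status messages and the uploaded document details appear beneath the button.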


In [None]:
# Shared notebook state. The click handler rebinds both names via `global`.
uploaded_documents = None   # result of the most recent doc_utils.upload call
temp_file_paths = []        # temp files written for the most recent upload

# File picker restricted to PDFs; several files may be chosen at once.
upload_widget = widgets.FileUpload(accept='.pdf', multiple=True, description='Select PDFs')

# Button that triggers the actual upload of the selected files.
upload_button = widgets.Button(description='Upload Documents', button_style='primary', icon='upload')

# Output area that receives the handler's status messages.
output = widgets.Output()

def _iter_uploaded_files(value):
    """Yield ``(filename, content)`` pairs from a ``FileUpload.value``.

    Two layouts are accepted:

    * a mapping ``filename -> {'content': ...}`` (the layout used by
      ipywidgets 7.x);
    * a sequence of records that each carry ``'name'`` and ``'content'``
      (the layout used by ipywidgets 8.x).
    """
    if isinstance(value, dict):
        for filename, file_info in value.items():
            yield filename, file_info['content']
    else:
        for file_info in value:
            yield file_info['name'], file_info['content']


def on_upload_button_clicked(b):
    """Handle a click on the upload button.

    Writes every file selected in ``upload_widget`` to a temporary ``.pdf``
    file, passes the resulting paths to ``doc_utils.upload`` and stores the
    result in the module-level ``uploaded_documents``. Progress and errors
    are printed into the ``output`` widget.

    Args:
        b: The argument ipywidgets passes to click callbacks; unused.

    Side effects:
        Rebinds the globals ``uploaded_documents`` and ``temp_file_paths``.
        Temp files from the previous click are deleted before new ones are
        written. Any exception is caught, reported in ``output`` together
        with its traceback, and not re-raised.
    """
    global uploaded_documents, temp_file_paths

    with output:
        output.clear_output()

        if not upload_widget.value:
            print("⚠️ Please select PDF files first")
            return

        try:
            # Clear previous temp files
            for temp_path in temp_file_paths:
                if os.path.exists(temp_path):
                    os.unlink(temp_path)
            temp_file_paths = []

            # Save uploaded files to temp directory
            files = upload_widget.value
            print(f"Processing {len(files)} files...")

            for filename, content in _iter_uploaded_files(files):
                # delete=False keeps the file on disk after the `with` closes
                # it, so it can be handed to doc_utils.upload by path.
                with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                    # Record the path before writing: if the write fails the
                    # file is still tracked and removed on the next click.
                    temp_file_paths.append(tmp_file.name)
                    tmp_file.write(content)
                    print(f"  ✓ {filename}")

            # Upload to system
            print("\nUploading documents to system...")
            uploaded_documents = doc_utils.upload(file_paths=temp_file_paths)

            print(f"\n✅ Successfully uploaded {len(uploaded_documents)} documents")
            print("\nDocument Details:")
            for i, doc in enumerate(uploaded_documents, 1):
                print(f"  {i}. {doc}")

        except Exception as e:
            # Report inside the output widget instead of failing silently in
            # the callback.
            print(f"❌ Error: {str(e)}")
            import traceback
            traceback.print_exc()

# Attach the click handler to the upload button.
upload_button.on_click(on_upload_button_clicked)

# Render heading, file picker, button and status area, top to bottom.
display(
    HTML("<h3>📁 Upload PDF Documents</h3>"),
    upload_widget,
    upload_button,
    output,
)

print("👆 Use the widget above to select and upload your PDF documents")


## 5. Initialize LLM with Configuration


In [None]:
# Only build the LLM once an upload has produced documents; otherwise
# point the reader back to the upload widget.
if uploaded_documents is not None:
    # Bundle the values from the configuration cell into one config object.
    llm_config = LLMConfig(
        app_id=app_id, env=env, model_name=model_name,
        temperature=temperature, log_level=log_level,
    )

    # Create the LLM client from that configuration.
    llm = LLM.init(config=llm_config)
    print("✅ LLM initialized successfully")
    print(f"Model: {model_name}")
    print(f"Temperature: {temperature}")

    # Expose the uploaded documents under the `documents` name.
    documents = uploaded_documents
else:
    print("⚠️ Please upload documents first using the widget above")


## 6. Query Documents - Single Question


In [None]:
# Run one fixed question against the uploaded documents.
#
# Preconditions (checked below): documents were uploaded in section 4 and
# the `llm` client was created in section 5.
if uploaded_documents is None:
    print("⚠️ Please upload documents first (see section 4)")
elif 'llm' not in globals():
    print("⚠️ Please initialize LLM first (see section 5)")
else:
    # Ask a question about the documents
    question = "Write a summary about the documents."

    print(f"Question: {question}\n")
    print("Generating response...\n")

    # Query `uploaded_documents` directly. The `documents` alias is only
    # refreshed when section 5 is re-run, so after a re-upload it would
    # still point at the previous set of documents.
    response = llm.invoke(
        question,
        documents=uploaded_documents,
    )

    print("Response:")
    print("-" * 80)
    print(response)
    print("-" * 80)


## 7. Interactive Chat Interface

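Chat with your documents interactively: the cell below defines `chat_with_documents(question)`, which sends a question to the LLM and records each exchange in `chat_history`.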


In [None]:
# Define the chat helper once documents are uploaded (section 4) and the
# `llm` client exists (section 5); otherwise tell the reader what is missing.
if uploaded_documents is None:
    print("⚠️ Please upload documents first (see section 4)")
elif 'llm' not in globals():
    print("⚠️ Please initialize LLM first (see section 5)")
else:
    # Chat history: one dict per exchange. Re-running this cell resets it.
    chat_history = []

    def chat_with_documents(question: str) -> str:
        """Send a question to the LLM and get a response.

        Args:
            question: The question to ask about the uploaded documents.

        Returns:
            Whatever ``llm.invoke`` returns for the question.

        Side effects:
            Appends a record with ``timestamp``, ``question`` and
            ``response`` keys to the module-level ``chat_history``.
        """
        # Read `uploaded_documents` at call time so that a re-upload is
        # picked up immediately. The `documents` alias is only refreshed
        # when section 5 is re-run and could otherwise be stale.
        response = llm.invoke(
            question,
            documents=uploaded_documents,
        )

        # Store in chat history
        chat_history.append({
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "question": question,
            "response": response
        })

        return response

    print("✅ Chat interface ready!")
    print("\nUse chat_with_documents('your question') to ask questions.")
    print("\nExample:")
    print("  chat_with_documents('What are the key findings?')")


## 8. Example: Ask Questions


In [None]:
# `chat_with_documents` only exists once the section 7 cell has passed its
# setup checks, so bail out with a hint when it is missing.
if 'chat_with_documents' not in globals():
    print("⚠️ Please complete the setup first")
else:
    # Ask a first question through the chat helper and show the answer.
    response = chat_with_documents("What are the key findings in the documents?")
    print(response)


## 9. View Chat History


In [None]:
# Nothing to show if the chat cell has not run yet or no question was asked.
if 'chat_history' not in globals() or not chat_history:
    print("No chat history yet. Start asking questions!")
else:
    # One row per exchange, built from the recorded history entries.
    df_history = pd.DataFrame(chat_history)
    display(df_history)
