In [8]:
import requests
import os
import json

# --- 1. CONFIGURATION ---
# Full URL that get_upload_url() POSTs to: API Gateway host, then the route.
API_ENDPOINT = (
    "https://s35t79fgcl.execute-api.ap-south-1.amazonaws.com"
    "/dev/upload-url"
)
# ---

def get_upload_url(filename, timeout=30):
    """
    Calls Lambda API to get a presigned S3 URL.

    Args:
        filename (str): Name sent to the API in the 'filename' field of the
            JSON request body.
        timeout (float or tuple): Passed to requests.post so that an
            unresponsive endpoint cannot hang the notebook kernel forever.
            Defaults to 30 seconds.

    Returns:
        tuple: (upload_url, document_id) on success. (None, None) when the
        request fails, the status is not 200, the response is not valid
        JSON, or either field is missing. Problems are printed, not raised.
    """
    print(f"1. Requesting upload URL for: {filename}")

    try:
        # We send the filename in the body, as the Lambda code expects.
        response = requests.post(
            API_ENDPOINT, json={'filename': filename}, timeout=timeout
        )
    except requests.exceptions.RequestException as e:
        print("   ❌ NETWORK ERROR: Could not connect to your API Gateway.")
        print("   Check that the API_ENDPOINT is correct and deployed.")
        print(f"   Error: {e}")
        return None, None

    if response.status_code != 200:
        print(f"   ❌ Error: Failed to get URL (Status {response.status_code})")
        print(f"   Response: {response.text}")
        return None, None

    # The payload is either the response JSON itself or nested under a
    # 'body' key, where it is usually a JSON string but may already be
    # decoded. Accept all three shapes instead of crashing on the last one.
    try:
        data = response.json()
        if isinstance(data, dict) and 'body' in data:
            body = data['body']
            data = json.loads(body) if isinstance(body, str) else body
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError subclass.
        print(f"   ❌ Error: Could not decode JSON response: {response.text}")
        return None, None

    # Anything other than a JSON object cannot carry the two fields we need.
    fields = data if isinstance(data, dict) else {}
    upload_url = fields.get('upload_url')
    doc_id = fields.get('document_id')

    if not upload_url or not doc_id:
        print("   ❌ Error: Incomplete response from Lambda.")
        print(f"   Lambda Response: {data}")
        return None, None

    print(f"   ✅ Success. Document ID: {doc_id}")
    return upload_url, doc_id

def upload_file_to_s3(upload_url, file_path, timeout=(10, 300)):
    """
    Uploads the actual file to the presigned URL.

    Args:
        upload_url (str): Presigned URL that accepts an HTTP PUT.
        file_path (str): Local path of the file whose bytes are uploaded.
        timeout (float or tuple): Passed to requests.put; the default is a
            (connect, read) pair in seconds so a stalled connection cannot
            block the notebook indefinitely.

    Returns:
        bool: True if the PUT returned status 200, otherwise False. Local
        file errors and network errors are printed, not raised.
    """
    print(f"\n2. Uploading {os.path.basename(file_path)} to S3...")

    # Read the file in its own try block: requests' RequestException is an
    # OSError subclass, so local I/O errors and network errors are kept
    # apart by construction rather than by except-clause ordering.
    try:
        with open(file_path, 'rb') as f:
            file_data = f.read()
    except FileNotFoundError:
        print(f"   ❌ Error: Local file not found at {file_path}")
        return False
    except OSError as e:
        # e.g. the path is a directory or the file is not readable.
        print(f"   ❌ Error: Could not read local file {file_path}: {e}")
        return False

    try:
        # Make the PUT request with the raw file data
        response = requests.put(upload_url, data=file_data, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print("   ❌ NETWORK ERROR during upload. This could be a firewall.")
        print(f"   Error: {e}")
        return False

    if response.status_code == 200:
        print("   ✅ SUCCESS! File upload complete.")
        return True

    # This will show the <Error> XML from S3 if it fails
    print(f"   ❌ UPLOAD FAILED (Status {response.status_code})")
    print(f"   Response from S3: {response.text}")
    return False

def upload_document(file_path):
    """
    Main function to run the complete upload process from a Jupyter cell.

    Args:
        file_path (str): The local path to the file you want to upload.

    Returns:
        str or None: The document ID returned by get_upload_url() when both
        the URL request and the upload succeed; None if a check or a step
        fails (the reason is printed).
    """

    # --- Check Config ---
    if "YOUR-API-ID" in API_ENDPOINT:
        print("="*50)
        print("❌ ERROR: Please edit the script (line 7)")
        print("   You must set your `API_ENDPOINT` variable.")
        print("="*50)
        return None

    # --- Check File Path ---
    if not os.path.exists(file_path):
        print(f"❌ Error: File not found at {file_path}")
        return None
    # A directory passes os.path.exists() but cannot be opened for upload;
    # reject it here, before an upload URL is requested for it.
    if not os.path.isfile(file_path):
        print(f"❌ Error: Not a regular file: {file_path}")
        return None

    filename = os.path.basename(file_path)

    # --- Run Process ---
    print("="*50)
    upload_url, doc_id = get_upload_url(filename)
    if not (upload_url and doc_id):
        print("\nCould not get upload URL. Aborting.")
        return None

    if not upload_file_to_s3(upload_url, file_path):
        print("\nUpload failed. Please check errors above.")
        return None

    print("\n" + "="*50)
    print("🎉 Process Complete!")
    print(f"   Document ID: {doc_id}")
    print(f"   File: {filename}")
    print("\n   The 'process-document' Lambda should now be triggered.")
    print("="*50)
    return doc_id  # Return the doc_id for use in other cells

In [16]:
# Upload the sample PDF; the cell result is the document ID (None on failure).
upload_document(file_path="1.pdf")

1. Requesting upload URL for: 1.pdf
   ✅ Success. Document ID: 6d368fba

2. Uploading 1.pdf to S3...
   ✅ SUCCESS! File upload complete.

🎉 Process Complete!
   Document ID: 6d368fba
   File: 1.pdf

   The 'process-document' Lambda should now be triggered.


'6d368fba'

In [20]:
import requests
import json
import os

# --- 1. CONFIGURATION ---
# API Gateway host; the two routes used by test_full_rag_pipeline() hang off it.
API_BASE_URL = "https://s35t79fgcl.execute-api.ap-south-1.amazonaws.com"

ANSWER_ENDPOINT = API_BASE_URL + "/dev/answer"
QUERY_ENDPOINT = API_BASE_URL + "/dev/query"

def test_full_rag_pipeline(question, doc_id=None):
    """
    Calls /query to get chunks, then /answer to get a final response.
    
    Args:
        question (str): The user's question.
        doc_id (str, optional): The specific document to search in. 
                                If None, searches all documents.
    """
    
    print("="*60)
    print(f"RAG Pipeline Test")
    print(f"Question: {question}")
    print(f"Document ID: {doc_id or 'All Documents'}")
    print("="*60)
    
    # --- STEP 1: RETRIEVAL (/query) ---
    print("\n1. Calling /query endpoint to find relevant chunks...")
    query_payload = {
        "question": question,
        "document_id": doc_id,
        "top_k": 3 # Request top 3 chunks
    }
    
    try:
        response_query = requests.post(QUERY_ENDPOINT, json=query_payload)
        
        if response_query.status_code != 200:
            print(f"‚ùå /query FAILED (Status {response_query.status_code})")
            print(f"   Response: {response_query.text}")
            return

        # Load the body from the Lambda's response
        query_data = json.loads(response_query.json()['body'])
        top_chunks = query_data.get('top_chunks', [])
        
        if not top_chunks:
            print("‚ùå /query SUCCEEDED, but no relevant chunks were found.")
            return
            
        print(f"‚úÖ /query Success. Found {len(top_chunks)} relevant chunks.")

    except Exception as e:
        print(f"‚ùå Error during /query step: {e}")
        return

    # --- STEP 2: AUGMENTED GENERATION (/answer) ---
    print("\n2. Calling /answer endpoint to generate a final answer...")
    answer_payload = {
        "question": question,
        "top_chunks": top_chunks # Pass the chunks we just found
    }
    
    try:
        response_answer = requests.post(ANSWER_ENDPOINT, json=answer_payload)
        
        if response_answer.status_code != 200:
            print(f"‚ùå /answer FAILED (Status {response_answer.status_code})")
            print(f"   Response: {response_answer.text}")
            return
            
        # Load the body from the Lambda's response
        answer_data = json.loads(response_answer.json()['body'])
        final_answer = answer_data.get('answer', 'No answer found.')
        sources = answer_data.get('sources', [])
        
        print("\n" + "="*60)
        print("üéâ RAG PIPELINE SUCCEEDED!")
        print("="*60)
        
        print("\nFINAL ANSWER:\n")
        print(final_answer)
        
        print("\n\nSOURCES USED:\n")
        for i, source in enumerate(sources, 1):
            print(f"  {i}. {source['filename']} (Index: {source['chunk_index']}, Score: {source['similarity_score']})")
            print(f"     Excerpt: \"{source['excerpt']}\"\n")

    except Exception as e:
        print(f"‚ùå Error during /answer step: {e}")
        return

# --- RUN THE TEST ---
# "6d368fba" is the document ID shown in the output of the upload cell above.
MY_QUESTION = "What methodology was used in this study?"
MY_DOCUMENT_ID = "6d368fba"

test_full_rag_pipeline(question=MY_QUESTION, doc_id=MY_DOCUMENT_ID)

# --- TEST 2: Ask a question to ALL documents (if you have more than one) ---
# test_full_rag_pipeline("What is the main conclusion about large language models?", None)

RAG Pipeline Test
Question: What methodology was used in this study?
Document ID: 6d368fba

1. Calling /query endpoint to find relevant chunks...
❌ Error during /query step: 'body'
