# File Upload Test

This notebook tests the file upload functionality of the Poster Extraction API.
Example poster path (Windows): `C:\Users\sanjay\Developer\posters-science-posterextraction-beta\example_posters\8228476.pdf`

In [None]:
# Import required libraries
import json
import mimetypes
import traceback
from pathlib import Path

import requests


In [None]:
# Configuration
# Base URL of the API under test; endpoint paths (/health, /extract) are appended to it.
API_URL = "http://localhost:8000"
# Poster file to upload. This is a raw string, so backslashes are already
# literal and must NOT be doubled (doubling puts "\\" into the actual path).
FILE_PATH = r"C:\Users\sanjay\Developer\posters-science-posterextraction-beta\example_posters\10890106.pdf"

# Alternative: use an example file if available
# FILE_PATH = "manual_poster_annotation/42/42.pdf"


In [None]:
# Verify that the configured poster file is present and report its basic facts
file_path = Path(FILE_PATH)
if not file_path.exists():
    print(f"✗ File not found: {FILE_PATH}")
    print("  Please update FILE_PATH in the previous cell")
else:
    # Convert the on-disk size from bytes to megabytes for display
    file_size = file_path.stat().st_size / (1024 ** 2)
    print(
        f"✓ File found: {FILE_PATH}",
        f"  Size: {file_size:.2f} MB",
        f"  Extension: {file_path.suffix}",
        sep="\n",
    )


In [None]:
# Test health endpoint first
# The reply is stored under its own name so that it cannot be mistaken for the
# upload response by later cells that test `'response' in locals()`.
try:
    health_response = requests.get(f"{API_URL}/health", timeout=10)
except requests.exceptions.RequestException as e:
    # Only genuine request failures (refused connection, timeout, bad URL, ...)
    # are reported as connectivity problems.
    print(f"Error connecting to API: {e}")
    print("Make sure the API server is running!")
else:
    print(f"Health check status: {health_response.status_code}")
    try:
        print(f"Response: {json.dumps(health_response.json(), indent=2)}")
    except ValueError:
        # The server answered, but not with JSON: show the body verbatim
        # instead of misreporting it as a connection error.
        print(f"Response: {health_response.text}")


In [None]:
# Upload file and test extraction
# Sends the poster as a multipart form field named "file" to {API_URL}/extract.
# On a 200 reply the parsed JSON body is stored in `result` for the cells below.
if file_path.exists():
    try:
        print(f"Uploading {file_path.name} to {API_URL}/extract...")

        # The third element of the file tuple is the part's Content-Type, so it
        # has to be a MIME type (e.g. "application/pdf"), not a file extension.
        content_type = mimetypes.guess_type(file_path.name)[0] or "application/octet-stream"

        with file_path.open("rb") as f:
            files = {"file": (file_path.name, f, content_type)}
            response = requests.post(
                f"{API_URL}/extract",
                files=files,
                timeout=300  # 5 minutes timeout for processing
            )

        print(f"\nStatus Code: {response.status_code}")

        if response.status_code == 200:
            result = response.json()
            print("✓ Upload successful!")
            print(f"\nResponse keys: {list(result.keys())}")
        else:
            print("✗ Upload failed!")
            try:
                error_data = response.json()
                print(f"Error: {json.dumps(error_data, indent=2)}")
            except ValueError:
                # Error body is not JSON; fall back to the raw response text
                print(f"Error: {response.text}")

    except requests.exceptions.Timeout:
        print("✗ Request timed out (processing may take a while)")
    except Exception as e:
        # Keep the notebook running, but show the full traceback for diagnosis
        print(f"✗ Error: {e}")
        traceback.print_exc()
else:
    print("Skipping upload - file not found")


In [None]:
# Display full response (if upload was successful)
# `result` is only bound when the upload returned 200 AND its body parsed as
# JSON, so it is checked explicitly; a 200 status on `response` alone is not
# enough to guarantee that `result` exists.
if (
    file_path.exists()
    and 'response' in locals()
    and 'result' in locals()
    and response.status_code == 200
):
    print("Full Response:")
    print(json.dumps(result, indent=2, ensure_ascii=False))


In [None]:
# Display summary of extracted content
# Prints the poster title, a per-section character count, and the first few
# creators and titles. Every key is optional: a missing key is skipped, and a
# key present with a null value is treated as empty rather than crashing.
if file_path.exists() and 'result' in locals() and response.status_code == 200:
    print("=" * 60)
    print("Extraction Summary")
    print("=" * 60)

    if "posterContent" in result:
        poster = result["posterContent"] or {}
        if "posterTitle" in poster:
            print(f"\nTitle: {poster['posterTitle']}")

        if "sections" in poster:
            sections = poster["sections"] or []
            print(f"\nSections found: {len(sections)}")
            for i, section in enumerate(sections, 1):
                title = section.get("sectionTitle", "Unknown")
                # `or ""` covers a sectionContent that is present but null
                content_len = len(section.get("sectionContent") or "")
                print(f"  {i}. {title}: {content_len} characters")

    if "creators" in result:
        creators = result["creators"] or []
        print(f"\nCreators: {len(creators)}")
        for creator in creators[:5]:  # Show first 5
            print(f"  - {creator.get('name', 'Unknown')}")

    if "titles" in result:
        titles = result["titles"] or []
        print(f"\nTitles: {len(titles)}")
        for title_obj in titles[:3]:  # Show first 3
            title_text = title_obj.get("title") or "Unknown"
            # Only mark the title as truncated when characters were actually cut
            suffix = "..." if len(title_text) > 80 else ""
            print(f"  - {title_text[:80]}{suffix}")
