# Vector Store API Service - Google Colab Example

This notebook demonstrates how to use the deployed Vector Store API service from Google Colab.


In [None]:
# Install required packages
!pip install requests

In [None]:
import requests
import json
import os
from datetime import datetime

In [None]:
# Configuration
# Base URL of the deployed Vector Store API service; the client below is constructed from it.
API_BASE_URL = "https://your-service.replit.app"  # Placeholder - replace with your actual deployment URL

class VectorStoreAPIClient:
    """Minimal HTTP client for the Vector Store API service.

    Each method returns the decoded JSON body of the response. Failures
    (connection errors, timeouts, bodies that are not valid JSON) are not
    raised; they are reported as a dict carrying the error text so that the
    calling notebook cell can print a message and keep going.
    """

    def __init__(self, base_url, timeout=30):
        """Store the connection settings.

        Args:
            base_url: Root URL of the deployed service. Trailing slashes are
                stripped so endpoint paths can be appended with a single "/".
            timeout: Seconds to wait for the GET endpoints (/health, /info)
                before giving up.
        """
        # Without this, "https://host/" would produce "https://host//health".
        self.base_url = str(base_url).rstrip("/")
        self.timeout = timeout

    def check_health(self):
        """Check if the service is healthy.

        Returns:
            The JSON body of GET /health, or
            {"status": "error", "message": <text>} if the request or the
            JSON decoding failed.
        """
        try:
            # An explicit timeout keeps an unreachable service from hanging the cell.
            response = requests.get(f"{self.base_url}/health", timeout=self.timeout)
            return response.json()
        except Exception as e:
            # Deliberate best-effort: surface any failure as data, not as a traceback.
            return {"status": "error", "message": str(e)}

    def get_service_info(self):
        """Get service information.

        Returns:
            The JSON body of GET /info, or {"error": <text>} if the request
            or the JSON decoding failed.
        """
        try:
            response = requests.get(f"{self.base_url}/info", timeout=self.timeout)
            return response.json()
        except Exception as e:
            return {"error": str(e)}

    def upload_and_attach(self, file_path, vector_store_id=None, attributes=None, timeout=300):
        """Ask the service to upload a file and attach it to a vector store.

        Only the path string is sent in the JSON body; this client does not
        read or transmit the file's contents.
        NOTE(review): the service therefore has to resolve `file_path` on its
        side - confirm this matches the deployed API's expectations.

        Args:
            file_path: Path of the file, sent as the "file_path" field.
            vector_store_id: Optional target vector store; the field is
                omitted from the request when this is falsy.
            attributes: Optional dict of custom attributes; an empty dict is
                sent when this is None or empty.
            timeout: Seconds to wait for the response (default 5 minutes,
                to allow for large files).

        Returns:
            The JSON body of POST /upload_and_attach, or
            {"success": False, "error": <text>} if the request or the JSON
            decoding failed.
        """
        payload = {
            "file_path": file_path,
            "attributes": attributes or {}
        }

        if vector_store_id:
            payload["vector_store_id"] = vector_store_id

        try:
            response = requests.post(
                f"{self.base_url}/upload_and_attach",
                headers={"Content-Type": "application/json"},
                json=payload,
                timeout=timeout
            )
            return response.json()
        except Exception as e:
            return {"success": False, "error": str(e)}

# Client instance shared by the cells that follow
client = VectorStoreAPIClient(base_url=API_BASE_URL)

In [None]:
# Test 1: Check service health
print("🔍 Testing service health...")
health = client.check_health()
print(f"Status: {health.get('status', 'unknown')}")

# Any status other than 'healthy' (including a missing one) is reported as a failure.
if health.get('status') != 'healthy':
    print(
        "❌ Service is not healthy. Please check the deployment.",
        f"Error: {health.get('message', 'Unknown error')}",
        sep="\n",
    )
else:
    print(
        "✅ Service is healthy and ready to use!",
        f"Version: {health.get('version', 'unknown')}",
        f"OpenAI Configured: {health.get('openai_configured', False)}",
        sep="\n",
    )

In [None]:
# Test 2: Get service information
print("\n📋 Getting service information...")
info = client.get_service_info()

if 'service' in info:
    # Read the optional fields with defaults so that a partial /info payload
    # is still reported instead of aborting the cell with a KeyError.
    endpoints = info.get('endpoints') or {}
    features = info.get('features') or []

    print(f"Service: {info['service']}")
    print(f"Description: {info.get('description', 'N/A')}")
    print(f"Available endpoints: {len(endpoints)} endpoints")
    print(f"Features: {len(features)} features")

    print("\nEndpoints:")
    for endpoint, desc in endpoints.items():
        print(f"  • {endpoint}: {desc}")
else:
    # The client reports request/decoding failures under the 'error' key.
    print(f"Error getting service info: {info.get('error', 'Unknown error')}")

In [None]:
# Test 3: Upload a file (Note: You'll need to upload a file to Colab first)
# Upload a file to Colab using the file browser, then update the path below

print("\n📤 Testing file upload and vector store attachment...")
print("Note: Upload a PDF file to Colab first, then update the file_path below")

# Example - update this path to your uploaded file
test_file_path = "/content/your-document.pdf"  # Update this path

if not os.path.exists(test_file_path):
    # No file at the configured path yet: print the setup steps and stop here.
    print(
        f"❌ File not found: {test_file_path}",
        "💡 To test file upload:",
        "   1. Upload a PDF file to Colab using the file browser",
        "   2. Update the 'test_file_path' variable above",
        "   3. Run this cell again",
        sep="\n",
    )
else:
    print(f"Found file: {test_file_path}")

    # Metadata describing this Colab session, sent along with the upload request
    colab_attributes = {
        "source": "google_colab",
        "session_id": "colab_test_session",
        "uploaded_at": datetime.now().isoformat(),
        "notebook": "vector_store_api_test",
        "experiment": "cross_platform_integration",
        "user_type": "researcher"
    }

    print("\nUploading file with custom attributes...")
    result = client.upload_and_attach(file_path=test_file_path, attributes=colab_attributes)

    if not result.get("success"):
        print(f"❌ Upload failed: {result.get('error')}")
    else:
        # These prints stay separate: each one indexes into the response, and
        # earlier lines should still appear if a later field is missing.
        print("✅ Upload successful!")
        print(f"📄 File ID: {result['file']['id']}")
        print(f"📊 File size: {result['file']['bytes']} bytes")
        print(f"🔗 Vector store status: {result['vector_store_file']['status']}")

        print("\n🏷️  Applied attributes:")
        attrs = result['applied_attributes']
        for key, value in attrs.items():
            print(f"  • {key}: {value}")

        # Well-known attribute names are shown again, with 'N/A' for any that are absent.
        print(
            "\n📊 Auto-extracted metadata:",
            f"  • Company: {attrs.get('company', 'N/A')}",
            f"  • Document Type: {attrs.get('document_type', 'N/A')}",
            f"  • Year: {attrs.get('year', 'N/A')}",
            f"  • Category: {attrs.get('category', 'N/A')}",
            sep="\n",
        )

In [None]:
# Example: How to use this from your research workflow
print("\n🧪 Example: Integration with Research Workflow")
print("=" * 50)

# Copy-paste snippet shown to the reader; this cell only prints it, it is never executed here.
# The request in the snippet carries an explicit timeout so that readers who copy it
# do not end up with a call that can block forever on a stalled connection.
example_code = '''
# In your research notebook, you can now easily upload documents:

import requests

# Your deployed API URL
api_url = "https://your-service.replit.app"

# Upload research papers with custom metadata
def upload_research_document(file_path, paper_title, authors, year):
    attributes = {
        "source": "research_pipeline",
        "paper_title": paper_title,
        "authors": authors,
        "publication_year": str(year),
        "research_domain": "AI/ML",
        "uploaded_by": "researcher_name"
    }
    
    response = requests.post(
        f"{api_url}/upload_and_attach",
        json={
            "file_path": file_path,
            "attributes": attributes
        },
        timeout=300  # 5 minute timeout for large files
    )
    
    return response.json()

# Example usage:
# result = upload_research_document(
#     "/content/transformer_paper.pdf",
#     "Attention Is All You Need", 
#     "Vaswani et al.",
#     2017
# )
'''

print(example_code)

## 🚀 Deployment Instructions

To make your API available on the internet:

### Option 1: Replit Deployments (Recommended)
1. In your Replit project, click the "Deploy" button
2. Choose "Autoscale" deployment
3. Your service will be available at `https://your-project.replit.app`
4. Update the `API_BASE_URL` in this notebook with your deployment URL

### Option 2: Docker Deployment
1. Use the provided `Dockerfile` and `docker-compose.yml`
2. Deploy to any cloud provider (AWS, GCP, Azure, DigitalOcean)
3. Expose port 5001 to the internet

### Option 3: Cloud Functions
- Deploy as AWS Lambda, Google Cloud Functions, or Azure Functions
- Use the provided code as the function handler

## 🔒 Security Considerations

For production use:
- Add API key authentication
- Implement rate limiting
- Use HTTPS
- Validate file types and sizes
- Add monitoring and logging
