# Bucket Management

Buckets are containers for organizing your documents in Storm API. Think of them as folders or categories.

## Setup

In [None]:
import requests
import json
import os
from datetime import datetime

# Configuration
# The API key is read from the STORM_API_KEY environment variable; the
# placeholder below is used only when that variable is not set.
API_KEY = os.getenv("STORM_API_KEY", "your-api-key-here")
# Base URL of the API. Endpoint paths (which start with "/") are appended to
# it, so it must contain exactly one scheme and no trailing slash.
API_URL = "https://live-stargate.sionic.im"

# Header sent with every request; it carries the API key.
headers = {"storm-api-key": API_KEY}

# Helper function
def api_request(method, endpoint, **kwargs):
    """Send a request to the API and return the ``data`` field of the reply.

    Args:
        method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
        endpoint: Path appended to ``API_URL``, e.g. ``"/api/v2/buckets"``.
        **kwargs: Extra arguments forwarded to ``requests.request``
            (``params``, ``json``, ...). A ``timeout`` of 30 seconds is
            applied unless the caller passes its own.

    Returns:
        The value stored under ``"data"`` in the JSON body when the response
        status is 200. Otherwise an error line is printed and None is
        returned; the same happens when the request cannot be completed
        (connection error, timeout, ...).
    """
    url = f"{API_URL}{endpoint}"
    # requests waits indefinitely by default; bound the wait so a stalled
    # connection cannot hang the notebook cell.
    kwargs.setdefault("timeout", 30)

    try:
        response = requests.request(method, url, headers=headers, **kwargs)
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP errors instead of
        # surfacing a traceback.
        print(f"❌ Request failed: {exc}")
        return None

    if response.status_code == 200:
        return response.json()["data"]

    print(f"❌ Error {response.status_code}: {response.text}")
    return None

# Confirm that the setup cell ran to the end.
print("✅ Setup complete")

## 1. Get Agent ID

First, we need an agent to create buckets in.

In [None]:
# Request a single agent (page 1, page size 1); its ID is used by the cells below.
agents_data = api_request(
    "GET",
    "/api/v2/agents",
    params={"page": 1, "size": 1},
)

if not agents_data or not agents_data["data"]:
    # The request returned nothing usable, or the agent list came back empty.
    agent_id = None
    print("❌ No agents found. Please create an agent first.")
else:
    agent = agents_data["data"][0]
    agent_id = agent["id"]
    print(f"✅ Using agent: {agent['name']} (ID: {agent_id})")

## 2. Create Buckets

Let's create several buckets, one for each kind of document we want to organize.

In [None]:
def create_bucket(agent_id, name):
    """Create a bucket called ``name`` under the agent ``agent_id``.

    Returns:
        The bucket data returned by ``api_request`` after printing a
        confirmation line, or None when ``api_request`` yields nothing.
    """
    payload = {"agentId": agent_id, "name": name}
    data = api_request("POST", "/api/v2/buckets", json=payload)

    # Nothing came back: report failure to the caller as None.
    if not data:
        return None

    print(f"✅ Created bucket: {data['name']} (ID: {data['id']})")
    return data

# Create one bucket per document category; skipped entirely without an agent.
if agent_id:
    # Second-resolution suffix so that repeated runs produce distinct names.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

    buckets = [
        f"contracts-{timestamp}",
        f"invoices-{timestamp}",
        f"reports-{timestamp}",
        f"general-{timestamp}",
    ]

    # Collects only the buckets whose creation succeeded.
    created_buckets = []

    print("Creating buckets...\n")
    for bucket_name in buckets:
        bucket = create_bucket(agent_id, bucket_name)
        if not bucket:
            continue
        created_buckets.append(bucket)

## 3. List and Search Buckets

In [None]:
def list_buckets(agent_id, page=1, size=20):
    """Fetch one page of an agent's buckets, print a summary, and return them.

    Args:
        agent_id: ID of the agent whose buckets are listed.
        page: Page number to request.
        size: Number of buckets per page.

    Returns:
        The list of buckets on the requested page, or an empty list when
        ``api_request`` yields nothing.
    """
    query = {"agentId": agent_id, "page": page, "size": size}
    data = api_request("GET", "/api/v2/buckets", params=query)

    if not data:
        return []

    buckets, page_info = data["data"], data["pageInfo"]

    print(f"📦 Found {page_info['totalElements']} buckets (Page {page_info['page']}/{page_info['totalPages']})\n")

    for bucket in buckets:
        print(f"• {bucket['name']}")
        print(f"  ID: {bucket['id']}")
        # "type" may be absent from a bucket; show "default" in that case.
        print(f"  Type: {bucket.get('type', 'default')}")
        # Keep only the first 19 characters of the creation timestamp.
        print(f"  Created: {bucket['createdAt'][:19]}")
        print()

    return buckets

# List the agent's buckets (first page, default page size); skipped when no
# agent was found earlier.
if agent_id:
    all_buckets = list_buckets(agent_id)

## 4. Bucket Organization Strategies

Here are some common patterns for organizing buckets.

In [None]:
# Four example naming schemes for buckets; nothing here calls the API.

# Strategy 1: By Document Type
document_type_buckets = ["pdf-documents", "spreadsheets", "presentations", "text-files"]

# Strategy 2: By Department
department_buckets = ["hr-documents", "finance-reports", "marketing-materials", "engineering-docs"]

# Strategy 3: By Time Period
time_period_buckets = ["2024-q1", "2024-q2", "2024-q3", "2024-q4"]

# Strategy 4: By Project
project_buckets = ["project-alpha", "project-beta", "project-gamma"]

print("📋 Bucket Organization Strategies:\n")

# Each entry pairs a heading with the bucket names printed beneath it. The
# leading "\n" on later headings yields the blank line between sections.
strategy_sections = (
    ("1. By Document Type:", document_type_buckets),
    ("\n2. By Department:", department_buckets),
    ("\n3. By Time Period:", time_period_buckets),
    ("\n4. By Project:", project_buckets),
)

for heading, names in strategy_sections:
    print(heading)
    for b in names:
        print(f"   • {b}")

## 5. Bucket Utility Functions

In [None]:
class BucketManager:
    """Utility class for managing buckets.

    All calls go through ``_request``, which returns None on any failure, so
    the public methods also return None when a request does not succeed.
    """

    def __init__(self, api_key, api_url="https://live-stargate.sionic.im"):
        """Store the API key header and the base URL used for every request.

        Args:
            api_key: Key sent in the ``storm-api-key`` header.
            api_url: Base URL that endpoint paths are appended to.
        """
        self.headers = {"storm-api-key": api_key}
        self.api_url = api_url

    def _request(self, method, endpoint, **kwargs):
        """Send a request and return the ``data`` field of the JSON reply.

        Args:
            method: HTTP method name.
            endpoint: Path appended to ``self.api_url``.
            **kwargs: Extra arguments forwarded to ``requests.request``. A
                ``timeout`` of 30 seconds is applied unless one is passed.

        Returns:
            The value under ``"data"`` when the status is 200. Otherwise an
            error line is printed and None is returned; the same happens
            when the request cannot be completed.
        """
        url = f"{self.api_url}{endpoint}"
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the caller.
        kwargs.setdefault("timeout", 30)
        try:
            response = requests.request(method, url, headers=self.headers, **kwargs)
        except requests.RequestException as exc:
            print(f"❌ Request failed: {exc}")
            return None
        if response.status_code == 200:
            return response.json()["data"]
        # Report the failure instead of returning None without explanation.
        print(f"❌ Error {response.status_code}: {response.text}")
        return None

    def create_bucket(self, agent_id, name):
        """Create a new bucket named ``name`` under ``agent_id``.

        Returns:
            The created bucket's data, or None when the request fails.
        """
        return self._request(
            "POST", "/api/v2/buckets",
            json={"agentId": agent_id, "name": name}
        )

    def get_bucket_by_name(self, agent_id, name):
        """Find a bucket by exact name, scanning the agent's buckets page by page.

        Returns:
            The first bucket whose ``name`` equals ``name``, or None when no
            page contains it or a page request fails.
        """
        page = 1
        while True:
            data = self._request(
                "GET", "/api/v2/buckets",
                params={"agentId": agent_id, "page": page, "size": 100}
            )

            # A failed or empty reply ends the search.
            if not data:
                break

            for bucket in data["data"]:
                if bucket["name"] == name:
                    return bucket

            # Stop once the last page reported by the API has been scanned.
            if page >= data["pageInfo"]["totalPages"]:
                break

            page += 1

        return None

    def get_or_create_bucket(self, agent_id, name):
        """Get existing bucket or create new one.

        Returns:
            The existing bucket when one with this name is found; otherwise
            the result of ``create_bucket`` (None when creation fails).
        """
        bucket = self.get_bucket_by_name(agent_id, name)

        if bucket:
            print(f"✅ Found existing bucket: {name}")
            return bucket

        # NOTE: a failed lookup is indistinguishable from "not found" here,
        # so creation is attempted in both cases.
        print(f"📦 Creating new bucket: {name}")
        return self.create_bucket(agent_id, name)

    def create_bucket_set(self, agent_id, bucket_names):
        """Get or create each named bucket.

        Returns:
            The buckets that were found or created, in input order; names
            whose lookup and creation both yield nothing are left out.
        """
        created = []
        for name in bucket_names:
            bucket = self.get_or_create_bucket(agent_id, name)
            if bucket:
                created.append(bucket)
        return created

# Build a manager that uses the configured API key and the default base URL.
manager = BucketManager(API_KEY)

# Exercise the manager only when an agent was found earlier.
if agent_id:
    print("Testing BucketManager...\n")

    # Looks up "test-bucket" by name and creates it if the lookup finds nothing.
    test_bucket = manager.get_or_create_bucket(agent_id, "test-bucket")

    # Apply the same get-or-create step to several names at once.
    print("\nCreating bucket set...")
    bucket_set_names = ["knowledge-base", "customer-data", "internal-docs"]
    bucket_set = manager.create_bucket_set(agent_id, bucket_set_names)

## 6. Best Practices

Follow these best practices when working with buckets.

In [None]:
print("📚 Bucket Best Practices:\n")

# Each entry pairs a section heading with the lines printed beneath it. The
# leading "\n" on later headings yields the blank line between sections.
best_practice_sections = (
    (
        "1. Naming Conventions:",
        (
            "   ✅ Use descriptive names: 'customer-contracts-2024'",
            "   ✅ Use hyphens or underscores: 'hr_policies'",
            "   ❌ Avoid special characters: 'contracts@2024!'",
            "   ❌ Avoid very long names",
        ),
    ),
    (
        "\n2. Organization:",
        (
            "   • Keep related documents together",
            "   • Don't create too many buckets (harder to manage)",
            "   • Don't put everything in one bucket (harder to search)",
            "   • Consider future growth when designing structure",
        ),
    ),
    (
        "\n3. Access Patterns:",
        (
            "   • Group by how documents will be queried",
            "   • Consider who needs access to what",
            "   • Think about retention policies",
        ),
    ),
)

for heading, tips in best_practice_sections:
    print(heading)
    for tip in tips:
        print(tip)

# Example: Well-organized bucket structure (category -> bucket names)
example_structure = {
    "Legal": ["contracts-active", "contracts-archived", "legal-policies"],
    "Finance": ["invoices-2024", "financial-reports", "budget-planning"],
    "HR": ["employee-handbook", "job-descriptions", "training-materials"],
}

print("\n📂 Example Bucket Structure:")
# Print each category followed by its buckets as a small tree.
for category, buckets in example_structure.items():
    print(f"\n{category}:")
    for bucket in buckets:
        print(f"  └── {bucket}")

## Summary

You've learned how to:
- ✅ Create buckets for organizing documents
- ✅ List and search through buckets
- ✅ Implement organization strategies
- ✅ Use utility functions for bucket management
- ✅ Follow best practices

## Next Steps

- [Upload Documents](./02-upload-documents.ipynb) - Start adding documents to your buckets
- [Document Processing](./03-document-processing.md) - Learn about document learning