# Connection Layer Exercises

Build your own cloud storage connections!

In [None]:
from abc import ABC, abstractmethod
from typing import Optional

class BaseConnection(ABC):
    """Abstract interface shared by the storage connections in this notebook.

    Both methods are abstract, so a subclass has to override each of them
    before it can be instantiated.
    """

    @abstractmethod
    def get_path(self, relative_path: str) -> str:
        """Turn ``relative_path`` into the backend's full path or URI."""

    @abstractmethod
    def validate(self) -> None:
        """Check the connection's configuration."""

## Exercise 1: S3Connection

Implement an AWS S3 connection.

**Requirements**:
- `bucket`: S3 bucket name
- `prefix`: Optional path prefix within bucket
- `region`: AWS region (default: "us-east-1")
- `access_key_id`: AWS access key
- `secret_access_key`: AWS secret key
- URI format: `s3://bucket/prefix/path`

**Methods**:
- `get_path(relative_path)`: Return full S3 URI
- `validate()`: Check that `bucket`, `access_key_id`, and `secret_access_key` are provided
- `pandas_storage_options()`: Return dict with AWS credentials

In [None]:
class S3Connection(BaseConnection):
    """AWS S3 connection (exercise stub: every method body is still a TODO).

    Once implemented, paths should resolve to ``s3://bucket/prefix/path`` and
    the AWS credentials should be available to pandas/fsspec.
    """

    def __init__(
        self,
        bucket: str,
        prefix: str = "",
        region: str = "us-east-1",
        access_key_id: Optional[str] = None,
        secret_access_key: Optional[str] = None,
        validate: bool = True,
    ) -> None:
        """Store the connection settings.

        Args:
            bucket: S3 bucket name.
            prefix: Optional path prefix within the bucket.
            region: AWS region.
            access_key_id: AWS access key.
            secret_access_key: AWS secret key.
            validate: NOTE(review): presumably triggers ``self.validate()``
                during construction when true; the stub does not show this,
                so confirm before relying on it.
        """
        # TODO: Initialize instance variables
        pass

    def validate(self) -> None:
        """Validate S3 connection configuration.

        NOTE(review): the exception type for a missing value is not specified
        by the exercise; ``ValueError`` would match the other exercises here.
        """
        # TODO: Check bucket, access_key_id, secret_access_key
        pass

    def get_path(self, relative_path: str) -> str:
        """Get full s3:// URI for relative path.

        Args:
            relative_path: Object path relative to the bucket/prefix.

        Returns:
            The full ``s3://bucket/prefix/relative_path`` URI.
        """
        # TODO: Build s3://bucket/prefix/relative_path
        # With the default empty prefix the factory test expects
        # "s3://my-bucket/test.parquet", i.e. no doubled slash.
        pass

    def pandas_storage_options(self) -> dict:
        """Get storage options for pandas/fsspec.

        Returns:
            Dict of credentials; the S3 test reads the access key back from
            the ``"key"`` entry.
        """
        # TODO: Return dict with key, secret, region
        pass

### Test S3Connection

In [None]:
# Build a fully specified connection for the checks below.
conn = S3Connection(
    bucket="my-data-bucket",
    prefix="analytics/v2",
    region="us-west-2",
    access_key_id="AKIA...",
    secret_access_key="secret...",
)

# get_path is expected to join bucket, prefix, and relative path into one URI.
s3_uri = conn.get_path("raw/sales.parquet")
assert s3_uri == "s3://my-data-bucket/analytics/v2/raw/sales.parquet"

# The access key is expected under the "key" entry of the storage options.
s3_options = conn.pandas_storage_options()
assert s3_options["key"] == "AKIA..."

print("✅ S3Connection tests passed!")

## Exercise 2: GCSConnection

Implement a Google Cloud Storage connection.

**Requirements**:
- `bucket`: GCS bucket name
- `prefix`: Optional path prefix
- `project`: GCP project ID
- `credentials_path`: Path to service account JSON (optional)
- URI format: `gs://bucket/prefix/path`

**Methods**:
- `get_path(relative_path)`: Return full GCS URI
- `validate()`: Check bucket and project are provided
- `pandas_storage_options()`: Return dict with `project` and, when `credentials_path` is set, `token`

In [None]:
class GCSConnection(BaseConnection):
    """Google Cloud Storage connection (exercise stub: methods are still TODOs).

    Once implemented, paths should resolve to ``gs://bucket/prefix/path``.
    """

    def __init__(
        self,
        bucket: str,
        prefix: str = "",
        project: Optional[str] = None,
        credentials_path: Optional[str] = None,
        validate: bool = True,
    ) -> None:
        """Store the connection settings.

        Args:
            bucket: GCS bucket name.
            prefix: Optional path prefix.
            project: GCP project ID.
            credentials_path: Path to a service account JSON file (optional).
            validate: NOTE(review): presumably triggers ``self.validate()``
                during construction when true; the stub does not show this,
                so confirm before relying on it.
        """
        # TODO: Initialize instance variables
        pass

    def validate(self) -> None:
        """Validate GCS connection configuration."""
        # TODO: Check bucket is provided
        # NOTE(review): the exercise requirements also list `project` as a
        # value to check, while the signature leaves it optional; confirm
        # which of the two is intended.
        pass

    def get_path(self, relative_path: str) -> str:
        """Get full gs:// URI for relative path.

        Args:
            relative_path: Object path relative to the bucket/prefix.

        Returns:
            The full ``gs://bucket/prefix/relative_path`` URI.
        """
        # TODO: Build gs://bucket/prefix/relative_path
        pass

    def pandas_storage_options(self) -> dict:
        """Get storage options for pandas/fsspec.

        Returns:
            Dict of options; the GCS test reads the project ID back from the
            ``"project"`` entry.
        """
        # TODO: Return dict with project, token (if credentials_path)
        pass

### Test GCSConnection

In [None]:
# Build a connection with every optional setting supplied.
conn = GCSConnection(
    bucket="my-gcs-bucket",
    prefix="analytics",
    project="my-project-123",
    credentials_path="/path/to/service-account.json",
)

# get_path is expected to join bucket, prefix, and relative path into one URI.
gcs_uri = conn.get_path("raw/sales.parquet")
assert gcs_uri == "gs://my-gcs-bucket/analytics/raw/sales.parquet"

# The project ID is expected under the "project" entry of the storage options.
gcs_options = conn.pandas_storage_options()
assert gcs_options["project"] == "my-project-123"

print("✅ GCSConnection tests passed!")

## Exercise 3: Connection Factory

Create a factory function that builds connections from config dicts.

In [None]:
def create_connection(config: dict) -> BaseConnection:
    """Create a connection from a configuration dict (exercise stub).

    Args:
        config: Dict with a 'type' key naming the connection kind, plus the
            connection-specific parameters.

    Returns:
        Initialized connection.

    Raises:
        ValueError: If the connection type is unknown.
    """
    # TODO: Implement factory pattern
    # - Extract 'type' from config
    # - Match on type and create appropriate connection
    # - Pass remaining config as **kwargs
    # Hint: the factory test passes type "s3" and expects an S3Connection.
    # NOTE(review): the type string for GCSConnection is not fixed anywhere
    # in the exercise; pick one and document it.
    pass

### Test Factory

In [None]:
# Minimal S3 config: no prefix or region, so the constructor defaults apply.
s3_config = dict(
    type="s3",
    bucket="my-bucket",
    access_key_id="key",
    secret_access_key="secret",
)

conn = create_connection(s3_config)
assert isinstance(conn, S3Connection)

# With no prefix configured, the URI is just the bucket plus the relative path.
factory_uri = conn.get_path("test.parquet")
assert factory_uri == "s3://my-bucket/test.parquet"

print("✅ Factory tests passed!")

## Exercise 4: Path Validation

Add a function that validates paths before they are used.

In [None]:
def validate_path(connection: BaseConnection, path: str) -> bool:
    """Validate that a path is safe and well-formed (exercise stub).

    Rules:
    - No absolute paths (starting with /)
    - No parent directory references (..)
    - No empty path

    Args:
        connection: Connection instance the path will be used with.
        path: Path to validate.

    Returns:
        True if valid.

    Raises:
        ValueError: If path is invalid.
    """
    # TODO: Implement validation
    # NOTE(review): the rules do not say whether ".." is rejected per path
    # segment or as a substring anywhere in the path; decide and document it.
    pass

### Test Path Validation

In [None]:
from pathlib import Path

class LocalConnection(BaseConnection):
    """Minimal filesystem-backed connection used by the path validation tests."""

    def __init__(self, base_path: str = "./data"):
        # Keep the base directory as a Path so joining is delegated to pathlib.
        self.base_path = Path(base_path)

    def get_path(self, relative_path: str) -> str:
        """Return ``relative_path`` joined onto the base directory, as a string."""
        joined = self.base_path.joinpath(relative_path)
        return str(joined)

    def validate(self) -> None:
        """Perform no checks; this connection has nothing to validate."""
        return None

conn = LocalConnection()

# Should pass: validate_path is documented to return True for a valid path,
# so check identity with True rather than comparing with `== True`.
assert validate_path(conn, "raw/sales.parquet") is True
assert validate_path(conn, "folder/subfolder/file.csv") is True

# Should fail: an absolute path must be rejected with ValueError.
try:
    validate_path(conn, "/absolute/path.parquet")
except ValueError:
    pass  # expected rejection
else:
    # Raised explicitly because `assert False` is stripped under `python -O`.
    raise AssertionError("Should have raised ValueError")

# Should fail: parent-directory references must be rejected with ValueError.
try:
    validate_path(conn, "../../../etc/passwd")
except ValueError:
    pass  # expected rejection
else:
    raise AssertionError("Should have raised ValueError")

print("✅ Path validation tests passed!")

## Bonus Exercise: Environment-Based Connection

Create a function that automatically selects the right connection based on environment.

In [None]:
import os

def get_connection_for_env(env: Optional[str] = None) -> BaseConnection:
    """Get appropriate connection for environment (exercise stub).

    Args:
        env: Environment name (defaults to ODIBI_ENV env var)

    Returns:
        Connection instance for environment

    Environments:
    - local: LocalConnection
    - dev: S3Connection with dev bucket
    - prod: S3Connection with prod bucket
    """
    # TODO: Implement environment-based selection
    # NOTE(review): the exercise does not say what happens when ODIBI_ENV is
    # unset or names an unknown environment, nor which bucket names or
    # credentials the dev/prod connections use; decide and document these.
    pass