# Connection Layer Solutions

In [None]:
from abc import ABC, abstractmethod
from typing import Optional
import os

class BaseConnection(ABC):
    """Abstract interface shared by the connection classes.

    Subclasses must implement both methods before they can be instantiated.
    """

    @abstractmethod
    def get_path(self, relative_path: str) -> str:
        """Return the full location string for ``relative_path``."""

    @abstractmethod
    def validate(self) -> None:
        """Check the connection's configuration."""

## Solution 1: S3Connection

In [None]:
class S3Connection(BaseConnection):
    """Connection to an AWS S3 bucket, optionally scoped to a key prefix."""

    def __init__(
        self,
        bucket: str,
        prefix: str = "",
        region: str = "us-east-1",
        access_key_id: Optional[str] = None,
        secret_access_key: Optional[str] = None,
        validate: bool = True,
    ):
        """Store the connection settings and optionally validate them.

        Args:
            bucket: Name of the S3 bucket.
            prefix: Key prefix placed in front of every path; slashes at
                either end are trimmed.
            region: AWS region name.
            access_key_id: AWS access key id.
            secret_access_key: AWS secret access key.
            validate: When true, run :meth:`validate` right away.

        Raises:
            ValueError: If ``validate`` is true and a required setting is
                missing.
        """
        self.bucket = bucket
        self.prefix = (prefix or "").strip("/")
        self.region = region
        self.access_key_id = access_key_id
        self.secret_access_key = secret_access_key

        if validate:
            self.validate()

    def validate(self) -> None:
        """Raise ``ValueError`` for the first required setting that is empty."""
        # Checked in this order so the reported field matches the first gap.
        for required in ("bucket", "access_key_id", "secret_access_key"):
            if not getattr(self, required):
                raise ValueError(f"S3 connection requires '{required}'")

    def get_path(self, relative_path: str) -> str:
        """Return the ``s3://`` URI for ``relative_path`` under the prefix."""
        key = relative_path.lstrip("/")
        if self.prefix:
            key = f"{self.prefix}/{key}"
        return f"s3://{self.bucket}/{key}"

    def pandas_storage_options(self) -> dict:
        """Return the credentials and region as a storage-options dict."""
        client_kwargs = {"region_name": self.region}
        return dict(
            key=self.access_key_id,
            secret=self.secret_access_key,
            client_kwargs=client_kwargs,
        )

### Test S3Connection

In [None]:
# Build a connection scoped to the "analytics/v2" prefix of the bucket.
conn = S3Connection(
    bucket="my-data-bucket",
    prefix="analytics/v2",
    region="us-west-2",
    access_key_id="AKIA...",
    secret_access_key="secret...",
)

# Show the URIs produced for two relative paths, then the storage options.
print(conn.get_path("raw/sales.parquet"))
print(conn.get_path("processed/sales_clean.parquet"))
print()
print("Storage options:", conn.pandas_storage_options())

# The prefix must sit between the bucket and the relative path.
assert conn.get_path("raw/sales.parquet") == "s3://my-data-bucket/analytics/v2/raw/sales.parquet"
print("\n✅ S3Connection tests passed!")

## Solution 2: GCSConnection

In [None]:
class GCSConnection(BaseConnection):
    """Connection to a Google Cloud Storage bucket, optionally under a prefix."""

    def __init__(
        self,
        bucket: str,
        prefix: str = "",
        project: Optional[str] = None,
        credentials_path: Optional[str] = None,
        validate: bool = True,
    ):
        """Store the connection settings and optionally validate them.

        Args:
            bucket: Name of the GCS bucket.
            prefix: Prefix placed in front of every path; slashes at either
                end are trimmed.
            project: Project identifier, if any.
            credentials_path: Credentials location, if any.
            validate: When true, run :meth:`validate` right away.

        Raises:
            ValueError: If ``validate`` is true and ``bucket`` is empty.
        """
        self.bucket = bucket
        self.prefix = (prefix or "").strip("/")
        self.project = project
        self.credentials_path = credentials_path

        if validate:
            self.validate()

    def validate(self) -> None:
        """Raise ``ValueError`` when no bucket has been configured."""
        if self.bucket:
            return
        raise ValueError("GCS connection requires 'bucket'")

    def get_path(self, relative_path: str) -> str:
        """Return the ``gs://`` URI for ``relative_path`` under the prefix."""
        object_name = relative_path.lstrip("/")
        if self.prefix:
            object_name = f"{self.prefix}/{object_name}"
        return f"gs://{self.bucket}/{object_name}"

    def pandas_storage_options(self) -> dict:
        """Return a storage-options dict holding only the settings that are set."""
        # "project" comes before "token"; unset (falsy) values are left out.
        candidates = (
            ("project", self.project),
            ("token", self.credentials_path),
        )
        return {option: value for option, value in candidates if value}

### Test GCSConnection

In [None]:
# Build a connection scoped to the "analytics" prefix of the bucket.
conn = GCSConnection(
    bucket="my-gcs-bucket",
    prefix="analytics",
    project="my-project-123",
    credentials_path="/path/to/service-account.json",
)

# Show the URIs produced for two relative paths, then the storage options.
print(conn.get_path("raw/sales.parquet"))
print(conn.get_path("processed/sales_clean.parquet"))
print()
print("Storage options:", conn.pandas_storage_options())

# The prefix must sit between the bucket and the relative path.
assert conn.get_path("raw/sales.parquet") == "gs://my-gcs-bucket/analytics/raw/sales.parquet"
print("\n✅ GCSConnection tests passed!")

## Solution 3: Connection Factory

In [None]:
from pathlib import Path

class LocalConnection(BaseConnection):
    """Connection to a directory on the local filesystem."""

    def __init__(self, base_path: str = "./data"):
        """Remember ``base_path`` as the root for all relative paths."""
        self.base_path = Path(base_path)

    def get_path(self, relative_path: str) -> str:
        """Return the absolute filesystem path for ``relative_path``.

        Leading slashes are stripped first, as S3Connection and
        GCSConnection do. Without that, pathlib discards ``base_path``
        whenever the right-hand operand is absolute, so "/etc/passwd"
        would resolve outside the base directory.
        """
        # str() keeps os.PathLike arguments working, as the plain join did.
        clean_path = str(relative_path).lstrip("/")
        full_path = self.base_path / clean_path
        return str(full_path.absolute())

    def validate(self) -> None:
        """Create the base directory (and any parents) if it is missing."""
        self.base_path.mkdir(parents=True, exist_ok=True)


def create_connection(config: dict) -> BaseConnection:
    """Build the connection described by ``config``.

    The "type" entry selects the class ("local", "s3" or "gcs"); every other
    entry is passed to that class's constructor as a keyword argument. The
    caller's dict is left untouched.

    Raises:
        KeyError: If ``config`` has no "type" entry.
        ValueError: If the type is not one of the known names.
    """
    settings = config.copy()
    conn_type = settings.pop("type")

    registry = (
        ("local", LocalConnection),
        ("s3", S3Connection),
        ("gcs", GCSConnection),
    )
    for type_name, connection_cls in registry:
        if conn_type == type_name:
            return connection_cls(**settings)

    raise ValueError(f"Unknown connection type: {conn_type}")

### Test Factory

In [None]:
# Minimal S3 configuration: the factory reads "type" and forwards the rest.
s3_config = {
    "type": "s3",
    "bucket": "my-bucket",
    "access_key_id": "key",
    "secret_access_key": "secret",
}

conn = create_connection(s3_config)
print(f"Created: {type(conn).__name__}")
print(f"Path: {conn.get_path('test.parquet')}")

# No prefix was configured, so the path sits directly under the bucket.
assert isinstance(conn, S3Connection)
assert conn.get_path("test.parquet") == "s3://my-bucket/test.parquet"
print("\n✅ Factory tests passed!")

In [None]:
# GCS configuration: only the bucket is required by GCSConnection.validate().
gcs_config = {
    "type": "gcs",
    "bucket": "my-gcs-bucket",
    "project": "my-project",
}

conn = create_connection(gcs_config)
assert isinstance(conn, GCSConnection)
print("✅ GCS factory works!")

In [None]:
# Local configuration: "base_path" is forwarded to LocalConnection.
local_config = {
    "type": "local",
    "base_path": "./test_data",
}

conn = create_connection(local_config)
assert isinstance(conn, LocalConnection)
print("✅ Local factory works!")

## Solution 4: Path Validation

In [None]:
def validate_path(connection: "BaseConnection", path: str) -> bool:
    """Validate that a relative path is safe and well-formed.

    Args:
        connection: The connection the path is meant for. It is not
            consulted by the current checks.
        path: The relative path to check.

    Returns:
        True when the path passes every check.

    Raises:
        ValueError: If the path is empty or blank, starts with "/", or has
            a ".." component.
    """
    if not path or not path.strip():
        raise ValueError("Path cannot be empty")

    if path.startswith("/"):
        raise ValueError(f"Path cannot be absolute: {path}")

    # Compare whole components rather than searching for the substring "..",
    # so names such as "sales..2024.csv" are accepted. Both separators are
    # split on, which keeps "..\\x" rejected as before.
    components = path.replace("\\", "/").split("/")
    if any(component.strip() == ".." for component in components):
        raise ValueError(f"Path cannot contain parent references (..): {path}")

    return True

### Test Path Validation

In [None]:
conn = LocalConnection()

# Valid paths
assert validate_path(conn, "raw/sales.parquet")
assert validate_path(conn, "folder/subfolder/file.csv")
print("✅ Valid paths accepted")

# Each invalid case below must raise ValueError. The "assert False" only runs
# when validate_path returns normally, and its AssertionError is not caught
# by the ValueError handler.

# Invalid: absolute path
try:
    validate_path(conn, "/absolute/path.parquet")
    assert False, "Should have raised ValueError"
except ValueError as e:
    print(f"✅ Caught absolute path: {e}")

# Invalid: parent reference
try:
    validate_path(conn, "../../../etc/passwd")
    assert False, "Should have raised ValueError"
except ValueError as e:
    print(f"✅ Caught parent reference: {e}")

# Invalid: empty
try:
    validate_path(conn, "")
    assert False, "Should have raised ValueError"
except ValueError as e:
    print(f"✅ Caught empty path: {e}")

print("\n✅ All path validation tests passed!")

## Bonus Solution: Environment-Based Connection

In [None]:
def get_connection_for_env(env: Optional[str] = None) -> BaseConnection:
    """Return the connection configured for an environment.

    Args:
        env: One of "local", "dev" or "prod". When None, the name is read
            from the ODIBI_ENV environment variable, defaulting to "local".

    Returns:
        A LocalConnection rooted at "./data" for "local"; otherwise an
        S3Connection whose credentials come from AWS_ACCESS_KEY_ID and
        AWS_SECRET_ACCESS_KEY.

    Raises:
        ValueError: If the environment name is not recognised, or if
            S3Connection's validation rejects the settings.
    """
    if env is None:
        env = os.getenv("ODIBI_ENV", "local")

    if env == "local":
        return LocalConnection(base_path="./data")

    # The hosted environments differ only in bucket and region.
    if env == "dev":
        bucket, region = "my-dev-bucket", "us-east-1"
    elif env == "prod":
        bucket, region = "my-prod-bucket", "us-west-2"
    else:
        raise ValueError(f"Unknown environment: {env}")

    return S3Connection(
        bucket=bucket,
        prefix="analytics",
        access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        region=region,
    )

### Test Environment Selection

In [None]:
# Mock environment variables
# Mock environment variables
# NOTE: these assignments replace any existing values of the two variables
# for the rest of the process.
os.environ["AWS_ACCESS_KEY_ID"] = "test_key"
os.environ["AWS_SECRET_ACCESS_KEY"] = "test_secret"

# "local" maps to a filesystem connection.
local_conn = get_connection_for_env("local")
assert isinstance(local_conn, LocalConnection)
print(f"✅ Local: {local_conn.get_path('test.parquet')}")

# "dev" and "prod" both map to S3, each with its own bucket.
dev_conn = get_connection_for_env("dev")
assert isinstance(dev_conn, S3Connection)
assert dev_conn.bucket == "my-dev-bucket"
print(f"✅ Dev: {dev_conn.get_path('test.parquet')}")

prod_conn = get_connection_for_env("prod")
assert isinstance(prod_conn, S3Connection)
assert prod_conn.bucket == "my-prod-bucket"
print(f"✅ Prod: {prod_conn.get_path('test.parquet')}")

print("\n✅ All environment tests passed!")

## 🎯 Key Insights

1. **Factory Pattern** makes it easy to create connections from config
2. **Path Validation** prevents security issues (path traversal)
3. **Environment Selection** enables seamless dev/prod workflows
4. **Consistent Interface** means all connections work the same way

These patterns are the foundation of Odibi's configuration system!