# Design Patterns Exercise Solutions

## Solution 1: Validator Registry

In [None]:
from typing import Dict, Callable, Any
import re

class ValidatorRegistry:
    """Class-level lookup table mapping validator names to callables.

    All state lives on the class itself and every method is a classmethod,
    so the registry is used without creating an instance.
    """

    # One dict shared through the class; entries are added by register().
    _validators: Dict[str, Callable] = {}

    @classmethod
    def register(cls, name: str):
        """Return a decorator that stores the decorated callable under ``name``.

        An existing entry with the same name is replaced. The decorated
        callable itself is handed back unchanged.
        """
        def _remember(validator: Callable):
            cls._validators[name] = validator
            return validator

        return _remember

    @classmethod
    def get(cls, name: str) -> Callable:
        """Return the callable registered under ``name``.

        Raises:
            ValueError: If nothing is registered under ``name``.
        """
        if name in cls._validators:
            return cls._validators[name]
        raise ValueError(f"Validator '{name}' not registered")

    @classmethod
    def list_validators(cls) -> list[str]:
        """Return a new list holding every registered name."""
        return list(cls._validators)

@ValidatorRegistry.register("not_null")
def not_null_validator(value: Any) -> bool:
    """Report whether ``value`` is neither ``None`` nor equal to ``""``."""
    if value is None:
        return False
    # Any other value passes unless it compares equal to the empty string.
    return value != ""

@ValidatorRegistry.register("range_check")
def range_check_validator(value: Any, min_val: float = 0, max_val: float = 100) -> bool:
    """Report whether ``value`` lies in the closed interval ``[min_val, max_val]``."""
    above_floor = min_val <= value
    # Only test the upper bound once the lower bound is known to hold.
    return above_floor and value <= max_val

@ValidatorRegistry.register("email_format")
def email_format_validator(value: str) -> bool:
    """Report whether ``value`` has the shape ``local@domain.tld``.

    The check is purely syntactic: a local part of letters, digits and
    ``._%+-``, an ``@``, a domain of letters, digits, dots and hyphens, and
    a final label of at least two letters.

    Args:
        value: The candidate address.

    Returns:
        True if the entire string matches the pattern, False otherwise.
    """
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    # fullmatch, not match: with match() the trailing `$` also succeeds just
    # before a final newline, so "user@example.com\n" used to be accepted.
    return re.fullmatch(pattern, value) is not None

# Test: fetch each validator by its registered name, then call it.
validator = ValidatorRegistry.get("email_format")
outcome = validator('test@example.com')
print(f"Valid email: {outcome}")  # True
outcome = validator('not-an-email')
print(f"Invalid email: {outcome}")  # False

range_validator = ValidatorRegistry.get("range_check")
bounds = {"min_val": 0, "max_val": 100}
outcome = range_validator(50, **bounds)
print(f"In range: {outcome}")  # True
outcome = range_validator(150, **bounds)
print(f"Out of range: {outcome}")  # False

## Solution 2: Data Source Factory

In [None]:
from abc import ABC, abstractmethod
import pandas as pd
from typing import Any

class DataSource(ABC):
    """Abstract interface for objects that produce a DataFrame via ``read()``."""

    @abstractmethod
    def read(self) -> pd.DataFrame:
        """Return this source's data; concrete subclasses must override this."""

class CSVSource(DataSource):
    """Placeholder CSV source.

    ``read()`` does not open any file; it prints the configured path and
    returns a one-row frame describing the source.
    """

    def __init__(self, path: str, **kwargs):
        # Extra keyword options are stored as given; read() does not use them.
        self.kwargs = kwargs
        self.path = path

    def read(self) -> pd.DataFrame:
        """Print the path and return a one-row frame with ``source``/``path``."""
        print(f"Reading CSV from {self.path}")
        placeholder = {"source": ["csv"], "path": [self.path]}
        return pd.DataFrame(placeholder)

class ParquetSource(DataSource):
    """Placeholder Parquet source.

    ``read()`` does not open any file; it prints the configured path and
    returns a one-row frame describing the source.
    """

    def __init__(self, path: str, **kwargs):
        # Extra keyword options are stored as given; read() does not use them.
        self.kwargs = kwargs
        self.path = path

    def read(self) -> pd.DataFrame:
        """Print the path and return a one-row frame with ``source``/``path``."""
        print(f"Reading Parquet from {self.path}")
        placeholder = {"source": ["parquet"], "path": [self.path]}
        return pd.DataFrame(placeholder)

class DatabaseSource(DataSource):
    """Placeholder database source.

    ``read()`` opens no connection; it prints the connection string and
    returns a one-row frame echoing the configured query.
    """

    def __init__(self, connection_string: str, query: str, **kwargs):
        # Extra keyword options are stored as given; read() does not use them.
        self.kwargs = kwargs
        self.query = query
        self.connection_string = connection_string

    def read(self) -> pd.DataFrame:
        """Print the target and return a one-row frame with ``source``/``query``."""
        print(f"Executing query on {self.connection_string}")
        placeholder = {"source": ["database"], "query": [self.query]}
        return pd.DataFrame(placeholder)

class DataSourceFactory:
    """Create ``DataSource`` objects from a type name plus keyword config."""

    @staticmethod
    def create(source_type: str, **config) -> DataSource:
        """Build the data source that corresponds to ``source_type``.

        Args:
            source_type: ``"csv"``, ``"parquet"`` or ``"database"``.
            **config: Constructor options. ``path`` is required for csv and
                parquet; ``connection_string`` and ``query`` are required
                for database. Any remaining entries are forwarded to the
                constructor unchanged.

        Returns:
            A new source instance of the requested kind.

        Raises:
            KeyError: If a required option is missing from ``config``.
            ValueError: If ``source_type`` is not one of the known names.
        """
        # Required keys are popped out of `config` before the remainder is
        # forwarded. Passing `path=config["path"]` together with `**config`
        # supplies the same keyword twice and raises TypeError on every call.
        if source_type == "csv":
            path = config.pop("path")
            return CSVSource(path=path, **config)
        elif source_type == "parquet":
            path = config.pop("path")
            return ParquetSource(path=path, **config)
        elif source_type == "database":
            connection_string = config.pop("connection_string")
            query = config.pop("query")
            return DatabaseSource(
                connection_string=connection_string,
                query=query,
                **config
            )
        else:
            raise ValueError(f"Unknown source type: {source_type}")

# Test: request one path-based and one query-based source from the factory.
csv_config = {"path": "data.csv"}
csv_source = DataSourceFactory.create("csv", **csv_config)
df = csv_source.read()
print(df)

db_config = {
    "connection_string": "postgresql://localhost/db",
    "query": "SELECT * FROM users",
}
db_source = DataSourceFactory.create("database", **db_config)
df = db_source.read()
print(df)

## Solution 3: Serialization Strategy

In [None]:
from abc import ABC, abstractmethod
import json
import pickle
from typing import Any

class Serializer(ABC):
    """Abstract strategy for converting values to bytes and back."""

    @abstractmethod
    def serialize(self, data: Any) -> bytes:
        """Encode ``data`` as bytes; concrete subclasses must override this."""

    @abstractmethod
    def deserialize(self, data: bytes) -> Any:
        """Decode bytes back into a value; concrete subclasses must override this."""

class JSONSerializer(Serializer):
    """Serializer that stores values as UTF-8 encoded JSON text."""

    def serialize(self, data: Any) -> bytes:
        """Dump ``data`` to a JSON string and encode it as UTF-8."""
        text = json.dumps(data)
        return text.encode("utf-8")

    def deserialize(self, data: bytes) -> Any:
        """Decode UTF-8 bytes and parse the resulting JSON text."""
        text = data.decode("utf-8")
        return json.loads(text)

class PickleSerializer(Serializer):
    """Serializer backed by the standard ``pickle`` module.

    NOTE: unpickling can execute arbitrary code, so ``deserialize`` should
    only be given bytes from a trusted origin.
    """

    def serialize(self, data: Any) -> bytes:
        """Return the pickled form of ``data``."""
        payload = pickle.dumps(data)
        return payload

    def deserialize(self, data: bytes) -> Any:
        """Rebuild and return the object stored in pickled ``data``."""
        restored = pickle.loads(data)
        return restored

class ParquetSerializer(Serializer):
    """Serializer that round-trips a pandas DataFrame through Parquet bytes."""

    def serialize(self, data: pd.DataFrame) -> bytes:
        """Return the result of ``data.to_parquet()`` called without a path."""
        parquet_bytes = data.to_parquet()
        return parquet_bytes

    def deserialize(self, data: bytes) -> pd.DataFrame:
        """Wrap ``data`` in an in-memory buffer and read it back as a DataFrame."""
        from io import BytesIO

        buffer = BytesIO(data)
        return pd.read_parquet(buffer)

class DataStore:
    """In-memory key/value store that delegates encoding to a ``Serializer``.

    Values are kept as the bytes produced by the injected serializer and
    are decoded again on every ``load``.
    """

    def __init__(self, serializer: Serializer):
        self._storage: dict[str, bytes] = {}
        self.serializer = serializer

    def save(self, key: str, data: Any) -> None:
        """Serialize ``data`` and store it under ``key``, replacing any old value."""
        encoded = self.serializer.serialize(data)
        self._storage[key] = encoded

    def load(self, key: str) -> Any:
        """Return the deserialized value stored under ``key``.

        Raises:
            KeyError: If ``key`` has never been saved.
        """
        if key in self._storage:
            encoded = self._storage[key]
            return self.serializer.deserialize(encoded)
        raise KeyError(f"Key '{key}' not found")

# Test with different serializers: each store saves a value and loads it back.
data = {"name": "Alice", "age": 30}

# JSON
json_store = DataStore(serializer=JSONSerializer())
json_store.save(key="user", data=data)
loaded = json_store.load(key="user")
print(f"JSON: {loaded}")

# Pickle
pickle_store = DataStore(serializer=PickleSerializer())
pickle_store.save(key="user", data=data)
loaded = pickle_store.load(key="user")
print(f"Pickle: {loaded}")

# Parquet
df = pd.DataFrame({"x": [1, 2, 3]})
parquet_store = DataStore(serializer=ParquetSerializer())
parquet_store.save(key="data", data=df)
loaded_df = parquet_store.load(key="data")
print(f"Parquet:\n{loaded_df}")

## Solution 4: Query Builder

In [None]:
class QueryBuilder:
    """Fluent builder that assembles a single SQL ``SELECT`` statement.

    Every setter returns ``self`` so calls can be chained; ``build()``
    renders the collected clauses. Fragments are inserted into the query
    verbatim, with no quoting or escaping, so they must not come from
    untrusted input.
    """

    def __init__(self):
        self._select_cols: list[str] = []
        self._from: str | None = None
        self._where: list[str] = []
        self._group_by: list[str] = []
        self._order_by: list[str] = []
        self._limit: int | None = None

    def select(self, *columns: str) -> 'QueryBuilder':
        """Append ``columns`` to the SELECT list."""
        self._select_cols.extend(columns)
        return self

    def from_table(self, table: str) -> 'QueryBuilder':
        """Set the FROM table, replacing any earlier choice."""
        self._from = table
        return self

    def where(self, condition: str) -> 'QueryBuilder':
        """Add a condition; all conditions are joined with ``AND``."""
        self._where.append(condition)
        return self

    def group_by(self, *columns: str) -> 'QueryBuilder':
        """Append ``columns`` to the GROUP BY list."""
        self._group_by.extend(columns)
        return self

    def order_by(self, *columns: str) -> 'QueryBuilder':
        """Append ``columns`` to the ORDER BY list."""
        self._order_by.extend(columns)
        return self

    def limit(self, count: int) -> 'QueryBuilder':
        """Set the LIMIT value, replacing any earlier one."""
        self._limit = count
        return self

    def build(self) -> str:
        """Render the query as one space-separated string.

        Clauses appear in the order SELECT, FROM, WHERE, GROUP BY,
        ORDER BY, LIMIT; optional clauses that were never set are omitted.

        Raises:
            ValueError: If no SELECT column or no FROM table was given.
        """
        if not self._select_cols:
            raise ValueError("SELECT columns required")
        if not self._from:
            raise ValueError("FROM table required")

        parts = [
            f"SELECT {', '.join(self._select_cols)}",
            f"FROM {self._from}",
        ]

        if self._where:
            parts.append(f"WHERE {' AND '.join(self._where)}")

        if self._group_by:
            parts.append(f"GROUP BY {', '.join(self._group_by)}")

        if self._order_by:
            parts.append(f"ORDER BY {', '.join(self._order_by)}")

        # Compare against None rather than truthiness so that limit(0)
        # still emits "LIMIT 0" instead of silently dropping the clause.
        if self._limit is not None:
            parts.append(f"LIMIT {self._limit}")

        return " ".join(parts)

# Test: add the clauses one call at a time, then render the query.
builder = QueryBuilder()
builder.select("name", "COUNT(*) as count")
builder.from_table("users")
builder.where("age > 25")
builder.where("active = true")
builder.group_by("name")
builder.order_by("count DESC")
builder.limit(10)
query = builder.build()

print(query)

## Solution 5: Transform Pipeline

In [None]:
from abc import ABC, abstractmethod
from typing import Dict, Any
import pandas as pd

class Logger:
    """Minimal logger that writes prefixed messages to standard output."""

    def log(self, message: str) -> None:
        """Print ``message`` on its own line, prefixed with ``[LOG] ``."""
        line = f"[LOG] {message}"
        print(line)

# Strategy: Transform interface
class Transform(ABC):
    """Abstract strategy: one DataFrame-to-DataFrame processing step."""

    @abstractmethod
    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return the transformed frame; concrete subclasses must override this."""

# Registry: Transform registry
class TransformRegistry:
    """Class-level lookup table mapping transform names to ``Transform`` classes."""

    # One dict shared through the class; entries are added by register().
    _transforms: Dict[str, type[Transform]] = {}

    @classmethod
    def register(cls, name: str):
        """Return a class decorator that stores the decorated class under ``name``.

        An existing entry with the same name is replaced. The decorated
        class itself is handed back unchanged.
        """
        def _remember(transform_class: type[Transform]):
            cls._transforms[name] = transform_class
            return transform_class

        return _remember

    @classmethod
    def get(cls, name: str) -> type[Transform]:
        """Return the class registered under ``name``.

        Raises:
            KeyError: If nothing is registered under ``name``.
        """
        registered = cls._transforms
        return registered[name]

# Factory: Transform factory
class TransformFactory:
    """Instantiate transforms by their registered name."""

    @staticmethod
    def create(name: str, **config) -> Transform:
        """Look ``name`` up in ``TransformRegistry`` and construct it with ``config``.

        The keyword options are passed straight to the class constructor.
        """
        return TransformRegistry.get(name)(**config)

# Builder: Pipeline builder
class PipelineBuilder:
    """Fluent builder that collects transform steps and produces a ``Pipeline``.

    Steps are recorded as ``(name, config)`` pairs and only instantiated
    when ``build()`` is called.
    """

    def __init__(self):
        self._steps: list[tuple[str, dict]] = []
        self._logger: Logger | None = None

    def add_transform(self, name: str, **config) -> 'PipelineBuilder':
        """Queue the transform registered as ``name`` with its constructor options."""
        self._steps.append((name, config))
        return self

    def with_logger(self, logger: Logger) -> 'PipelineBuilder':
        """Set the logger that will be injected into the built pipeline."""
        self._logger = logger
        return self

    def build(self) -> 'Pipeline':
        """Instantiate every queued step, in order, and return the pipeline.

        A default ``Logger`` is created only when none was supplied through
        ``with_logger``.
        """
        transforms = [
            TransformFactory.create(name, **config)
            for name, config in self._steps
        ]
        # Test identity against None, not truthiness: `self._logger or
        # Logger()` would discard an injected logger whose class defines
        # __bool__ or __len__ and currently evaluates as false.
        logger = Logger() if self._logger is None else self._logger
        return Pipeline(transforms, logger=logger)

# Dependency Injection: Pipeline with logger
class Pipeline:
    """Run a fixed list of transforms in order, logging through an injected logger."""

    def __init__(self, transforms: list[Transform], logger: Logger):
        self.logger = logger
        self.transforms = transforms

    def execute(self, df: pd.DataFrame) -> pd.DataFrame:
        """Feed ``df`` through every transform and return the final frame.

        One log line is written before each step (numbered from 1) and one
        after the last step has finished.
        """
        current = df
        step_number = 0
        for step in self.transforms:
            step_number += 1
            self.logger.log(f"Step {step_number}: {step.__class__.__name__}")
            # Each step receives the output of the previous one.
            current = step.apply(current)
        self.logger.log("Pipeline complete")
        return current

# Define transforms
@TransformRegistry.register("filter")
class FilterTransform(Transform):
    """Keep only the rows selected by a ``DataFrame.query`` expression."""

    def __init__(self, condition: str):
        self.condition = condition

    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return the result of ``df.query`` with the stored condition."""
        selected = df.query(self.condition)
        return selected

@TransformRegistry.register("uppercase")
class UppercaseTransform(Transform):
    """Upper-case the values of one string column."""

    def __init__(self, column: str):
        self.column = column

    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the configured column upper-cased."""
        # Work on a copy so the caller's frame is left unmodified.
        updated = df.copy()
        target = self.column
        uppercased = updated[target].str.upper()
        updated[target] = uppercased
        return updated

@TransformRegistry.register("add_column")
class AddColumnTransform(Transform):
    """Add (or overwrite) a column computed from a ``DataFrame.eval`` expression."""

    def __init__(self, column: str, expression: str):
        self.expression = expression
        self.column = column

    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the evaluated expression stored in the column."""
        # Work on a copy so the caller's frame is left unmodified.
        augmented = df.copy()
        computed = augmented.eval(self.expression)
        augmented[self.column] = computed
        return augmented

# Build and test pipeline: queue three named steps, then run them on a small frame.
df = pd.DataFrame({
    "name": ["alice", "bob", "charlie"],
    "last_name": ["smith", "jones", "brown"],
    "age": [30, 20, 35]
})

pipeline_builder = PipelineBuilder().with_logger(Logger())
step_specs = [
    ("filter", {"condition": "age > 25"}),
    ("uppercase", {"column": "name"}),
    ("add_column", {"column": "full_name", "expression": "name + ' ' + last_name"}),
]
for step_name, step_config in step_specs:
    pipeline_builder.add_transform(step_name, **step_config)
pipeline = pipeline_builder.build()

result = pipeline.execute(df)
print("\nResult:")
print(result)

## Solution 6: Pattern Recognition in Odibi

**1. Registry pattern examples:**
- Transform registry for registering data transformations
- Function registry for custom UDFs
- Connector/adapter registry for different data sources

**2. Factory pattern in `create_context()`:**
- Takes configuration and creates appropriate execution context (Spark, Pandas, etc.)
- Hides complex initialization of different engine types
- Returns common interface regardless of underlying engine

**3. Strategy pattern classes:**
- Different execution engines (SparkEngine, PandasEngine, PolarsEngine)
- Different serialization formats (JSON, Parquet, CSV writers)
- Different validation strategies

**4. Dependency Injection examples:**
- Context object passed to transforms
- Config passed to pipeline components
- Connection objects passed to data readers/writers

**5. Singletons:**
- Check whether the codebase contains any global state managers (objects that hold shared, mutable, process-wide state)
- If found, consider refactoring to dependency injection
- Singletons make testing harder and create hidden dependencies