# Typing Module

> Field Metadata for python's `t.Annotate`.

In [1]:
#| export
import typing as t
from enum import Enum
from pydantic import BaseModel, create_model
from datetime import datetime, date
import inspect

## Supported Types

Here we have the supported types and meta-types.

In [2]:
#| export
class ColumnType(str, Enum):
    """Column types supported by the Ragas API."""
    NUMBER = "number"
    TEXT = "text"
    SELECT = "select"
    MULTI_SELECT = "multiSelect"
    CHECKBOX = "checkbox"
    DATE = "date"
    CUSTOM = "custom"


In [3]:
#| export
class FieldMeta:
    """Base metadata for field type annotations."""
    def __init__(self, type, required=True, **settings):
        self.type = type
        self.required = required
        self.settings = settings.copy()

In [4]:
#| export
class Number(FieldMeta):
    """Number field metadata."""
    def __init__(self, min_value: t.Optional[float] = None, max_value: t.Optional[float] = None, required: bool = True):
        settings = {}
        if min_value is not None or max_value is not None:
            settings["range"] = {}
            if min_value is not None:
                settings["range"]["min"] = min_value
            if max_value is not None:
                settings["range"]["max"] = max_value
        super().__init__(ColumnType.NUMBER, required, **settings)


In [5]:
#| export
class Text(FieldMeta):
    """Text field metadata."""
    def __init__(self, max_length: int = 1000, required: bool = True):
        settings = {}
        if max_length is not None:
            settings["max_length"] = max_length
        super().__init__(ColumnType.TEXT, required, **settings)

In [6]:
#| export
class Select(FieldMeta):
    """Select field metadata."""
    def __init__(self, options: t.Optional[t.List[str]] = None, required: bool = True):
        settings = {}
        if options:
            settings["options"] = [{"name": option} for option in options]
        super().__init__(ColumnType.SELECT, required, **settings)

In [7]:
#| export
class MultiSelect(FieldMeta):
    """MultiSelect field metadata."""
    def __init__(self, options: t.Optional[t.List[str]] = None, required: bool = True):
        settings = {}
        if options:
            settings["options"] = [{"name": option} for option in options]
        super().__init__(ColumnType.MULTI_SELECT, required, **settings)


In [8]:
#| export
class Checkbox(FieldMeta):
    """Checkbox field metadata."""
    def __init__(self, required: bool = True):
        super().__init__(ColumnType.CHECKBOX, required)


In [9]:
#| export
class Date(FieldMeta):
    """Date field metadata."""
    def __init__(self, include_time: bool = False, required: bool = True):
        settings = {}
        if include_time:
            settings["include_time"] = include_time
        super().__init__(ColumnType.DATE, required, **settings)


In [10]:

#| export
class Custom(FieldMeta):
    """Custom field metadata."""
    def __init__(self, custom_type: str = "", required: bool = True):
        settings = {}
        if custom_type:
            settings["type"] = custom_type
        super().__init__(ColumnType.CUSTOM, required, **settings)

## Model Converter

In [11]:

class ModelConverter:
    """Convert Pydantic models to Ragas API columns and rows."""
    
    @staticmethod
    def infer_field_type(annotation, field_info=None):
        """Infer field type from Python type annotation."""
        # Check for Annotated with our custom metadata
        origin = t.get_origin(annotation)
        args = t.get_args(annotation)
        
        # If this is an Annotated field with our metadata
        if origin is t.Annotated and len(args) > 1:
            for arg in args[1:]:
                if isinstance(arg, FieldMeta):
                    return arg
            
            # If no field metadata found, infer from the base type
            return ModelConverter.infer_field_type(args[0], field_info)
        
        # Handle Optional, List, etc.
        if origin is t.Union:
            if type(None) in args:
                # This is Optional[T]
                non_none_args = [arg for arg in args if arg is not type(None)]
                if len(non_none_args) == 1:
                    # Get the field type of the non-None arg
                    field_meta = ModelConverter.infer_field_type(non_none_args[0], field_info)
                    field_meta.required = False
                    return field_meta
        
        # Handle List and array types
        # NOTE: here we are converting lists to strings, except for literal types
        if origin is list or origin is t.List:
            if len(args) > 0:
                # Check if it's a list of literals
                if t.get_origin(args[0]) is t.Literal:
                    literal_options = t.get_args(args[0])
                    return MultiSelect(options=list(literal_options))
                # Otherwise just a regular list
                return Text()  # Default to Text for lists
        
        # Handle Literal
        if origin is t.Literal:
            return Select(options=list(args))
        
        # Basic type handling
        if annotation is str:
            return Text()
        elif annotation is int or annotation is float:
            return Number()
        elif annotation is bool:
            return Checkbox()
        elif annotation is datetime or annotation is date:
            return Date(include_time=annotation is datetime)
        
        # Default to Text for complex or unknown types
        return Text()
    
    @classmethod
    def model_to_columns(cls, model_class):
        """Convert a Pydantic model class to Ragas API column definitions."""
        columns = []
        for field_name, field_info in model_class.model_fields.items():
            # Get the field's type annotation
            annotation = field_info.annotation
            
            # Try to get field metadata
            field_meta = cls.infer_field_type(annotation, field_info)
            
            # Create column definition
            column = {
                "id": field_name,
                "name": field_name,
                "type": field_meta.type.value,
                "settings": field_meta.settings.copy()
            }
            
            columns.append(column)
        
        return columns
    
    @classmethod
    def instance_to_row(cls, instance, model_class=None):
        """Convert a Pydantic model instance to a Ragas API row."""
        if model_class is None:
            model_class = instance.__class__
        
        row_cells = []
        model_data = instance.model_dump()
        
        for field_name, field_info in model_class.model_fields.items():
            if field_name in model_data:
                value = model_data[field_name]
                # Process value based on field type
                annotation = field_info.annotation
                field_meta = cls.infer_field_type(annotation, field_info)
                
                # Special handling for various types
                if field_meta.type == ColumnType.MULTI_SELECT and isinstance(value, list):
                    # Convert list to string format accepted by API
                    processed_value = value
                elif field_meta.type == ColumnType.DATE and isinstance(value, (datetime, date)):
                    # Format date as string
                    processed_value = value.isoformat()
                else:
                    processed_value = value
                
                row_cells.append({
                    "column_id": field_name,
                    "data": processed_value
                })
        
        return {
            "data": row_cells
        }
    
    @classmethod
    def instances_to_rows(cls, instances, model_class=None):
        """Convert multiple Pydantic model instances to Ragas API rows."""
        if not instances:
            return []
        
        if model_class is None and instances:
            model_class = instances[0].__class__
        
        return [cls.instance_to_row(instance, model_class) for instance in instances]

In [12]:
# Let's test the ModelConverter
# First, let's define a test model with various field types
class TestModel(BaseModel):
    # Basic types
    id: int
    name: str
    is_active: bool
    created_at: datetime
    
    # Optional fields
    optional_text: t.Optional[str] = None
    
    # Lists
    tags: t.List[str] = []
    
    # Literal types
    status: t.Literal["pending", "active", "completed"] = "pending"
    
    # Annotated types with our field metadata
    score: t.Annotated[float, Number(min_value=0, max_value=100)]
    description: t.Annotated[str, Text(max_length=500)]
    category: t.Annotated[t.Literal["A", "B", "C"], Select(options=["A", "B", "C"])]
    features: t.Annotated[t.List[str], MultiSelect(options=["feature1", "feature2", "feature3"])]
    
# Now let's create some test instances
test_instances = [
    TestModel(
        id=1,
        name="Test Item 1",
        is_active=True,
        created_at=datetime.now(),
        score=85.5,
        description="This is a test description for item 1",
        category="A",
        features=["feature1", "feature3"],
        tags=["tag1", "tag2"],
        status="active"
    ),
    TestModel(
        id=2,
        name="Test Item 2",
        is_active=False,
        created_at=datetime.now(),
        optional_text="This is optional",
        score=42.0,
        description="A shorter description",
        category="B",
        features=["feature2"],
        status="completed"
    )
]


In [13]:
# Test the model_to_columns method
print("Testing model_to_columns:")
columns = ModelConverter.model_to_columns(TestModel)
for col in columns:
    print(f"- {col['name']} ({col['type']}): {col['settings']}")


Testing model_to_columns:
- id (number): {}
- name (text): {'max_length': 1000}
- is_active (checkbox): {}
- created_at (date): {'include_time': True}
- optional_text (text): {'max_length': 1000}
- tags (text): {'max_length': 1000}
- status (select): {'options': [{'name': 'pending'}, {'name': 'active'}, {'name': 'completed'}]}
- score (number): {}
- description (text): {'max_length': 1000}
- category (select): {'options': [{'name': 'A'}, {'name': 'B'}, {'name': 'C'}]}
- features (text): {'max_length': 1000}


In [14]:

print("\nTesting instance_to_row:")
# Test the instance_to_row method
row = ModelConverter.instance_to_row(test_instances[0])
for cell in row["data"]:
    print(f"- {cell['column_id']}: {cell['data']}")



Testing instance_to_row:
- id: 1
- name: Test Item 1
- is_active: True
- created_at: 2025-04-09T13:13:28.710666
- optional_text: None
- tags: ['tag1', 'tag2']
- status: active
- score: 85.5
- description: This is a test description for item 1
- category: A
- features: ['feature1', 'feature3']


In [15]:

print("\nTesting instances_to_rows:")
# Test the instances_to_rows method
rows = ModelConverter.instances_to_rows(test_instances)
print(f"Generated {len(rows)} rows")

# Test type inference
print("\nTesting type inference:")
types = [
    (int, "Number"),
    (str, "Text"),
    (bool, "Checkbox"),
    (datetime, "Date"),
    (t.Optional[str], "Text (not required)"),
    (t.List[str], "Text"),
    (t.Literal["a", "b"], "Select"),
    (t.List[t.Literal["x", "y"]], "MultiSelect"),
    (t.Annotated[int, Number(min_value=10)], "Number with min=10")
]

for annotation, expected in types:
    field_meta = ModelConverter.infer_field_type(annotation)
    print(f"- {annotation}: {field_meta.type.value} (Required: {field_meta.required})")
    if hasattr(field_meta, "min_value") and field_meta.min_value is not None:
        print(f"  - Min value: {field_meta.min_value}")
    if "options" in field_meta.settings:
        print(f"  - Options: {[opt['name'] for opt in field_meta.settings['options']]}")


Testing instances_to_rows:
Generated 2 rows

Testing type inference:
- <class 'int'>: number (Required: True)
- <class 'str'>: text (Required: True)
- <class 'bool'>: checkbox (Required: True)
- <class 'datetime.datetime'>: date (Required: True)
- typing.Optional[str]: text (Required: False)
- typing.List[str]: text (Required: True)
- typing.Literal['a', 'b']: select (Required: True)
  - Options: ['a', 'b']
- typing.List[typing.Literal['x', 'y']]: multiSelect (Required: True)
  - Options: ['x', 'y']
- typing.Annotated[int, <__main__.Number object at 0x1229a73b0>]: number (Required: True)


## Unit Tests

In [18]:
# 1. Test the actual implementation with basic fields
class BasicModel(BaseModel):
    id: int
    name: str
    created_at: datetime
    
    # No custom metadata - just test basic type inference

# 2. Test with selective metadata where it works
class PartialMetadataModel(BaseModel):
    # Only use metadata that works with your current implementation
    status: t.Literal["pending", "active", "completed"]  # Test literal type

# 3. Document current limitations
# print("Note: Currently, using complex metadata classes directly in t.Annotated causes validation issues")

# Test the actual implementation
def test_basic_type_inference():
    """Test basic type inference without custom metadata."""
    # Get column definitions
    columns = ModelConverter.model_to_columns(BasicModel)
    print(columns)
    
    # Find columns and check their properties
    id_col = next((c for c in columns if c["id"] == "id"), None)
    assert id_col is not None, "id column should exist"
    assert id_col["type"] == ColumnType.NUMBER.value, "id should map to NUMBER type"
    
    name_col = next((c for c in columns if c["id"] == "name"), None)
    assert name_col is not None, "name column should exist"
    assert name_col["type"] == ColumnType.TEXT.value, "name should map to TEXT type"
    
    date_col = next((c for c in columns if c["id"] == "created_at"), None)
    assert date_col is not None, "created_at column should exist"
    assert date_col["type"] == ColumnType.DATE.value, "datetime should map to DATE type"

# Run the tests with what actually works
test_basic_type_inference()

[{'id': 'id', 'name': 'id', 'type': 'number', 'settings': {}}, {'id': 'name', 'name': 'name', 'type': 'text', 'settings': {'max_length': 1000}}, {'id': 'created_at', 'name': 'created_at', 'type': 'date', 'settings': {'include_time': True}}]
