In [128]:
import json
from enum import Enum
from typing import Optional, List
from pydantic import BaseModel, AnyHttpUrl, UUID4, model_validator
from datetime import date
from pydantic.config import ConfigDict

# Data definition

## Data section

In [129]:
class DemographicFactors(BaseModel):
    # Demographic description of a dataset; embedded in `Data.demographic_factors`.
    # Every field is a required free-text string (no defaults are declared and
    # no format is enforced beyond "is a string").
    gender: str
    sex: str
    age: str
    demographic_group: str
    location: str
    socio_economics: str

In [130]:
class CatalogColumn(BaseModel):
    # One entry of a dataset's column catalog; used as the item type of `Data.catalog`.
    # NOTE: `Optional[...]` fields have no default, so under pydantic v2 the key
    # must still be present in the input, but its value may be null.
    column_name: str
    description: Optional[str]
    # Free-text type label (any string is accepted) -- presumably the column's
    # data type; confirm against the example passports.
    type: str
    # Must be parseable as an integer.
    number_missing_values: int
    # Presumably the allowed category labels for categorical columns -- TODO confirm.
    categories: Optional[List[str]]

In [131]:
class DQMetric(BaseModel):
    # One named metric with its value; used as the item type of `Data.dq_assessment`
    # ("DQ" presumably stands for data quality -- TODO confirm).
    name: str
    description: str
    # Stored as a string, so numeric results must be serialised to text by the producer.
    value: str
    # HTTP(S) URL or null. No default is declared, so under pydantic v2 the key
    # itself is still required in the input.
    reference: Optional[AnyHttpUrl]

In [132]:
class Data(BaseModel):
    # Description of one dataset; `AIPassport.data` holds a list of these.
    # Identifier string. Presumably the value referenced from
    # `Training.training_datasets` and `Evaluation.datasets`; no validator in this
    # notebook enforces that link -- TODO confirm.
    id: str
    description: str
    # Integer; presumably the number of records/samples in the dataset -- TODO confirm.
    n: int
    demographic_factors: DemographicFactors
    bias_declaration: str
    catalog: List[CatalogColumn]
    dq_assessment: List[DQMetric]
    # Nullable lists of strings. No default is declared, so under pydantic v2 both
    # keys must be present in the input even when their value is null.
    clinicians: Optional[List[str]]
    patients: Optional[List[str]]

## Training algorithm

In [133]:
class PreprocessSteps(BaseModel):
    # A single preprocessing step; used as the item type of
    # `ImplementationDetails.preprocess_steps`.
    name: str
    description: str
    # Required HTTP(S) URL -- unlike most other `reference` fields in this
    # notebook, this one is not Optional and therefore cannot be null.
    reference: AnyHttpUrl

In [134]:
class NameValue(BaseModel):
    # Generic name/value pair; used as the item type of
    # `ImplementationDetails.hyperparameters`.
    name: str
    # Always a string, so numeric values have to be provided as text.
    value: str

In [135]:
class NameVersion(BaseModel):
    # Generic name/version pair; used for `ImplementationDetails.programming_lang`.
    name: str
    # Free-text version string; no version format is enforced.
    version: str

In [136]:
class ImplementationDetails(BaseModel):
    # Implementation details of a training algorithm; embedded in
    # `TrainingAlgorithm.implementation_details`.
    # NOTE: `Optional[...]` fields have no default, so under pydantic v2 the key
    # must still be present in the input, but its value may be null.
    # HTTP(S) URL or null; presumably a link to the source code -- TODO confirm.
    code: Optional[AnyHttpUrl]
    type_of_architecture: str
    preprocess_steps: Optional[List[PreprocessSteps]]
    # Required list (may be empty) of name/value pairs.
    hyperparameters: List[NameValue]
    programming_lang: NameVersion

In [137]:
class TrainingAlgorithm(BaseModel):
    # Description of a training algorithm; used for `AIPassport.training_algorithm`.
    # Identifier string. Presumably the value referenced from
    # `Training.training_algorithm`; nothing in this notebook enforces that
    # link -- TODO confirm.
    id: str
    name: str
    description: str
    # HTTP(S) URL or null. No default is declared, so under pydantic v2 the key
    # itself is still required in the input.
    reference: Optional[AnyHttpUrl]
    implementation_details: ImplementationDetails

## Evaluation strategy

In [138]:
class RolesAndResponsibilities(BaseModel):
    # One user's role within an evaluation; used as the item type of
    # `EvaluationStrategy.roles_and_responsibilities`.
    # All three fields are required free-text strings.
    user_id: str
    role_user: str
    responsibilities: str

In [139]:
class EvaluationTypeEnum(str, Enum):
    # Allowed values for `EvaluationStrategy.evaluation_type`.
    # Inherits from `str`, so each member is also a plain string. Input is matched
    # against the member values on the right-hand side; any other string is
    # rejected with an enum validation error.
    internal = 'internal'
    clinical = 'clinical'
    continual = 'continual'

In [140]:
class MeasurementTypeEnum(str, Enum):
    # Allowed values for `EvaluationStrategy.measurement_type`.
    # Inherits from `str`, so each member is also a plain string. Input is matched
    # against the member values (the capitalised strings with spaces), not the
    # snake_case member names.
    ai_performance = 'AI Performance'
    perceived_utility = 'Perceived Utility'
    perceived_usability = 'Perceived Usability'

In [141]:
class Metric(BaseModel):
    # A metric declared by an evaluation strategy; used as the item type of
    # `EvaluationStrategy.metrics`.
    # Identifier that `Step.metrics` entries refer to. Uniqueness within one
    # strategy is enforced by `EvaluationStrategy.validate_metrics_fk`.
    id: str
    name: str
    description: str
    # HTTP(S) URLs or null. No defaults are declared, so under pydantic v2 both
    # keys must still be present in the input.
    instrument: Optional[AnyHttpUrl]
    reference: Optional[AnyHttpUrl]

In [142]:
class Step(BaseModel):
    # One step of an evaluation strategy; used as the item type of
    # `EvaluationStrategy.steps`.
    name: str
    description: str
    # HTTP(S) URL or null; the key itself is required (no default declared).
    reference: Optional[AnyHttpUrl]
    # List of `Metric.id` values used by this step, or null. Each id is checked
    # against the enclosing strategy's metrics in
    # `EvaluationStrategy.validate_metrics_fk`.
    metrics: Optional[List[str]]

In [143]:
class EvaluationStrategy(BaseModel):
    # An evaluation strategy: who takes part, what kind of evaluation it is, which
    # metrics it declares and which steps use them. Used as the item type of
    # `AIPassport.evaluation_strategies`.
    # NOTE: `Optional[...]` fields have no default, so under pydantic v2 the key
    # must still be present in the input, but its value may be null.
    id: str
    roles_and_responsibilities: Optional[List[RolesAndResponsibilities]]
    evaluation_type: EvaluationTypeEnum
    measurement_type: MeasurementTypeEnum
    metrics: List[Metric]
    steps: List[Step]
    code: Optional[AnyHttpUrl]
    reference: Optional[AnyHttpUrl]

    @model_validator(mode='after')
    def validate_metrics_fk(self):
        """Check that metric ids are unique and that steps only cite declared ids.

        Runs after field validation, so ``self.metrics`` and ``self.steps`` are
        already parsed lists of ``Metric`` and ``Step``.

        Returns:
            The validated instance. An 'after' model validator has to return
            the instance; returning ``None`` can replace this model with
            ``None`` when it is validated as part of a parent model.

        Raises:
            ValueError: if two metrics share the same ``id``, or if a step
                lists a metric id that is not declared in ``metrics``.
        """
        metric_ids = [metric.id for metric in self.metrics]
        known_ids = set(metric_ids)

        # A shorter set means at least one id occurs more than once.
        if len(metric_ids) > len(known_ids):
            raise ValueError('Non unique IDs detected for Metric.id')

        # Foreign-key check. This also runs when `metrics` is empty, so a step
        # cannot reference metric ids in a strategy that declares none.
        for step in self.steps:
            # `Step.metrics` may be null; treat that as "no metrics referenced".
            for metric_id in step.metrics or []:
                if metric_id not in known_ids:
                    raise ValueError(f'Step.metrics ID {metric_id} not defined before')

        return self


## AI Entity

In [144]:
class AIEntity(BaseModel):
    # Common description of an AI entity; used as the item type of
    # `AIPassport.ai_entities`. All fields are required.
    # Identifier string. Presumably the value that the `ai_entity` fields of
    # `SaMD`, `FoundationalModel`, `AIModel` and `Evaluation` refer to; nothing in
    # this notebook enforces that link -- TODO confirm.
    id: str
    manufacturer: str
    purpose: str
    # Must parse as a date; free text is rejected with a date parsing error.
    release_date: date
    regulation_check: str
    # Required lists of strings (may be empty).
    ethical_declarations: List[str]
    standards: List[str]
    certifications: List[str]
    encryption: str
    # Must be interpretable as a boolean.
    field_tested_libraries: bool

In [145]:
class SaMD(BaseModel):
    # Description used for the `AIPassport.SaMD` field. All fields are required.
    # Presumably an `AIEntity.id` -- not validated here; TODO confirm.
    ai_entity: str
    samd_name: str
    samd_clinical_purpose: str
    # Required list of strings; presumably identifiers of the models that make up
    # the SaMD -- TODO confirm.
    models: List[str]

In [146]:
class FoundationalModel(BaseModel):
    # Description used for the `AIPassport.foundational_model` field.
    # All fields are required.
    # Presumably an `AIEntity.id` -- not validated here; TODO confirm.
    ai_entity: str
    name: str
    description: str
    # Typed as an HTTP(S) URL, so a bare DOI string without a URL scheme will not
    # validate.
    doi: AnyHttpUrl

In [147]:
class XAI(BaseModel):
    # One entry of `AIModel.xai_mechanism` ("XAI" presumably stands for
    # explainable AI -- TODO confirm). All fields are required.
    name: str
    description: str
    # Required HTTP(S) URL (not nullable).
    reference: AnyHttpUrl

In [148]:
class ExternalDocument(BaseModel):
    # Pointer to a document kept outside the passport; used for
    # `Training.quality_roles_and_responsibilities`. All fields are required.
    document_id: str
    document_name: str
    # Required HTTP(S) URL (not nullable).
    document_reference: AnyHttpUrl

In [149]:
class Training(BaseModel):
    # Training record of a model; embedded in `AIModel.training`.
    # Presumably a `TrainingAlgorithm.id` -- not validated here; TODO confirm.
    training_algorithm: str
    # Required list of strings; presumably `Data.id` values -- TODO confirm.
    training_datasets: List[str]
    # An `ExternalDocument` or null. No default is declared, so under pydantic v2
    # the key itself is still required in the input.
    quality_roles_and_responsibilities: Optional[ExternalDocument]
    # TODO (original note: "test this") -- presumably refers to the two date
    # fields below; confirm what still needs testing.
    # Both must parse as dates. No validator checks that the end date is not
    # earlier than the start date.
    training_start_date: date
    training_end_date: date

In [150]:
class AIModel(BaseModel):
    # Description of one AI model; used as the item type of `AIPassport.ai_models`.
    # Presumably an `AIEntity.id` -- not validated here; TODO confirm.
    ai_entity: str
    # String or null; the key itself is required (no default declared).
    # Presumably identifies a foundational model -- TODO confirm.
    foundational_model: Optional[str]
    # Required list (may be empty) of `XAI` entries.
    xai_mechanism: List[XAI]
    training: Training

## Evaluations

In [151]:
class IDValue(BaseModel):
    # Generic id/value pair; used as the item type of `Evaluation.values`.
    # Presumably a `Metric.id` of the evaluation strategy -- not validated here;
    # TODO confirm.
    id: str
    # Always a string, so numeric results have to be provided as text.
    value: str

In [152]:
class Evaluation(BaseModel):
    # One evaluation run and its results; used as the item type of
    # `AIPassport.evaluations`.
    # Presumably an `AIEntity.id` -- not validated here; TODO confirm.
    ai_entity: str
    # Both must parse as dates. No validator checks their order.
    date_start: date
    date_end: date
    description: str
    # Presumably an `EvaluationStrategy.id` -- not validated here; TODO confirm.
    evaluation_strategy: str
    # Nullable lists of strings. No default is declared, so under pydantic v2 both
    # keys must be present in the input even when their value is null.
    datasets: Optional[List[str]]
    participants: Optional[List[str]]
    # Required list (may be empty) of id/value pairs.
    values: List[IDValue]

## Complete Passport

In [153]:
class AIPassport(BaseModel):
    # Root model of the passport document. It is the model whose JSON Schema is
    # exported and against which the example files are validated.
    # NOTE: `Optional[...]` fields have no default, so under pydantic v2 every key
    # must be present in the input, but its value may be null.
    # Must be a version-4 UUID.
    passport_uuid: UUID4
    data: Optional[List[Data]]
    training_algorithm: Optional[TrainingAlgorithm]
    evaluation_strategies: Optional[List[EvaluationStrategy]]
    ai_entities: Optional[List[AIEntity]]
    # NOTE(review): this field has the same name as the `SaMD` model class used in
    # its annotation. Giving it a default value (e.g. `= None`) would likely make
    # the annotation resolve to that default instead of the class -- verify before
    # changing this line.
    SaMD: Optional[SaMD]
    foundational_model: Optional[FoundationalModel]
    ai_models: Optional[List[AIModel]]
    # The only list field that is not nullable: a list (possibly empty) is required.
    evaluations: List[Evaluation]

In [156]:
# Export the JSON Schema generated from the AIPassport model.
# Note that `passport` holds the schema dictionary, not a passport instance.
passport = AIPassport.model_json_schema()
with open('../passport_validator.json', 'w') as file:
    # Serialise first, then write the resulting text in one call.
    file.write(json.dumps(passport, indent=2))

# Testing

## SaMD example

In [None]:
# Load the SaMD example passport and validate it by constructing an AIPassport;
# construction raises a pydantic ValidationError if the document does not match.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying on
# the platform default.
with open('../palliative_care_example/aleph_pc.json', 'r', encoding='utf-8') as file:
    samd_json = json.load(file)
    AIPassport(**samd_json)

## Model example

In [None]:
# Load the one-year-mortality example passport and validate it by constructing an
# AIPassport; construction raises a pydantic ValidationError on a mismatch.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying on
# the platform default.
with open('../palliative_care_example/one_year_mortality.json', 'r', encoding='utf-8') as file:
    oym_json = json.load(file)
    AIPassport(**oym_json)

In [None]:
# Load the regression example passport and validate it by constructing an
# AIPassport; construction raises a pydantic ValidationError on a mismatch.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying on
# the platform default.
with open('../palliative_care_example/regression.json', 'r', encoding='utf-8') as file:
    regression_json = json.load(file)
    AIPassport(**regression_json)

In [None]:
# Load the one-year-frailty example passport and validate it by constructing an
# AIPassport; construction raises a pydantic ValidationError on a mismatch.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying on
# the platform default.
with open('../palliative_care_example/one_year_frailty.json', 'r', encoding='utf-8') as file:
    frailty_json = json.load(file)
    AIPassport(**frailty_json)

<b style="color:red">Validating the human-readable passport structure file is expected to fail at several points, because that file contains placeholder type descriptions (as strings) instead of real values.</b>

In [155]:
# Validate the human-readable structure file. This is expected to raise a
# pydantic ValidationError: the file holds textual type descriptions (e.g. 'int',
# 'bool') where the models require integers, booleans, dates and enum values.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying on
# the platform default.
with open('../human_readable_structure.json', 'r', encoding='utf-8') as file:
    struct_json = json.load(file)
    AIPassport(**struct_json)

ValidationError: 10 validation errors for AIPassport
data.0.n
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='int', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/int_parsing
data.0.catalog.0.number_missing_values
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='int', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/int_parsing
evaluation_strategies.0.evaluation_type
  Input should be 'internal', 'clinical' or 'continual' [type=enum, input_value='categorical: internal | clinical | continual', input_type=str]
evaluation_strategies.0.measurement_type
  Input should be 'AI Performance', 'Perceived Utility' or 'Perceived Usability' [type=enum, input_value='categorical: AI performa...y | Perceived usability', input_type=str]
ai_entities.0.release_date
  Input should be a valid date or datetime, invalid character in year [type=date_from_datetime_parsing, input_value='str:date in yyyy-mm-dd format', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/date_from_datetime_parsing
ai_entities.0.field_tested_libraries
  Input should be a valid boolean, unable to interpret input [type=bool_parsing, input_value='bool', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/bool_parsing
ai_models.0.training.training_start_date
  Input should be a valid date or datetime, invalid character in year [type=date_from_datetime_parsing, input_value='str:date in yyyy-mm-dd format', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/date_from_datetime_parsing
ai_models.0.training.training_end_date
  Input should be a valid date or datetime, invalid character in year [type=date_from_datetime_parsing, input_value='str:date in yyyy-mm-dd format', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/date_from_datetime_parsing
evaluations.0.date_start
  Input should be a valid date or datetime, invalid character in year [type=date_from_datetime_parsing, input_value='str:date in yyyy-mm-dd format', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/date_from_datetime_parsing
evaluations.0.date_end
  Input should be a valid date or datetime, invalid character in year [type=date_from_datetime_parsing, input_value='str:date in yyyy-mm-dd format', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/date_from_datetime_parsing