# Transparency Artefacts: Metadata Records and Reports

This tutorial demonstrates the use of FAID's metadata recording and reporting formats.

In [1]:
import sys
sys.path.append('../../')
# All logging related functions can be called from the faid.logging module
# All report related functions can be called from the faid.report module
from faid import logging as faidlog
from faid.report import generate_model_card_report, generate_data_card_report, generate_risk_register_report, generate_transparency_report, generate_experiment_overview_report, generate_all_reports

In [2]:
# Label for this tutorial run. It is not passed to any call in the cells shown here.
experiment_name = "metadata-test"
# `test=True` initialises the log files with sample placeholder values.
# If the log files already exist, FAID warns and appends to them instead
# (see the warnings printed below this cell).
faidlog.init_log(test=True)

[93mModel log file already exists.  Logging will be appended to the existing file.[0m
[93mData log file already exists. Logging will be appended to the existing file.[0m
[93mRisks log file already exists. Logging will be appended to the existing file.[0m
[93mTransparency log file already exists. Logging will be appended to the existing file.[0m


## Using Model Card and Report

In [5]:
# Create the ModelCard object; the following cell reads the model details from it.
model_info = faidlog.ModelCard()

In [6]:
# Display the model details as a dict (name, overview, owners, version, license, ...).
model_info.get_model_details()

{'name': 'Example Model',
 'overview': 'This model is designed to predict customer churn.',
 'documentation': 'https://example.com/model-docs',
 'owners': [{'name': 'John Doe', 'contact': 'john.doe@example.com'}],
 'version': {'name': 'v1.0', 'date': '2023-10-01', 'diff': 'Initial release.'},
 'license': {'identifier': 'MIT',
  'custom_text': 'Custom license text if applicable.'},
 'references': 'https://example.com/references',
 'citation': 'Doe, J. (2023). Example Model. Example Journal.',
 'path': '/models/example_model'}

In [7]:
# Generate the model card report; nothing is displayed inline.
# NOTE(review): the report format and output location are decided inside faid.report — confirm.
generate_model_card_report()

## Using Data Card and Report

In [8]:
# Create the DataCard object; the following cell reads the dataset description from it.
datacard = faidlog.DataCard()

In [9]:
# Display the dataset description as a dict (name, summary, links, tasks, owners, ...).
datacard.get_description()

{'name': 'Example Dataset',
 'summary': 'This dataset contains information about...',
 'dataset_link': 'http://example.com/dataset',
 'repository_link': 'http://example.com/repository',
 'intro_paper': 'http://example.com/paper',
 'publishing_organization': 'Example Organization',
 'tasks': ['classification', 'regression'],
 'characteristics': ['structured', 'time-series'],
 'feature_types': ['numerical', 'categorical'],
 'target_col': 'target',
 'index_col': 'id',
 'year_of_dataset_creation': '2023',
 'last_updated': '2023-10-01',
 'industry_types': ['healthcare', 'finance'],
 'publishing_poc': {'name': 'John Doe', 'email': 'john.doe@example.com'},
 'owners': [{'name': 'Jane Smith', 'email': 'jane.smith@example.com'}],
 'authors': [{'name': 'John Doe', 'email': 'john.doe@example.com'}],
 'funding_sources': ['Grant XYZ', 'Organization ABC']}

In [10]:
# Generate the data card report; nothing is displayed inline.
# NOTE(review): the report format and output location are decided inside faid.report — confirm.
generate_data_card_report()

## Using the Risk Register

The risk register works differently: it doesn't create an object that you need to save to sync it with the metadata record. You can add risks, issues, assumptions, and dependencies asynchronously.

In [11]:
# Show the current risk register entries: a dict keyed by integer index, each entry
# holding 'description', 'impact', 'likelihood' and 'mitigation'.
faidlog.get_risk_entries()

{0: {'description': 'Potential risk of data breach',
  'impact': 'High',
  'likelihood': 'Medium',
  'mitigation': 'Implement stronger encryption methods'},
 1: {'description': 'Test Risk Entry',
  'impact': 'Test Impact',
  'likelihood': '0.5',
  'mitigation': 'Test Mitigation'}}

In [None]:
# Add a new entry to the risk register; all four fields are passed as strings.
# NOTE(review): the same entry already appears in the listing above, which suggests it
# persisted from an earlier run — re-running this cell presumably appends a duplicate; confirm.
faidlog.add_risk_entry(description="Test Risk Entry", impact="Test Impact", likelihood="0.5", mitigation="Test Mitigation")

In [13]:
# List the risk register again to check which entries it holds after the add cell.
faidlog.get_risk_entries()

{0: {'description': 'Potential risk of data breach',
  'impact': 'High',
  'likelihood': 'Medium',
  'mitigation': 'Implement stronger encryption methods'},
 1: {'description': 'Test Risk Entry',
  'impact': 'Test Impact',
  'likelihood': '0.5',
  'mitigation': 'Test Mitigation'}}

## Transparency Record

The transparency record is generated automatically from all the other transparency artefacts, including the model card, data card, and risk register.

In [1]:
import sys
sys.path.append('../../')
# All logging related functions can be called from the faid.logging module
# All report related functions can be called from the faid.report module
from faid import logging as faidlog
from faid.report import generate_model_card_report, generate_data_card_report, generate_risk_register_report, generate_transparency_report, generate_experiment_overview_report, generate_all_reports

In [2]:
# Read the risk-related section of the transparency record
# (keys: 'impact_assessment' and 'risks_and_mitigations').
faidlog.get_transparency_record()["risks_mitigations_and_impact_assessments"]

{'impact_assessment': 'The project risks has the following impact descriptions: \nHighTest Impact',
 'risks_and_mitigations': 'The project listed the following mitigation strategies: \nImplement stronger encryption methodsTest Mitigation\n\nThe project risks has the following descriptions: \nPotential risk of data breachTest Risk Entry'}

In [3]:
# Copy the risk register into the transparency record; the call prints the risk
# entries and the transparency entry it added.
# NOTE(review): the generated text joins entries without a separator
# (e.g. 'HighTest Impact') — confirm whether that is intended.
faidlog.sync_risk_to_transparency()

{0: {'description': 'Potential risk of data breach', 'impact': 'High', 'likelihood': 'Medium', 'mitigation': 'Implement stronger encryption methods'}, 1: {'description': 'Test Risk Entry', 'impact': 'Test Impact', 'likelihood': '0.5', 'mitigation': 'Test Mitigation'}}
Added the transparency entry: {'impact_assessment': 'The project risks has the following impact descriptions: \nHighTest Impact', 'risks_and_mitigations': 'The project listed the following mitigation strategies: \nImplement stronger encryption methodsTest Mitigation\n\nThe project risks has the following descriptions: \nPotential risk of data breachTest Risk Entry'}.


In [5]:
# Read the data specification section of the transparency record.
faidlog.get_transparency_record()["data_specification"]

{'source_data_name': 'Y Council school applicants',
 'data_modality': 'Tabular',
 'data_description': 'These data provide information about school applicants which is relevant to Y Council’s admissions policy.',
 'data_quantities': 'Each year, the dataset contains approximately 3000 samples with around 40 attributes each.',
 'sensitive_attributes': 'Name, Age, Address, Sex, Religion, Disability, Parent or guardian name(s)',
 'data_completeness_and_representative-ness': 'Datasets are always ensured to be complete before being processed by the model.',
 'source_data_url': 'N/A - the data contains personal data and cannot be made public.',
 'data_collection': 'Collected retrospectively from existing schools and ongoing year-by-year data from students and schools.',
 'data_cleaning': 'Pre-processing and cleaning performed by AI Tools UK.',
 'data_sharing_agreements': 'A data sharing agreement has been put in place between Y Council and AI Tools UK.',
 'data_access_and_storage': 'Data acces

In [2]:
# Copy the data card into the transparency record; the call prints the entry it added.
# NOTE(review): the added entry uses the key 'data_completeness_and_representative_ness',
# while the record shown earlier uses 'data_completeness_and_representative-ness' —
# confirm which spelling is intended.
faidlog.sync_data_to_transparency()

Added the transparency entry: {'source_data_name': 'Example Dataset', 'data_modality': 'text', 'data_description': 'Detailed description of the dataset...', 'data_quantities': '10000', 'sensitive_attributes': ['age', 'gender'], 'data_completeness_and_representative_ness': 'yes', 'source_data_url': 'http://example.com/dataset', 'data_collection': 'Data collected from surveys...', 'data_cleaning': '', 'data_sharing_agreements': '', 'data_access_and_storage': ''}.


In [2]:
# Read the model specification section of the transparency record before syncing the model card.
faidlog.get_transparency_record()["model_specification"]

{'model_name': 'Y-Admissions',
 'model_version': 'v4.1',
 'model_task': 'To allocate a fixed number of secondary school places to a set of applicants.',
 'model_input': 'A dataset containing information about school applicants and a dataset containing the number and types of places offered by local state schools.',
 'model_output': 'A dataset with school allocations for every applicant, along with brief written explanations for why each allocation has been given.',
 'model_architecture': 'Y-Admissions is an optimisation-based automated planning model.',
 'model_performance': 'Regular verification tests to ensure the model is executable and valid.',
 'datasets': 'Datasets containing information about school applicants and datasets containing the number and types of places offered by local state schools.',
 'dataset_purposes': 'Used to test the robustness and fairness of the model.'}

In [2]:
# Copy the model card into the transparency record; the call prints the entry it added.
# The version is flattened into one string ('Name: ... | Date: ... | Diff: ...').
faidlog.sync_model_to_transparency()

Added the transparency entry: {'model_name': 'Example Model', 'model_version': 'Name: v1.0 | Date: 2023-10-01 | Diff: Initial release.', 'model_task': 'This model is designed to predict customer churn.', 'model_input': 'CSV', 'model_output': 'JSON', 'model_architecture': 'Neural Network', 'model_performance': '', 'datasets': 'Training data for the model.', 'dataset_purposes': ''}.


In [3]:
# Read the model specification again: it now holds the values taken from the model card.
faidlog.get_transparency_record()["model_specification"]

{'model_name': 'Example Model',
 'model_version': 'Name: v1.0 | Date: 2023-10-01 | Diff: Initial release.',
 'model_task': 'This model is designed to predict customer churn.',
 'model_input': 'CSV',
 'model_output': 'JSON',
 'model_architecture': 'Neural Network',
 'model_performance': '',
 'datasets': 'Training data for the model.',
 'dataset_purposes': ''}