In [10]:
from avidtools.datamodels.report import Report
from avidtools.datamodels.components import *

from avidtools.connectors.atlas import *

In [2]:
import requests
import yaml

# Base URL of the raw case-study YAML files in the mitre-atlas/atlas-data GitHub repository.
ATLAS_HOME = 'https://raw.githubusercontent.com/mitre-atlas/atlas-data/main/data/case-studies/'

def import_case_study(case_study_id):
    """Download one ATLAS case study and return it as parsed YAML.

    Args:
        case_study_id: Case-study identifier used as the file stem, e.g.
            ``'AML.CS0000'``; ``'.yaml'`` is appended and the result is
            resolved against ``ATLAS_HOME``.

    Returns:
        The object produced by ``yaml.safe_load`` for the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    req = requests.get(ATLAS_HOME + case_study_id + '.yaml', timeout=30)
    # Without this check an error page (for an unknown id, say) would be fed
    # to the YAML parser and could come back looking like a valid case study.
    req.raise_for_status()
    return yaml.safe_load(req.content)

In [12]:
# Download the ATLAS case study with id 'AML.CS0000' (performs a network request).
cs = import_case_study('AML.CS0000')

In [4]:
def convert_case_study(case_study, save_location=None):
    """Build an AVID ``Report`` from one parsed ATLAS case study.

    Args:
        case_study: Mapping for a single case study, e.g. the value returned
            by ``import_case_study``. The keys ``'target'``, ``'name'``,
            ``'id'``, ``'summary'`` and ``'incident-date'`` are read;
            ``'references'`` (an iterable of mappings with ``'title'`` and
            ``'url'``) and ``'reporter'`` are used when present.
        save_location: Optional path. When given, the finished report is
            also written there through ``report.save``.

    Returns:
        The populated ``Report``.

    Raises:
        KeyError: If one of the required keys is missing from ``case_study``.
    """
    report = Report()

    # The case study's target is recorded both as the deployer and as the
    # affected artifact. ``Affects.artifacts`` is a list, so wrap the entry.
    report.affects = Affects(
        developer=[],
        deployer=[case_study['target']],
        artifacts=[
            Artifact(
                type=ArtifactTypeEnum.system,
                name=case_study['target'],
            )
        ],
    )

    report.problemtype = Problemtype(
        classof=ClassEnum.atlas,
        type=TypeEnum.advisory,
        description=LangValue(
            lang='eng',
            value=case_study['name'],
        ),
    )

    # First reference points at the case study's own ATLAS page. Everything
    # here must come from the ``case_study`` argument, not from a notebook
    # global, otherwise converting a second study silently reuses the first.
    atlas_reference = Reference(
        type='source',
        label=case_study['name'],
        url='https://atlas.mitre.org/studies/' + case_study['id'],
    )
    cited_references = [
        Reference(
            type='source',
            label=ref['title'],
            url=ref['url'],
        )
        # Tolerate a missing or empty reference list.
        for ref in case_study.get('references') or []
    ]
    report.references = [atlas_reference] + cited_references

    report.description = LangValue(
        lang='eng',
        value=case_study['summary'],
    )

    if 'reporter' in case_study:
        # ``Report.credit`` is a list of LangValue entries.
        report.credit = [
            LangValue(
                lang='eng',
                value=case_study['reporter'],
            )
        ]

    report.reported_date = case_study['incident-date']

    if save_location is not None:
        report.save(save_location)

    return report

In [7]:
# Convert the case study held in `cs` into a report object.
report = convert_case_study(cs)

In [8]:
# Persist the report through its save method, targeting 'sample.json' in the working directory.
report.save('sample.json')

In [None]:
from pydantic import BaseModel, AnyUrl
from enum import Enum
from typing import Dict, List, Optional
from typing_extensions import TypedDict
from datetime import datetime

In [None]:
class LangValue(TypedDict):
    """A piece of text together with the language it is written in."""
    lang: str  # language code; the code in this notebook always passes 'eng'
    value: str  # the text itself

class ArtifactTypeEnum(str, Enum):
    """Kinds of artifact a report can point at; members are also plain ``str`` values."""
    dataset = 'dataset'
    model = 'model'
    system = 'system'
    
class SepEnum(str, Enum):
    """Taxonomy subcategory codes, keyed by code with a readable label as value.

    Codes are prefixed ``S``, ``E`` or ``P``; the section comments below name
    the three groups as Security, Ethics and Performance (presumably the
    "SEP" of the class name).

    Members must be defined with ``=``. Writing ``NAME: 'label'`` only
    creates an annotation and leaves the enum without any members.
    """
    # Security
    S0100 = 'Software Vulnerability'
    S0200 = 'Supply Chain Compromise'
    S0201 = 'Model Compromise'
    S0202 = 'Software Compromise'
    S0300 = 'Over-permissive API'
    S0301 = 'Information Leak'
    S0302 = 'Excessive Queries'
    S0400 = 'Model Bypass'
    S0401 = 'Bad Features'
    S0402 = 'Insufficient Training Data'
    S0403 = 'Adversarial Example'
    S0500 = 'Exfiltration'
    S0501 = 'Model inversion'
    S0502 = 'Model theft'
    S0600 = 'Data Poisoning'
    S0601 = 'Ingest Poisoning'
    # Ethics
    E0100 = 'Bias/ Discrimination'
    E0101 = 'Group fairness'
    E0102 = 'Individual fairness'
    E0200 = 'Explainability'
    E0201 = 'Global explanations'
    E0202 = 'Local explanations'
    E0300 = 'User actions'
    E0301 = 'Toxicity'
    E0302 = 'Polarization/ Exclusion'
    E0400 = 'Misinformation'
    E0401 = 'Deliberative Misinformation'
    E0402 = 'Generative Misinformation'
    # Performance
    P0100 = 'Data issues'
    P0101 = 'Data drift'
    P0102 = 'Concept drift'
    P0103 = 'Data entanglement'
    P0104 = 'Data quality issues'
    P0105 = 'Feedback loops'
    P0200 = 'Robustness'
    P0201 = 'Resilience/ Stability'
    P0202 = 'OOD generalization'
    P0203 = 'Scaling'
    P0300 = 'Privacy'
    P0301 = 'Anonymization'
    P0302 = 'Randomization'
    P0303 = 'Encryption'
    P0400 = 'Safety'
    P0401 = 'Psychological Safety'
    P0402 = 'Physical safety'
    P0403 = 'Socioeconomic safety'
    P0404 = 'Environmental safety'
        
class LifecycleEnum(str, Enum):
    """Stages of the AI lifecycle, keyed by stage code.

    Members must be defined with ``=``. Writing ``NAME: 'label'`` only
    creates an annotation and leaves the enum without any members.
    """
    L01 = 'L01: Business Understanding'
    L02 = 'L02: Data Understanding'
    L03 = 'L03: Data Preparation'
    L04 = 'L04: Model Development'
    L05 = 'L05: Evaluation'
    L06 = 'L06: Deployment'

class ClassEnum(str, Enum):
    """Source class of a report's problem type (where the entry originates).

    Members must be defined with ``=``. Writing ``NAME: 'label'`` only
    creates an annotation, so lookups such as ``ClassEnum.atlas`` would fail.
    """
    aiid = 'AIID Incident'
    atlas = 'ATLAS Case Studies'
    cve = 'CVE Entry'
    llm = 'LLM Evaluation'
        
class TypeEnum(str, Enum):
    """Kinds of report entry; members are also plain ``str`` values."""
    issue = 'Issue'
    advisory = 'Advisory'
    measurement = 'Measurement'
    detection = 'Detection'
    
class MethodEnum(str, Enum):
    """Detection methods usable as keys of ``Metric.detection_method``.

    Members must be defined with ``=``. Writing ``NAME: 'label'`` only
    creates an annotation and leaves the enum without any members.
    """
    test = 'Significance Test'
    thres = 'Static Threshold'
    
class Artifact(TypedDict):
    """One affected artifact: what kind of thing it is and what it is called."""
    type: ArtifactTypeEnum  # dataset, model or system
    name: str  # name of the artifact

class Metadata(TypedDict):
    """Bookkeeping information attached to a report."""
    report_id: str  # identifier of the report

class Affects(TypedDict):
    """Who and what a report concerns."""
    developer: List[str]  # names of the artifact's developers
    deployer: List[str]  # names of the artifact's deployers
    artifacts: List[Artifact]  # the affected artifacts themselves (always a list)
        
class Problemtype(TypedDict):
    """Classification of the problem a report describes."""
    classof: ClassEnum  # where the entry originates (AIID, ATLAS, CVE, LLM evaluation)
    type: TypeEnum  # kind of entry (issue, advisory, measurement, detection)
    description: LangValue  # short textual description
        
class Metric(TypedDict):
    """A single metric entry of a report."""
    name: str  # name of the metric
    features: Dict  # untyped mapping; its expected keys are not defined in this notebook
    detection_method: Dict[MethodEnum, str]  # maps a detection method to a string (meaning of the string not shown here)

class Reference(TypedDict):
    """A link to supporting material for a report."""
    type: str  # kind of reference; the ATLAS conversion in this notebook uses 'source'
    label: str  # human-readable title of the reference
    url: str # AnyUrl is a better fit, but keeping this because submissions are not standard yet

class AvidTaxonomy(TypedDict):
    """Taxonomy classification of a report."""
    vuln_id: Optional[str]  # related vulnerability id, or None when there is none
    risk_domain: List[str]  # risk domains the report falls under
    sep_view: List[SepEnum]  # matching SepEnum subcategory codes
    lifecycle_view: List[LifecycleEnum]  # matching lifecycle stages
    taxonomy_version: str  # version of the taxonomy used for the classification
        
class Impact(TypedDict):
    """Impact section of a report; currently holds only the AVID taxonomy view."""
    avid: AvidTaxonomy  # classification under the AVID taxonomy

class Report(BaseModel):
    """Top-level AVID report.

    Every section except ``data_type`` is optional and defaults to ``None``
    so that a report can be created empty and filled in field by field.
    """
    data_type: str = 'AVID'  # fixed marker identifying the document kind
    data_version: Optional[str] = None  # version of the report format
    metadata: Optional[Metadata] = None  # bookkeeping such as the report id
    affects: Optional[Affects] = None  # developers, deployers and artifacts concerned
    # Structured classification, not free text: the converter in this
    # notebook assigns a Problemtype mapping to this field.
    problemtype: Optional[Problemtype] = None
    metrics: Optional[List[Metric]] = None  # metric entries, if any
    references: Optional[List[Reference]] = None  # supporting links
    description: Optional[LangValue] = None  # full description of the report
    impact: Optional[Impact] = None  # taxonomy classification
    credit: Optional[List[LangValue]] = None  # people or organisations credited
    reported_date: Optional[datetime] = None  # when the issue was reported

In [None]:
from json2html import *
import json

In [None]:
# j = json.load(open('./vulnerabilities/2022/AVID-2022-V001.json'))
# Read the JSON document inside a context manager so the file handle is
# closed as soon as parsing finishes instead of being left to the GC.
with open('Untitled.txt') as infile:
    j = json.load(infile)

In [None]:
# Serialise the loaded document `j` straight into sample.json.
with open("sample.json", "w") as outfile:
    json.dump(j, outfile)

In [None]:
# Render the loaded document as an HTML table carrying the given id/class attributes.
json2html.convert(
    json=j,
    table_attributes='id="info-table" class="table table-bordered table-hover"',
)

In [None]:
# Load the document from test.json; the context manager closes the file
# handle right after parsing instead of leaving it open.
with open('test.json') as infile:
    j = json.load(infile)

In [None]:
import jsonschema2md

# Set up the schema-to-markdown converter with the options used in this notebook.
parser = jsonschema2md.Parser(show_examples="all", examples_as_yaml=False)
md_lines = parser.parse_schema(j)
# The pieces are printed back-to-back with no separator between them.
print(*md_lines, sep='')

In [None]:
import pyairtable
import os
from dotenv import load_dotenv
import json

# Pull environment variables from a local .env file, then read the Airtable key.
load_dotenv()
api_key = os.getenv('AIRTABLE_API_KEY')
# Load connector settings; the context manager makes sure the file handle is
# closed once the JSON has been parsed.
with open('../connectors/config.json') as config_file:
    config = json.load(config_file)

In [None]:
# Fetch every record of the configured Airtable table.
airtable_config = config['airtable']
all_data = pyairtable.Table(
    api_key,
    airtable_config['base_id'],
    airtable_config['table_name'],
).all()

In [None]:
def save_as_report(record, output_dir='../reports/dev/'):
    """Write one Airtable submission record to disk as an AVID-style JSON report.

    Args:
        record: Mapping with an ``'id'``, a ``'createdTime'`` string and a
            ``'fields'`` mapping that holds the submission answers read below.
        output_dir: Directory that receives ``<record id>.json``. Defaults to
            the development reports folder used originally.

    Returns:
        None. The report is written to disk as a side effect.

    Raises:
        KeyError: If the record lacks any of the entries read below.
        OSError: If the output file cannot be opened for writing.
    """
    import os  # local import: keeps this cell independent of other cells' imports

    fields = record['fields']

    # NOTE(review): several keys differ from the Report model defined in this
    # notebook ('version' vs data_version, 'artifact' vs artifacts, 'class' vs
    # classof, 'credits' vs credit). Left untouched because existing files and
    # consumers may rely on them -- confirm which spelling is intended.
    report = {
        'data_type': 'AVID',
        'version': '',
        'metadata': {
            'report_id': ''
        },
        'affects': {
            'developer': fields['Developer of Artifact'],
            'deployer': fields['Deployer of Artifact'],
            'artifact': [
                {
                    'type': fields['Artifact Type'],
                    'name': fields['Artifact Name']
                }
            ]
        },
        'problemtype': {
            'class': '',
            # Only the text before the first ':' of the report type is kept.
            'type': fields['Report Type'].split(':')[0],
            'description': {
                'lang': 'eng',
                'value': fields['Title']
            }
        },
        'metrics': [],
        'references': fields['References'],
        'description': {
            'lang': 'eng',
            'value': fields['Description']
        },
        'impact': {
            'avid': {
                'vuln_id': '',
                'risk_domain': fields['Relevant SEP risk domains'],
                # NOTE(review): only the Ethics subcategories feed sep_view;
                # Security and Performance are not read here -- confirm.
                'sep_view': fields['Relevant Ethics subcategories'],
                'lifecycle_view': fields['Relevant stages of the AI lifecycle']
            }
        },
        'credits': [
            {
                'lang': 'eng',
                'value': fields['Submitter Name']
            }
        ],
        # Keep the part of the creation timestamp that precedes the first 'T'.
        'reported_date': record['createdTime'].split('T')[0],
    }

    # save report; the context manager closes the file even if dumping fails
    output_path = os.path.join(output_dir, record['id'] + '.json')
    with open(output_path, 'w') as output:
        json.dump(report, output, indent=4)

In [None]:
record = all_data[3].copy()
# .copy() is shallow, so give this record its own 'fields' mapping; otherwise
# the defaults added below would also appear in the entry kept in all_data.
record['fields'] = dict(record['fields'])

# fill essential empty entries
record_keys = list(record['fields'].keys())
strings = ['Submitter Organization'] + ['Relevant '+s+' subcategories' for s in ['Security','Ethics','Performance']]
for st in strings:
    if st not in record_keys:
        # The membership test looks at record['fields'], so the default has to
        # be stored there as well (not at the top level of the record).
        record['fields'][st] = ''

record

In [None]:
import requests
import jsoneditor

# Fetch a sample JSON payload from the public jsonplaceholder service and hand it to jsoneditor.
data = requests.get('https://jsonplaceholder.typicode.com/comments').json()
jsoneditor.editjson(data)

In [None]:
# Display the second element of the fetched payload.
data[1]