In [31]:
import requests
import pandas as pd

# Loading JSON schema file from Github

def load_json_from_url(url):
    response = requests.get(url)
    return response.json()

# Resolves JSON schema $refs based on definitions

def resolve_ref(ref, definitions):
    ref_path = ref.strip('#/').split('/')
    data = definitions
    for step in ref_path:
        data = data.get(step, {})
    return data



In [29]:
def extract_schema_data_statement(schema, definitions, path='', required_fields=[]):
    rows = []
    if 'properties' in schema:
        properties = schema['properties']
        for prop_name, prop_details in properties.items():
            full_path = f"{path}.{prop_name}" if path else prop_name
            is_required = prop_name in required_fields
            if '$ref' in prop_details:
                resolved_details = resolve_ref(prop_details['$ref'], definitions)
                rows += extract_schema_data_statement(resolved_details, definitions, full_path, required_fields)
            else:
                prop_type = prop_details.get('type', 'N/A')
                description = prop_details.get('description', 'No description available')
                rows.append({
                    'Field': full_path,
                    'Type': prop_type,
                    'Description': description,
                    'Required': 'Yes' if is_required else 'No'
                })
    return rows

In [32]:
# Statement Schema Processing
url = "https://raw.githubusercontent.com/openownership/data-standard/main/schema/statement.json"
json_schema = load_json_from_url(url)
definitions = json_schema.get('$defs', {})
statement_schema = definitions.get('Statement', {})
required_fields = statement_schema.get('required', [])
data = extract_schema_data_statement(statement_schema, definitions, required_fields=required_fields)
df_statement = pd.DataFrame(data)
display(df_statement)


Unnamed: 0,Field,Type,Description,Required
0,statementId,string,A persistent globally unique identifier for th...,Yes
1,statementDate,string,The date on which this statement was declared ...,Yes
2,annotations,array,Annotations about this Statement or parts of t...,No
3,publicationDetails,object,Information concerning the publication of this...,No
4,declaration,string,An identifier or reference for a declaration w...,No
5,declarationSubject,string,A `recordId` value for the subject of a benefi...,Yes
6,recordId,string,A unique identifier for the record (within the...,Yes
7,recordType,string,The type of record (within the publisher's sys...,Yes
8,recordStatus,string,The lifecycle status of the record (within the...,No
9,recordDetails,object,"The details of the entity, person or relations...",Yes


In [4]:
def extract_schema_data_entity(schema, definitions, path='', required_fields=[]):
    rows = []
    if 'properties' in schema:
        properties = schema['properties']
        for prop_name, prop_details in properties.items():
            full_path = f"{path}.{prop_name}" if path else prop_name
            is_required = prop_name in required_fields
            if '$ref' in prop_details:
                resolved_details = resolve_ref(prop_details['$ref'], definitions)
                rows += extract_schema_data_entity(resolved_details, definitions, full_path, required_fields)
            else:
                prop_type = prop_details.get('type', 'N/A')
                description = prop_details.get('description', 'No description available')
                rows.append({
                    'Field': full_path,
                    'Type': prop_type,
                    'Description': description,
                    'Required': 'Yes' if is_required else 'No'
                })
    return rows

In [33]:
# Entity schema processing
url = "https://raw.githubusercontent.com/openownership/data-standard/main/schema/entity-record.json"
json_schema = load_json_from_url(url)
definitions = json_schema.get('$defs', {})
required_fields = json_schema.get('required', [])
data = extract_schema_data_entity(json_schema, definitions, required_fields=required_fields)
df_entity = pd.DataFrame(data)
display(df_entity)


Unnamed: 0,Field,Type,Description,Required
0,isComponent,boolean,Whether this entity is a component in an indir...,Yes
1,entityType,object,The form of the entity described in the Statem...,Yes
2,name,string,The declared name of this entity.,No
3,alternateNames,array,An array of other names this entity is known by.,No
4,identifiers,array,One or more official identifiers for this enti...,No
5,foundingDate,string,"The date on which this entity was founded, cre...",No
6,dissolutionDate,string,The date on which this entity was dissolved or...,No
7,addresses,array,One or more addresses for this entity.,No
8,uri,string,Where a persistent URI (https://en.wikipedia.o...,No
9,formedByStatute,object,The law which mandated the formation of the en...,No


In [7]:
def extract_schema_data_relationship(schema, definitions, path='', required_fields=[]):
    rows = []
    if 'properties' in schema:
        properties = schema['properties']
        for prop_name, prop_details in properties.items():
            full_path = f"{path}.{prop_name}" if path else prop_name
            is_required = prop_name in required_fields
            if '$ref' in prop_details:
                resolved_details = resolve_ref(prop_details['$ref'], definitions)
                rows += extract_schema_data_relationship(resolved_details, definitions, full_path, required_fields)
            else:
                prop_type = prop_details.get('type', 'N/A')
                description = prop_details.get('description', 'No description available')
                rows.append({
                    'Field': full_path,
                    'Type': prop_type,
                    'Description': description,
                    'Required': 'Yes' if is_required else 'No'
                })
    return rows


In [34]:
# Relationship schema processing
url = "https://raw.githubusercontent.com/openownership/data-standard/main/schema/relationship-record.json"
json_schema = load_json_from_url(url)
definitions = json_schema.get('$defs', {})
required_fields = json_schema.get('required', [])
data = extract_schema_data_relationship(json_schema, definitions, required_fields=required_fields)
df_relationship = pd.DataFrame(data)
display(df_relationship)


Unnamed: 0,Field,Type,Description,Required
0,isComponent,boolean,Whether this relationship is a component of a ...,Yes
1,componentRecords,array,The `recordId` values of all component records...,No
2,subject,,The `recordId` for the subject of the relation...,Yes
3,interestedParty,,The `recordId` for the interested party in the...,Yes
4,interests,array,A description of the interests held by the int...,No


In [10]:
def extract_schema_data_components(schema, definitions, path='', required_fields=[]):
    rows = []
    if 'properties' in schema:
        properties = schema['properties']
        for prop_name, prop_details in properties.items():
            full_path = f"{path}.{prop_name}" if path else prop_name
            is_required = prop_name in required_fields
            if '$ref' in prop_details:
                resolved_details = resolve_ref(prop_details['$ref'], definitions)
                rows += extract_schema_data_components(resolved_details, definitions, full_path, required_fields)
            else:
                prop_type = prop_details.get('type', 'N/A')
                description = prop_details.get('description', 'No description available')
                rows.append({
                    'Field': full_path,
                    'Type': prop_type,
                    'Description': description,
                    'Required': 'Yes' if is_required else 'No'
                })
    return rows


In [12]:
# Components schema processing
url = "https://raw.githubusercontent.com/openownership/data-standard/main/schema/components.json"
json_schema = load_json_from_url(url)
definitions = json_schema.get('$defs', {})
data = []
for component_name, component_schema in definitions.items():
    required_fields = component_schema.get('required', [])
    component_data = extract_schema_data_components(component_schema, definitions, path=component_name, required_fields=required_fields)
    data.extend(component_data)

df_components = pd.DataFrame(data)
display(df_components)


Unnamed: 0,Field,Type,Description,Required
0,Address.type,string,"The function of the address, using the address...",No
1,Address.address,string,"The address, with each line or component separ...",No
2,Address.postCode,string,The postal code for this address.,No
3,Country.name,string,The name of the country,Yes
4,Country.code,string,The 2-letter country code (ISO 3166-1) for thi...,No
5,Jurisdiction.name,string,The name of the jurisdiction,Yes
6,Jurisdiction.code,string,The 2-letter country code (ISO 3166-1) or the ...,No
7,Identifier.id,string,"The identifier for a person or entity, as issu...",No
8,Identifier.scheme,string,"For entities, a code from org-id.guide (https:...",No
9,Identifier.schemeName,string,The name of the identifier-issuing authority.,No


In [35]:
df_statement['Source'] = 'Statement'
df_entity['Source'] = 'Entity'
df_relationship['Source'] = 'Relationship'
df_components['Source'] = 'Components'

df_final = pd.concat([df_statement, df_entity, df_relationship, df_components], ignore_index=True)

display(df_final)

Unnamed: 0,Field,Type,Description,Required,Source
0,statementId,string,A persistent globally unique identifier for th...,Yes,Statement
1,statementDate,string,The date on which this statement was declared ...,Yes,Statement
2,annotations,array,Annotations about this Statement or parts of t...,No,Statement
3,publicationDetails,object,Information concerning the publication of this...,No,Statement
4,declaration,string,An identifier or reference for a declaration w...,No,Statement
5,declarationSubject,string,A `recordId` value for the subject of a benefi...,Yes,Statement
6,recordId,string,A unique identifier for the record (within the...,Yes,Statement
7,recordType,string,The type of record (within the publisher's sys...,Yes,Statement
8,recordStatus,string,The lifecycle status of the record (within the...,No,Statement
9,recordDetails,object,"The details of the entity, person or relations...",Yes,Statement
