This is a developer script to help quickly get started on onboarding a new NodeSchema from AWS.
It scans through the "[service name]/[latest spec date]/service-2.json" file in the botocore repo.

For now, it can only generate a `CartographyNodeProperties`, and not yet a `CartographyRelProperties`.

0. git clone git@github.com:boto/botocore.git to some path and set `BOTOCORE_REPO_PATH` to that path.

1. Open this notebook

2. Get an idea of the name of the object you want to find.

    ```python
    list_object_names('kms')
    ...
    KeyMetadata
    ...
    ```

3. Generate the NodeSchema

    ```python
    s = build_from_aws_service('kms', 'KeyMetadata')
    print(s)
    ```

4. Make appropriate changes, such as an appropriate `id`, and choices for `extra_index` and `set_in_kwargs`.

In [None]:
"""
This is a developer script to help quickly get started on onboarding a new NodeSchema from AWS.
It scans through the "[service name]/[latest spec date]/service-2.json" file in the botocore repo.

For now, it can only generate a `CartographyNodeProperties`, and not yet a `CartographyRelProperties`.

0. git clone git@github.com:boto/botocore.git to some path and set `BOTOCORE_REPO_PATH` to that path.

1. Open this notebook

2. Get an idea of the name of the object you want to find.

```python
list_object_names('kms')
...
KeyMetadata
...
```

3. Generate the NodeSchema

```python
s = build_from_aws_service('kms', 'KeyMetadata')
print(s)
```

4. Make appropriate changes, such as an appropriate `id`, and choices for `extra_index` and `set_in_kwargs`.
"""

# Path to a local clone of the botocore repo. `_load_service_descriptor` reads
# "<this path>/botocore/data/<service>/<spec date>/service-2.json", so edit this to match your checkout.
BOTOCORE_REPO_PATH = '/Users/myself/src/botocore'

import json
import os
import re
from pprint import pprint
from typing import Any, Dict, List
# Mapping produced by `_parse_members`: snake_case property name -> original member name.
ParsedMembers = Dict[str, str]
# Shape name for string-typed members.
# NOTE(review): no active code visible in this file references it -- confirm whether it is still needed.
STRING_SHAPE = 'String'


def list_object_names(service_name: str) -> List[str]:
    '''
    List the names of objects returned by various List*, Get* and Describe* calls.

    The current method is somewhat crude in that it looks directly at data['shapes'],
    which includes shapes that we are not interested in.
    For example, data['shapes'] has 'DescribeRouteTablesRequest', 'DescribeRouteTablesResult',
    and 'RouteTable', even though you may not be interested in the request and response structures.
    So, we keep only shapes whose type is 'structure' and whose name does not end with
    'Request', 'Response', 'Result', or 'Exception'.

    A better way could be to reason through the data in a more intelligent way, digging continually
    from the API method we're interested in.

    e.g.,
    data['operations']['DescribeRouteTables']['output']['shape'] -> 'DescribeRouteTablesResult',

    data['shapes']['DescribeRouteTablesResult']['type'] -> 'structure'
    data['shapes']['DescribeRouteTablesResult']['members']['RouteTables']['shape'] -> 'RouteTableList' # plural 'members'

    data['shapes']['RouteTableList']['type'] -> 'list'
    data['shapes']['RouteTableList']['member']['shape'] -> 'RouteTable' # singular 'member'

    # finally, our target object
    data['shapes']['RouteTable']['type'] -> 'structure'

    We may also do a kind of fuzzy searching. e.g., 'Key' -> 'KeyMetadata'.

    :param service_name: name of the service folder under botocore/data, e.g. 'kms'.
    :return: the matching shape names, in the order they appear in the descriptor.
    '''
    # Suffixes that mark request/response wrappers and error shapes rather than resources.
    excluded_suffixes = ('Request', 'Response', 'Result', 'Exception')
    shapes = _get_object_shapes(service_name=service_name)
    return [
        shape_name
        for shape_name, shape in shapes.items()
        # The 'type' lookup comes first so it is evaluated for every shape.
        if shape['type'] == 'structure' and not shape_name.endswith(excluded_suffixes)
    ]

def build_from_aws_service(service_name: str, object_name: str) -> str:
    '''
    Build the source text of a NodeProperties class from a particular object in AWS.

    Every member of the object's shape is turned into a `PropertyRef`; members are not
    filtered by type.

    :param service_name: name of the service folder under botocore/data, e.g. 'kms'.
    :param object_name: name of a shape in that service's descriptor, e.g. 'KeyMetadata'.
    :return: the generated class definition as a string.
    :raises KeyError: if `object_name` is not a shape of the service, or the shape has no 'members'.
    '''
    shapes = _get_object_shapes(service_name=service_name)
    object_shape: Dict[str, Any] = shapes[object_name]
    parsed_members = _parse_members(object_shape=object_shape)
    return _build_node_class(node_name=object_name, node_members=parsed_members)

def _get_object_shapes(service_name: str) -> Dict[str, Any]:
    '''
    Gets all object shapes from the service descriptor file.

    :param service_name: name of the service folder under botocore/data, e.g. 'kms'.
    :return: the value stored under the descriptor's 'shapes' key (shape name -> shape spec).
    :raises KeyError: if the descriptor has no 'shapes' key.
    '''
    service_descriptor = _load_service_descriptor(service_name=service_name)
    return service_descriptor['shapes']

def _load_service_descriptor(service_name: str) -> Dict[str, Any]:
    '''
    Load the latest "service-2.json" descriptor of a service from the local botocore clone.

    The spec folders under "<BOTOCORE_REPO_PATH>/botocore/data/<service_name>" are compared
    as strings and the greatest one is taken as the latest spec.

    :param service_name: name of the service folder under botocore/data, e.g. 'kms'.
    :return: the parsed JSON descriptor.
    :raises FileNotFoundError: if the service folder is missing, contains no spec folders,
        or the chosen spec folder has no "service-2.json".
    '''
    service_dir = os.path.join(BOTOCORE_REPO_PATH, 'botocore', 'data', service_name)
    all_specs = _list_folders(service_dir)
    if not all_specs:
        # Fail with an actionable message instead of an IndexError from an empty listing.
        raise FileNotFoundError(
            'No spec folders found under {service_dir}'.format(service_dir=service_dir),
        )
    latest_spec = max(all_specs)
    file_path = os.path.join(service_dir, latest_spec, 'service-2.json')
    # Read as UTF-8 explicitly rather than relying on the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def _parse_members(object_shape: Dict[str, Any]) -> ParsedMembers:
    '''
    Takes a shape and returns a Dict: snake_prop_name -> CamelPropertyRef.

    Every entry of the shape's 'members' is included, regardless of the member's type.

    :param object_shape: a shape spec that has a 'members' mapping keyed by member name.
    :return: snake_case property name -> original member name.
    :raises KeyError: if the shape has no 'members' key.
    '''
    # NOTE: a filter that kept only members whose 'shape' equals STRING_SHAPE used to sit here
    # but was disabled, so members of all types are emitted.
    return {
        _camel_to_snake(member_name): member_name
        for member_name in object_shape['members']
    }

def _build_node_class(node_name: str, node_members: ParsedMembers) -> str:
    class_str = '''class {node_name}NodeProperties(CartographyNodeProperties):
    """
    Schema describing a {node_name}.
    """
{props}
''' 
    rendered_props = ''
    # for prop_name, property_ref in node_members.items():
    for prop_name in sorted(node_members.keys()):
        property_ref = node_members[prop_name]
        redered_prop = "    {prop_name}: PropertyRef = PropertyRef('{property_ref}')".format(prop_name=prop_name, property_ref=property_ref)
        rendered_props += redered_prop + "\n"
    out = class_str.format(node_name=node_name, props=rendered_props)
    return out

def _camel_to_snake(name: str) -> str:
    '''
    AI-generated function: "RouteTableId" -> "route_table_id"
    '''
    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

def _list_folders(directory: str) -> List[str]:
    '''
    AI-generated: lists subfolders.
    '''
    return [name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))]


In [12]:
list_object_names('kms')

['AliasListEntry',
 'CustomKeyStoresListEntry',
 'GrantConstraints',
 'GrantListEntry',
 'KeyListEntry',
 'KeyMetadata',
 'MultiRegionConfiguration',
 'MultiRegionKey',
 'RecipientInfo',
 'RotationsListEntry',
 'Tag',
 'XksKeyConfigurationType',
 'XksProxyAuthenticationCredentialType',
 'XksProxyConfigurationType']

In [13]:
s = build_from_aws_service('kms', 'KeyMetadata')
print(s)

class KeyMetadataNodeProperties(CartographyNodeProperties):
    """
    Schema describing a KeyMetadata.
    """
    arn: PropertyRef = PropertyRef('Arn')
    aws_account_id: PropertyRef = PropertyRef('AWSAccountId')
    cloud_hsm_cluster_id: PropertyRef = PropertyRef('CloudHsmClusterId')
    creation_date: PropertyRef = PropertyRef('CreationDate')
    custom_key_store_id: PropertyRef = PropertyRef('CustomKeyStoreId')
    customer_master_key_spec: PropertyRef = PropertyRef('CustomerMasterKeySpec')
    deletion_date: PropertyRef = PropertyRef('DeletionDate')
    description: PropertyRef = PropertyRef('Description')
    enabled: PropertyRef = PropertyRef('Enabled')
    encryption_algorithms: PropertyRef = PropertyRef('EncryptionAlgorithms')
    expiration_model: PropertyRef = PropertyRef('ExpirationModel')
    key_agreement_algorithms: PropertyRef = PropertyRef('KeyAgreementAlgorithms')
    key_id: PropertyRef = PropertyRef('KeyId')
    key_manager: PropertyRef = PropertyRef('KeyManager'