In [1]:
import re
import json

def load_json(fp):
    """
    Load and parse a JSON document from disk.

    Parameters:
        fp (str): The path to the JSON file.

    Returns:
        The Python object produced by json.load for the file's content.
    """
    # Decode explicitly as UTF-8 rather than the platform default encoding,
    # so non-ASCII content loads the same way on every platform.
    with open(fp, encoding='utf-8') as jsf:
        return json.load(jsf)

# Maps the first three bits (tritet) of a stream's first byte to the kind of
# serialization that starts there. All eight tritets are listed so that a
# lookup never fails for a valid byte.
StartingTritets = {
  '000': 'ANNOTATED_T',
  '001': 'CESR_T_COUNT_CODE',
  '010': 'CESR_T_OP_CODE',
  '011': 'JSON',
  '100': 'MGPK', # fixMap
  '101': 'CBOR', # Map Major Type 5
  '110': 'MGPK', # big map 16 or 32
  '111': 'CESR_B', # count code or op code
}

def dict_to_keri_byte_str(data_dict, to_bytes=True):
    """
    Serialize a dictionary to compact JSON.

    Parameters:
        data_dict (dict): The data to serialize.
        to_bytes (bool): When True (default) the JSON text is UTF-8 encoded
            and returned as bytes; otherwise the text is returned as str.

    Returns:
        bytes | str: The compact JSON serialization.
    """
    # Compact separators: no extra spaces after commas and colons.
    # ensure_ascii=False keeps non-ASCII characters as themselves instead of
    # \uXXXX escapes, so the encoded length is the real UTF-8 byte count.
    json_str = json.dumps(data_dict, separators=(',', ':'), ensure_ascii=False)
    if to_bytes:
        json_str = json_str.encode('utf-8')
    return json_str
    

def get_file_length_in_bytes(file_path):
    """
    Measure a file's size by reading its full binary content.

    Parameters:
        file_path (str): The path to the file.

    Returns:
        int: The number of bytes read from the file.
    """
    with open(file_path, 'rb') as handle:
        content = handle.read()
    return len(content)
def read_file_as_bytes(file_path):
    """
    Read a file in binary mode and return its entire content.

    Parameters:
        file_path (str): The path to the file.

    Returns:
        bytes: The raw content of the file.
    """
    with open(file_path, 'rb') as handle:
        content = handle.read()
    return content

def is_bytes(obj):
    """Return True when `obj` is an instance of `bytes`, False otherwise."""
    matches = isinstance(obj, bytes)
    return matches
def is_string(obj):
    """Return True when `obj` is an instance of `str`, False otherwise."""
    matches = isinstance(obj, str)
    return matches

def byte_to_bits(byte):
    """Render an integer as a binary string, zero-padded to at least 8 digits."""
    return f'{byte:08b}'
  
def bytes_to_bits(byte_array):
    """
    Concatenate the 8-digit binary rendering of every item in `byte_array`.

    Parameters:
        byte_array: An iterable of integers (for example a bytes object).

    Returns:
        str: The binary digits of all items, in order, as one string.
    """
    pieces = []
    for octet in byte_array:
        pieces.append(format(octet, '08b'))
    return ''.join(pieces)

def get_file_length_in_chars(file_path):
    """
    Count the characters of a file read as UTF-8 text.

    Parameters:
        file_path (str): The path to the file.

    Returns:
        int: The number of characters in the decoded text.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        text = handle.read()
    return len(text)



def get_stream_tritet(stream):
    """
    Classify a stream by the first three bits (tritet) of its first byte.

    Parameters:
        stream (bytes | str): The stream to inspect. Anything that is not
            `bytes` is converted with its `.encode()` method first.

    Returns:
        str: The StartingTritets entry for the leading tritet.

    Raises:
        IndexError: If the stream is empty.
        KeyError: If the tritet has no entry in StartingTritets.

    Side effects:
        Prints the tritet that was found.
    """
    raw = stream if isinstance(stream, bytes) else stream.encode()

    # Render the first byte as 8 binary digits and keep the leading three.
    leading_bits = format(raw[0], '08b')
    first_tritet = leading_bits[:3]
    print('first_tritet', first_tritet)
    return StartingTritets[first_tritet]




# URL-safe Base64 alphabet; a character's position in this string is its 6-bit value.
B64_VALUES = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"

def value_of(ch):
    """
    Return the 6-bit integer value (0-63) of a single Base64 character.

    Parameters:
        ch (str): Exactly one character from B64_VALUES.

    Returns:
        int: The position of `ch` in B64_VALUES.

    Raises:
        ValueError: If `ch` is not exactly one character, or is not in the alphabet.
    """
    # str.index performs a substring search, so without this guard '' and
    # multi-character input such as 'AB' would silently return 0.
    if len(ch) != 1:
        raise ValueError(f"Expected a single Base64 character, got {ch!r}")
    return B64_VALUES.index(ch)

def b64_to_int(value):
    """
    Decode a Base64 string into an integer, most significant character first.

    see: https://trustoverip.github.io/tswg-cesr-specification/#non-canonical-base64

    Parameters:
        value (str): The Base64 characters to decode; each is looked up with value_of.

    Returns:
        int: The decoded integer (0 for an empty string).
    """
    total = 0
    for symbol in value:
        # Make room for the next 6 bits, then merge in this character's value.
        total = (total << 6) | value_of(symbol)
    return total

def int_to_b64(value, length=4):
    """
    Encode an integer as a fixed-width Base64 string using the B64_VALUES alphabet.

    Exactly `length` characters are produced, so small values come out with
    leading 'A' characters (the alphabet's zero digit).

    Parameters:
        value (int): The integer to convert.
        length (int): The number of Base64 characters to produce.

    Returns:
        str: The Base64 representation of `value`, `length` characters long.

    Raises:
        ValueError: If `value` is negative or does not fit in `length` characters.
    """
    # Each Base64 character carries 6 bits, so the value must fit in 6 * length bits.
    if value < 0 or value >= (1 << (6 * length)):
        raise ValueError(f"Integer value out of range for {length} Base64 characters")

    # Collect digits least-significant first, then flip them into reading order.
    digits = []
    remaining = value
    for _ in range(length):
        digits.append(B64_VALUES[remaining & 0x3F])
        remaining >>= 6
    digits.reverse()
    return ''.join(digits)


def determine_keri_version(stream):
    """
    Detect which version-string format starts a JSON-serialized stream.

    The first 24 non-whitespace characters of the stream are checked against
    the version 1 pattern (`{"v":"PPPPvvKKKKllllll_"`) and the version 2
    pattern (`{"v":"PPPPVVVKKKKBBBB."`).

    Parameters:
        stream (str | bytes): The stream to inspect. Bytes are decoded as
            UTF-8; undecodable bytes are replaced rather than raising, so a
            binary stream simply fails to match.

    Returns:
        int | None: 1 or 2 for the matching format, -1 if both patterns match
        (should never happen), None if neither matches.
    """
    version_1_pattern = re.compile(
        r'\{"v":"(KERI|ACDC)[0-9a-f]{2}(JSON|CBOR|MGPK|CESR)[0-9a-f]{6}_"'
    )
    version_2_pattern = re.compile(
        r'\{"v":"(KERI|ACDC)([A-Za-z0-9_-]{3})(JSON|CBOR|MGPK|CESR)[A-Za-z0-9_-]{4}\."'
    )

    if isinstance(stream, bytes):
        # errors='replace' keeps binary streams and binary attachments from
        # raising UnicodeDecodeError before the prefix can be examined.
        stream = stream.decode('utf-8', errors='replace')

    # The stream should start with the version string. Collect only the first
    # 24 non-whitespace characters: this ignores any formatting whitespace
    # (spaces, newlines, tabs) and avoids copying the whole stream.
    prefix_chars = []
    for ch in stream:
        if ch.isspace():
            continue
        prefix_chars.append(ch)
        if len(prefix_chars) == 24:
            break
    v_string_candidate = ''.join(prefix_chars)

    match_v1 = version_1_pattern.search(v_string_candidate)
    match_v2 = version_2_pattern.search(v_string_candidate)

    if match_v1 and match_v2:
        return -1 # this is an error, should never happen
    if match_v1:
        return 1
    if match_v2:
        return 2
    return None
        
        

    
        
def get_version_string_info(v_string, version=1):
    """
    Split a KERI/ACDC version string into its fields.

    see: https://trustoverip.github.io/tswg-cesr-specification/#version-string-field

    Version 2 layout: `PPPPVVVKKKKBBBB.` (len 16)
        PPPP  protocol (KERI | ACDC)
        VVV   version in Base64: one major character followed by a
              two-character minor, e.g. CAA --> 2.00, CAQ --> 2.16
        KKKK  serialization kind (JSON, CBOR, MGPK, CESR)
        BBBB  size as a Base64 integer (see b64_to_int)
        .     version 2.XX terminator character

    Version 1 layout: `PPPPvvKKKKllllll_` (len 17)
        PPPP    protocol (KERI | ACDC)
        vv      version in lowercase hexadecimal: major digit, minor digit
        KKKK    serialization kind (JSON, CBOR, MGPK, CESR)
        llllll  size in lowercase hexadecimal
        _       legacy version terminator character

    Parameters:
        v_string (str | bytes): The version string. Double quotes are removed
            and anything after the terminator is ignored, so a raw slice such
            as `"ACDC10JSON0004e4_",` is accepted.
        version (int): Which layout to parse, 1 or 2.

    Returns:
        dict: 'protocol', 'version', 'kind', '_size' (the raw size digits) and
        'size' (the size as an int). 'version' is the raw two hex digits for
        version 1 and dotted `major.minor` (minor zero-padded to two digits)
        for version 2.

    Raises:
        ValueError: If `version` is not 1 or 2, if the terminator character is
            missing, or if the size digits cannot be decoded.
    """
    KNOWN_VERSIONS = [1,2]
    if version not in KNOWN_VERSIONS:
         raise ValueError(f"Unrecognized version: '{version}'. Valid options are: {KNOWN_VERSIONS}")

    if not isinstance(v_string, str):
        v_string = v_string.decode()

    v_string = v_string.replace('"', '')

    terminator = '_' if version == 1 else '.'
    stop_delim = v_string.find(terminator)
    if stop_delim < 0:
        raise ValueError(
            f"Version {version} version string {v_string!r} is missing its '{terminator}' terminator"
        )

    _protocol = v_string[0:4]
    if version == 1:
        _version = v_string[4:6]
        _kind = v_string[6:10]
        _size = v_string[10:stop_delim]
        _size_length = int(_size, 16)
    else:
        # One Base64 character for the major version and a two-character
        # Base64 number for the minor version: 'CAA' -> '2.00', 'CAQ' -> '2.16'.
        major = b64_to_int(v_string[4:5])
        minor = b64_to_int(v_string[5:7])
        _version = f'{major}.{minor:02d}'
        _kind = v_string[7:11]
        _size = v_string[11:stop_delim]
        _size_length = b64_to_int(_size)

    return {
      'protocol': _protocol,
      'version': _version,
      'kind': _kind, # serial
      '_size': _size, # digits
      'size': _size_length
    }


In [2]:
# Sanity-check determine_keri_version on one sample of each version-string format.
for sample in (
    '{"v":"KERI10JSON000249_",',  # version 1
    '{"v":"ACDC200JSONAAhK.",',  # version 2
):
    print(determine_keri_version(sample))



1
2


In [3]:
# Scratch arithmetic (evaluates to 24).
# NOTE(review): purpose is not stated in the notebook - confirm what this checks or delete the cell.
6*4

24

In [4]:
def print_messy_info(fp):
    """
    Print diagnostic information about a serialized file and parse its version string.

    Prints the raw bytes, the byte and character lengths, the detected
    version-string format and the starting tritet kind. When the tritet kind
    is 'JSON', the version string is sliced out of the file, parsed and
    printed as well.

    Parameters:
        fp (str): The path to the file.

    Returns:
        dict | None: The result of get_version_string_info when the stream
        starts as JSON, otherwise None.

    Raises:
        ValueError: From get_version_string_info when the stream starts as
            JSON but no known version-string format was detected.
    """
    f_bytes = read_file_as_bytes(fp)
    # The content is already in memory, so there is no need to read the file again for its size.
    byte_length = len(f_bytes)

    print(f_bytes)
    print('bytes', byte_length)
    char_length = get_file_length_in_chars(fp)
    print('chars', char_length)
    stream_parser_type = get_stream_tritet(f_bytes)
    version = determine_keri_version(f_bytes)
    print('-'*88)
    print(f'version: {version}')
    print('tritet kind', stream_parser_type)

    # Default result so non-JSON streams return None instead of hitting an
    # unbound local variable at the return statement.
    v_info = None
    if stream_parser_type == 'JSON':
        ## TODO: determine if assuming 5:25 is safe?
        v_string = f_bytes[5:25].strip()
        v_info = get_version_string_info(v_string, version)

        print(v_string)
        print(v_info)
    return v_info


In [5]:
## Credential whose version string was corrected by `kli saidify` (kli version 1.1.19).
# file: ecr-authorization-vlei-credential.json
## It is the output of: kli saidify --file ~ecr-authorization-vlei-credential-ORIGINAL.json
## kli saidify recalculates the size in the version string to the correct length (2122)
## and rewrites the version string to match.
## This also changes the SAID to the value: "EDb9n2N2rDONME256eFcFYSTTn5qkKsu7u0DIOvi0rA3"
fp = '../tests/acdcs/ecr-authorization-vlei-credential.json'

v_info = print_messy_info(fp)
ks = dict_to_keri_byte_str(load_json(fp))
actual_size = len(ks)
print('Actual Length / size: ', actual_size, 'bytes')
print('version size matches actual size:', actual_size == v_info['size'])



b'{"v": "ACDC10JSON00084a_", "d": "EDb9n2N2rDONME256eFcFYSTTn5qkKsu7u0DIOvi0rA3", "i": "EKXPX7hWw8KK5Y_Mxs2TOuCrGdN45vPIZ78NofRlVBws", "ri": "EuqwB_iOD86eK0ynAhA6AYwWvPeBhvmbcmOD-9cCmiVU", "s": "ELG17Q0M-uLZcjidzVbF7KBkoUhZa1ie3Az3Q_8aYi8s", "a": {"d": "E9-86Jag34CrJpfNFz_-7E5HA0Dj0FvcYNoFVe7qwkiI", "dt": "2022-08-25T14:07:30.536257+00:00", "i": "EY4ldIBDZP4Tpnm3RX320BO0yz8Uz2nUSN-C409GnCJM", "AID": "Esf8b_AngI1d0KbOFjPGIfpVani0HTagWeaYTLs14PlE", "LEI": "6383001AJTYIGC8Y1X37", "personLegalName": "John Smith", "engagementContextRole": "Chief Executive Officer"}, "e": {"d": "EsOf5_YgX_64z4YuHNFWLUnIKcyvsVQOe_vJ_638X6gE", "le": {"n": "ESyLzoJC4L_1abXOEN4f6uNZCmhqyEHg2geBHFhJ8KDs", "s": "ENPXp1vQzRF6JwIuS-mp2U8Uf1MoADoP_GqQ62VsDZWY"}}, "r": {"d": "EDIai3Wkd-Z_4cezz9nYEcCK3KNH5saLvZoS_84JL6NU", "usageDisclaimer": {"l": "Usage of a valid, unexpired, and non-revoked vLEI Credential, as defined in the associated Ecosystem Governance Framework, does not assert that the Legal Entity is trustwort

In [6]:
# Same check against the ORIGINAL credential file.
fp = '../tests/acdcs/ecr-authorization-vlei-credential-ORIGINAL.json'

v_info = print_messy_info(fp)
ks = dict_to_keri_byte_str(load_json(fp))
actual_size = len(ks)
print('Actual Length / size: ', actual_size, 'bytes')
print('version size matches actual size:', actual_size == v_info['size'])

b'{"v":"ACDC10JSON0004e4_","d":"EuF1gpodKbbqS0fqmUiOYf-MusuNvi0OmY8Js6SKSdfE","i":"EKXPX7hWw8KK5Y_Mxs2TOuCrGdN45vPIZ78NofRlVBws","ri":"EuqwB_iOD86eK0ynAhA6AYwWvPeBhvmbcmOD-9cCmiVU","s":"ELG17Q0M-uLZcjidzVbF7KBkoUhZa1ie3Az3Q_8aYi8s","a":{"d":"E9-86Jag34CrJpfNFz_-7E5HA0Dj0FvcYNoFVe7qwkiI","dt":"2022-08-25T14:07:30.536257+00:00","i":"EY4ldIBDZP4Tpnm3RX320BO0yz8Uz2nUSN-C409GnCJM","AID":"Esf8b_AngI1d0KbOFjPGIfpVani0HTagWeaYTLs14PlE","LEI":"6383001AJTYIGC8Y1X37","personLegalName":"John Smith","engagementContextRole":"Chief Executive Officer"},"e":{"d":"EsOf5_YgX_64z4YuHNFWLUnIKcyvsVQOe_vJ_638X6gE","le":{"n":"ESyLzoJC4L_1abXOEN4f6uNZCmhqyEHg2geBHFhJ8KDs","s":"ENPXp1vQzRF6JwIuS-mp2U8Uf1MoADoP_GqQ62VsDZWY"}},"r":{"d":"EDIai3Wkd-Z_4cezz9nYEcCK3KNH5saLvZoS_84JL6NU","usageDisclaimer":{"l":"Usage of a valid, unexpired, and non-revoked vLEI Credential, as defined in the associated Ecosystem Governance Framework, does not assert that the Legal Entity is trustworthy, honest, reputable in its business 

In [7]:
## The SAID in this file has not been corrected to account for the new version 2 string.
### The version 2 string was written manually...
fp = '../tests/acdcs/ecr-authorization-vlei-credential-SAMPLEV2.json'

v_info = print_messy_info(fp)
ks = dict_to_keri_byte_str(load_json(fp))
actual_size = len(ks)
print('Actual Length / size: ', actual_size, 'bytes')
print('version size matches actual size:', actual_size == v_info['size'])


b'{"v":"ACDCCABJSONAAhJ.","d":"EuF1gpodKbbqS0fqmUiOYf-MusuNvi0OmY8Js6SKSdfE","i":"EKXPX7hWw8KK5Y_Mxs2TOuCrGdN45vPIZ78NofRlVBws","ri":"EuqwB_iOD86eK0ynAhA6AYwWvPeBhvmbcmOD-9cCmiVU","s":"ELG17Q0M-uLZcjidzVbF7KBkoUhZa1ie3Az3Q_8aYi8s","a":{"d":"E9-86Jag34CrJpfNFz_-7E5HA0Dj0FvcYNoFVe7qwkiI","dt":"2022-08-25T14:07:30.536257+00:00","i":"EY4ldIBDZP4Tpnm3RX320BO0yz8Uz2nUSN-C409GnCJM","AID":"Esf8b_AngI1d0KbOFjPGIfpVani0HTagWeaYTLs14PlE","LEI":"6383001AJTYIGC8Y1X37","personLegalName":"John Smith","engagementContextRole":"Chief Executive Officer"},"e":{"d":"EsOf5_YgX_64z4YuHNFWLUnIKcyvsVQOe_vJ_638X6gE","le":{"n":"ESyLzoJC4L_1abXOEN4f6uNZCmhqyEHg2geBHFhJ8KDs","s":"ENPXp1vQzRF6JwIuS-mp2U8Uf1MoADoP_GqQ62VsDZWY"}},"r":{"d":"EDIai3Wkd-Z_4cezz9nYEcCK3KNH5saLvZoS_84JL6NU","usageDisclaimer":{"l":"Usage of a valid, unexpired, and non-revoked vLEI Credential, as defined in the associated Ecosystem Governance Framework, does not assert that the Legal Entity is trustworthy, honest, reputable in its business d

In [8]:
## This file has the manually written version string, with the SAID recalculated via:
### `simple-said/src$ python main.py ../tests/acdcs/ecr-authorization-vlei-credential-SAMPLEV2-CORRECT_SAID.json d`

fp = '../tests/acdcs/ecr-authorization-vlei-credential-SAMPLEV2-CORRECT_SAID.json'

v_info = print_messy_info(fp)
ks = dict_to_keri_byte_str(load_json(fp))
actual_size = len(ks)
print('Actual Length / size: ', actual_size, 'bytes')
print('version size matches actual size:', actual_size == v_info['size'])


b'{"v":"ACDCCABJSONAAhJ.","d":"EGT_VyANLkBvIcd_FEiNCRL-ovMHTQ_6cDoFCXN5-LW0","i":"EKXPX7hWw8KK5Y_Mxs2TOuCrGdN45vPIZ78NofRlVBws","ri":"EuqwB_iOD86eK0ynAhA6AYwWvPeBhvmbcmOD-9cCmiVU","s":"ELG17Q0M-uLZcjidzVbF7KBkoUhZa1ie3Az3Q_8aYi8s","a":{"d":"E9-86Jag34CrJpfNFz_-7E5HA0Dj0FvcYNoFVe7qwkiI","dt":"2022-08-25T14:07:30.536257+00:00","i":"EY4ldIBDZP4Tpnm3RX320BO0yz8Uz2nUSN-C409GnCJM","AID":"Esf8b_AngI1d0KbOFjPGIfpVani0HTagWeaYTLs14PlE","LEI":"6383001AJTYIGC8Y1X37","personLegalName":"John Smith","engagementContextRole":"Chief Executive Officer"},"e":{"d":"EsOf5_YgX_64z4YuHNFWLUnIKcyvsVQOe_vJ_638X6gE","le":{"n":"ESyLzoJC4L_1abXOEN4f6uNZCmhqyEHg2geBHFhJ8KDs","s":"ENPXp1vQzRF6JwIuS-mp2U8Uf1MoADoP_GqQ62VsDZWY"}},"r":{"d":"EDIai3Wkd-Z_4cezz9nYEcCK3KNH5saLvZoS_84JL6NU","usageDisclaimer":{"l":"Usage of a valid, unexpired, and non-revoked vLEI Credential, as defined in the associated Ecosystem Governance Framework, does not assert that the Legal Entity is trustworthy, honest, reputable in its business d