In [1]:
import json
from difflib import SequenceMatcher
import itertools

In [2]:
def create_tokenizers(token_dict_1, token_dict_2):
    node_type_token_dict = {
        node_type: chr(i)
        for i, node_type in enumerate(
            set(
                token_dict_1.keys()
                | token_dict_2.keys()
            )
        )
    }

    token_node_type_dict = {token: node_type for node_type, token in node_type_token_dict.items()}
    return node_type_token_dict, token_node_type_dict

In [3]:
def tokenize_seqs(node_type_token_dict, seqs):
    def _tokenize(path) -> str:
        return "".join(list(map(lambda x: node_type_token_dict[x], path)))

    tokenized_seqs = [_tokenize(seq) for seq in seqs]
    return tokenized_seqs

In [4]:
def detokenize_seqs(token_node_type_dict, seqs):
    def _detokenize(tokenized_seq):
        return [token_node_type_dict[token] for token in tokenized_seq]

    detokenized_seqs = [_detokenize(seq) for seq in seqs]
    return detokenized_seqs

In [5]:
def recursive_submatching(seqs, seq_len):
    submatches = set(seqs)
    for i, j in itertools.combinations(seqs, 2):
        matcher = SequenceMatcher(None, i, j)
        matching_blocks = list(matcher.get_matching_blocks())
        for matching_block in matching_blocks:
            a, b, size = matching_block.a, matching_block.b, matching_block.size
            if matching_block.size > 3:
                match_1 = i[a : a + size]
                submatches.add(match_1)
                if i in submatches:
                    submatches.remove(i)
                if j in submatches:
                    submatches.remove(j)
    print(len(submatches))
    if seq_len - len(submatches) > 0:
        return recursive_submatching(set(submatches), len(submatches))
    return seqs

In [6]:
def get_unq_sequences(seqs):
    total_sequences = 0
    unique_sequences = set()
    for i, (_, sequences) in enumerate(seqs['results'].items()):
        total_sequences += len(sequences)
        unique_sequences |= set(sequences)

    unique_sequences_stripped = set()
    for seq in unique_sequences:
        new_seq = seq.strip(seqs['node_type_token_dict']['input'])
        new_seq = new_seq.strip(seqs['node_type_token_dict']['output'])
        if len(new_seq) > 2:
            unique_sequences_stripped.add(new_seq)

    unique_sequences_decoded = []
    for sequence in unique_sequences_stripped:
        decoded_sequence = [seqs['token_node_type_dict'][i] for i in sequence]
        unique_sequences_decoded.append(decoded_sequence)

    print(f"Total Sequences: {total_sequences:,}, Total Unique Sequences: {len(unique_sequences_stripped):,}, Total Paths Compared: {seqs['total_path_pairs_analyzed']:,}")
    return unique_sequences_stripped, unique_sequences_decoded, total_sequences 

# PyTorch

In [93]:
with open('./onnx_parsing_results/torch_mismatch_seq_match_results.json', 'r') as f:
    mismatched_seq = json.load(f)

with open('./onnx_parsing_results/torch_correct_mismatch_seq_match_results.json', 'r') as f:
    mismatched_correct = json.load(f)

with open('./onnx_parsing_results/torch_test_mismatch_seq_match_results.json', 'r') as f:
    mismatched_test = json.load(f)

In [94]:
print("Mismatched Ops:", len(mismatched_seq['node_type_token_dict'].keys()))
print("Mismatched-Correct Ops:", len(mismatched_correct['node_type_token_dict'].keys()))
print("Mismatched-Test Ops:", len(mismatched_test['node_type_token_dict'].keys()))


Mismatched Ops: 58
Mismatched-Correct Ops: 59
Mismatched-Test Ops: 62


In [95]:
mismatched_seq['node_type_token_dict'].keys() - mismatched_correct['node_type_token_dict'].keys()

set()

In [96]:
mismatched_test['node_type_token_dict'].keys() - mismatched_correct['node_type_token_dict'].keys()


{'DepthToSpace', 'Flatten', 'Unsqueeze'}

## Mismatched Sequences

In [97]:
(
    unique_mismatched_sequences,
    unique_mismatched_sequences_decoded,
    total_mismatched_sequences,
) = get_unq_sequences(mismatched_seq)

Total Sequences: 70,301, Total Unique Sequences: 980, Total Paths Compared: 213,258,432


In [98]:
len(recursive_submatching(unique_mismatched_sequences, len(unique_mismatched_sequences)))

664
638
638


638

## Correct-Mismatched Sequences

In [99]:
(
    unique_corr_mismatched_sequences,
    unique_corr_mismatched_sequences_decoded,
    total_corr_mismatched_sequences,
) = get_unq_sequences(mismatched_correct)

Total Sequences: 923,707, Total Unique Sequences: 4,243, Total Paths Compared: 2,358,066,840


In [100]:
len(recursive_submatching(unique_corr_mismatched_sequences, len(unique_corr_mismatched_sequences)))

3094
2992
2990
2990


2990

### Create New Tokenizers and Calculate Non-Overlapping

In [101]:
node_type_token_dict, token_node_type_dict = create_tokenizers(
    mismatched_correct["node_type_token_dict"], mismatched_seq["node_type_token_dict"]
)

In [102]:
mismatched_sequences = set(tokenize_seqs(node_type_token_dict, unique_mismatched_sequences_decoded))
mismatched_correct_sequences = set(tokenize_seqs(node_type_token_dict, unique_corr_mismatched_sequences_decoded))

#### Nonoverlapping

In [103]:
seqs = mismatched_sequences - mismatched_correct_sequences
len(seqs)

176

In [104]:
detokenize_seqs(token_node_type_dict, seqs)

[['Reshape', 'Conv', 'Reshape'],
 ['Reshape', 'Cast', 'MatMul', 'Div'],
 ['PRelu', 'Concat', 'Shape', 'Slice', 'Concat', 'Resize', 'ReduceSum'],
 ['ArgMin', 'Expand', 'Greater'],
 ['Pad', 'AveragePool', 'ArgMin', 'Less'],
 ['Cos', 'Cast', 'Cast', 'Mul'],
 ['Ceil', 'Max', 'Conv'],
 ['ReduceSum', 'Resize', 'Div'],
 ['ReduceSum', 'Resize', 'Mul'],
 ['Atan', 'Div', 'Cos'],
 ['Concat', 'Neg', 'Cast'],
 ['Greater', 'Where', 'Slice'],
 ['Shape', 'Slice', 'Concat', 'Resize', 'Concat', 'Squeeze'],
 ['Cast', 'LeakyRelu', 'Tan'],
 ['Add', 'Div', 'Greater'],
 ['Cos', 'Add', 'Div'],
 ['Sub', 'Mul', 'Sigmoid'],
 ['Cast', 'Floor', 'LeakyRelu'],
 ['Concat', 'Max', 'Div', 'Erf', 'Add', 'Mul', 'Mul'],
 ['Where', 'Shape', 'Slice', 'Concat', 'Resize', 'Concat'],
 ['Squeeze', 'Mul', 'Div'],
 ['ArgMax', 'Greater', 'Where', 'Slice'],
 ['Cast', 'Cast', 'Cast', 'Sub'],
 ['Div',
  'Erf',
  'Add',
  'Mul',
  'Mul',
  'Clip',
  'Cast',
  'Shape',
  'Slice',
  'Concat'],
 ['Mul', 'Slice', 'Shape'],
 ['Tan', 'Sub',

In [105]:
reduced_sequences = recursive_submatching(seqs, len(seqs))

145
130
130


In [106]:
detokenize_seqs(token_node_type_dict, reduced_sequences)

[['Reshape', 'Conv', 'Reshape'],
 ['ArgMin', 'Expand', 'Greater'],
 ['Cos', 'Cast', 'Cast', 'Mul'],
 ['Ceil', 'Max', 'Conv'],
 ['ReduceSum', 'Resize', 'Div'],
 ['ReduceSum', 'Resize', 'Mul'],
 ['Atan', 'Div', 'Cos'],
 ['Concat', 'Neg', 'Cast'],
 ['Greater', 'Where', 'Slice'],
 ['Cast', 'LeakyRelu', 'Tan'],
 ['Add', 'Div', 'Greater'],
 ['Cos', 'Add', 'Div'],
 ['Sub', 'Mul', 'Sigmoid'],
 ['Shape', 'Slice', 'Concat', 'Resize'],
 ['Cast', 'Floor', 'LeakyRelu'],
 ['Squeeze', 'Mul', 'Div'],
 ['ArgMax', 'Greater', 'Where', 'Slice'],
 ['Cast', 'Cast', 'Cast', 'Sub'],
 ['Mul', 'Slice', 'Shape'],
 ['Tan', 'Sub', 'Div'],
 ['Atan', 'Cast', 'Sigmoid'],
 ['MatMul', 'Max', 'Pad'],
 ['Concat', 'Neg', 'Concat', 'Concat'],
 ['Cast', 'Floor', 'Ceil'],
 ['Div', 'Abs', 'Atan'],
 ['Ceil', 'Sin', 'Cast'],
 ['Concat', 'ReduceMax', 'Reshape'],
 ['Cast', 'Xor', 'Where', 'Concat', 'Clip'],
 ['Slice', 'Max', 'Resize'],
 ['Cos', 'Relu', 'Concat'],
 ['Round', 'Where', 'Resize'],
 ['Atan', 'Softmax', 'Concat', 'Conc

In [107]:
filtered = list(
    filter(
        lambda x: (len(x) > 2),
        detokenize_seqs(token_node_type_dict, reduced_sequences),
    )
)
len(filtered)

130

In [108]:
filtered_ops = set()
for filt in filtered:
    filtered_ops = filtered_ops.union(filt)
filtered_ops

{'Abs',
 'Add',
 'ArgMax',
 'ArgMin',
 'Atan',
 'BatchNormalization',
 'Cast',
 'Ceil',
 'Clip',
 'Concat',
 'Conv',
 'Cos',
 'Div',
 'Equal',
 'Expand',
 'Floor',
 'Gather',
 'Greater',
 'If',
 'LeakyRelu',
 'Less',
 'MatMul',
 'Max',
 'MaxPool',
 'Min',
 'Mul',
 'Neg',
 'Or',
 'PRelu',
 'Pad',
 'ReduceMax',
 'ReduceMean',
 'ReduceMin',
 'ReduceSum',
 'Relu',
 'Reshape',
 'Resize',
 'Round',
 'Shape',
 'Sigmoid',
 'Sin',
 'Slice',
 'Softmax',
 'Squeeze',
 'Sub',
 'Tan',
 'Transpose',
 'Trilu',
 'Where',
 'Xor'}

### Find Unique Sequence Distribution by Pairs

In [109]:
tokenized_filtered = tokenize_seqs(mismatched_seq['node_type_token_dict'], filtered)

In [110]:
sequence_counts_by_pair = {}
for i, (pair, sequences) in enumerate(mismatched_seq['results'].items()):
    a, b = pair.split(",")
    unq_seq = set(sequences)
    unq_seq_stripped = set()
    for seq in unq_seq:
        new_seq = seq.strip(mismatched_seq['node_type_token_dict']['input'])
        new_seq = new_seq.strip(mismatched_seq['node_type_token_dict']['output'])
        if len(new_seq) > 2:
            unq_seq_stripped.add(new_seq)
    
    for filt_seq in tokenized_filtered:
        if filt_seq in unq_seq_stripped:
            if a not in sequence_counts_by_pair:
                sequence_counts_by_pair[a] = set([filt_seq])
            else:
                sequence_counts_by_pair[a] |= set([filt_seq])
            if b not in sequence_counts_by_pair:
                sequence_counts_by_pair[b] = set([filt_seq])
            else:
                sequence_counts_by_pair[b] |= set([filt_seq])
        else:
            if a not in sequence_counts_by_pair:
                sequence_counts_by_pair[a] = set()
            if b not in sequence_counts_by_pair:
                sequence_counts_by_pair[b] = set()
filtered_sorted_pairs = dict(sorted({key: len(val) for key, val in sequence_counts_by_pair.items()}.items(), key=lambda x: x[1], reverse=True))
filtered_sorted_pairs

{'0': 10,
 '34': 10,
 '14': 9,
 '92': 8,
 '80': 7,
 '87': 7,
 '4': 6,
 '25': 6,
 '27': 6,
 '29': 6,
 '36': 6,
 '65': 6,
 '70': 6,
 '88': 6,
 '91': 6,
 '97': 6,
 '9': 5,
 '33': 5,
 '35': 5,
 '41': 5,
 '51': 5,
 '55': 5,
 '62': 5,
 '78': 5,
 '81': 5,
 '84': 5,
 '86': 5,
 '90': 5,
 '93': 5,
 '1': 4,
 '10': 4,
 '11': 4,
 '17': 4,
 '31': 4,
 '47': 4,
 '49': 4,
 '50': 4,
 '67': 4,
 '68': 4,
 '73': 4,
 '75': 4,
 '89': 4,
 '99': 4,
 '7': 3,
 '8': 3,
 '12': 3,
 '20': 3,
 '22': 3,
 '23': 3,
 '28': 3,
 '46': 3,
 '52': 3,
 '53': 3,
 '57': 3,
 '59': 3,
 '72': 3,
 '76': 3,
 '79': 3,
 '82': 3,
 '95': 3,
 '3': 2,
 '5': 2,
 '16': 2,
 '21': 2,
 '26': 2,
 '30': 2,
 '37': 2,
 '38': 2,
 '39': 2,
 '40': 2,
 '42': 2,
 '43': 2,
 '44': 2,
 '48': 2,
 '54': 2,
 '56': 2,
 '60': 2,
 '61': 2,
 '63': 2,
 '71': 2,
 '74': 2,
 '77': 2,
 '96': 2,
 '2': 1,
 '13': 1,
 '15': 1,
 '19': 1,
 '24': 1,
 '58': 1,
 '64': 1,
 '66': 1,
 '69': 1,
 '83': 1,
 '85': 1,
 '94': 1,
 '98': 1,
 '6': 0,
 '18': 0,
 '32': 0,
 '45': 0}

In [111]:
len(filtered_sorted_pairs)

100

## Mismatched-Test Sequences

In [None]:
(
    unique_test_mismatched_sequences,
    unique_test_mismatched_sequences_decoded,
    total_test_mismatched_sequences,
) = get_unq_sequences(mismatched_test)

In [None]:
len(recursive_submatching(unique_test_mismatched_sequences, len(unique_test_mismatched_sequences)))

### Create New Tokenizers and Calculate Non-Overlapping

In [None]:
node_type_token_dict, token_node_type_dict = create_tokenizers(
    mismatched_test["node_type_token_dict"], mismatched_seq["node_type_token_dict"]
)

In [None]:
mismatched_sequences = set(tokenize_seqs(node_type_token_dict, unique_mismatched_sequences_decoded))
mismatched_test_sequences = set(tokenize_seqs(node_type_token_dict, unique_test_mismatched_sequences_decoded))

#### Nonoverlapping

In [None]:
seqs = mismatched_sequences - mismatched_test_sequences
len(seqs)

In [None]:
reduced_sequences = recursive_submatching(seqs, len(seqs))

In [None]:
detokenize_seqs(token_node_type_dict, reduced_sequences)

In [None]:
list(
    filter(
        lambda x: (len(x) > 2) and ("input" not in x) and ("output" not in x),
        detokenize_seqs(token_node_type_dict, reduced_sequences),
    )
)

# tf2onnx

In [112]:
with open('./onnx_parsing_results/tf2onnx_mismatch_seq_match_results.json', 'r') as f:
    mismatched_seq = json.load(f)
with open('./onnx_parsing_results/tf2onnx_correct_mismatch_seq_match_results.json', 'r') as f:
    mismatched_correct = json.load(f)
with open('./onnx_parsing_results/tf2onnx_test_mismatch_seq_match_results.json', 'r') as f:
    mismatched_test = json.load(f)

In [113]:
print("Mismatched Ops:", len(mismatched_seq['node_type_token_dict'].keys()))
print("Mismatched-Correct Ops:", len(mismatched_correct['node_type_token_dict'].keys()))
print("Mismatched-Test Ops:", len(mismatched_test['node_type_token_dict'].keys()))


Mismatched Ops: 54
Mismatched-Correct Ops: 54
Mismatched-Test Ops: 65


## Mismatched Sequences

In [114]:
(
    unique_mismatched_sequences,
    unique_mismatched_sequences_decoded,
    total_mismatched_sequences,
) = get_unq_sequences(mismatched_seq)

Total Sequences: 156,218, Total Unique Sequences: 2,155, Total Paths Compared: 135,239,592


In [115]:
len(recursive_submatching(unique_mismatched_sequences, len(unique_mismatched_sequences)))

1266
1137
1126
1117
1117


1117

## Correct-Mismatched Sequences

In [116]:
(
    unique_corr_mismatched_sequences,
    unique_corr_mismatched_sequences_decoded,
    total_corr_mismatched_sequences,
) = get_unq_sequences(mismatched_correct)

Total Sequences: 57,890, Total Unique Sequences: 1,050, Total Paths Compared: 80,075,700


In [117]:
len(recursive_submatching(unique_corr_mismatched_sequences, len(unique_corr_mismatched_sequences)))

600
520
509
503
503


503

### Create New Tokenizers and Calculate Non-Overlapping

In [118]:
node_type_token_dict, token_node_type_dict = create_tokenizers(
    mismatched_correct["node_type_token_dict"], mismatched_seq["node_type_token_dict"]
)

In [119]:
mismatched_sequences = set(tokenize_seqs(node_type_token_dict, unique_mismatched_sequences_decoded))
mismatched_correct_sequences = set(tokenize_seqs(node_type_token_dict, unique_corr_mismatched_sequences_decoded))

#### Nonoverlapping

In [120]:
seqs = mismatched_sequences - mismatched_correct_sequences
len(seqs)

1527

In [121]:
reduced_sequences = recursive_submatching(seqs, len(seqs))

1029
901
877
871
871


In [122]:
detokenize_seqs(token_node_type_dict, reduced_sequences)

[['Sigmoid', 'Concat', 'Div'],
 ['Concat', 'Transpose', 'Conv'],
 ['MatMul', 'Concat', 'ReduceSum'],
 ['Reshape', 'Conv', 'Mul', 'Transpose'],
 ['Conv', 'Reshape', 'Div', 'Reshape', 'Conv', 'Reshape'],
 ['Mul', 'MatMul', 'Min'],
 ['Abs', 'Min', 'Max'],
 ['ReduceSum', 'ReduceMean', 'Concat'],
 ['Squeeze', 'Cos', 'Concat'],
 ['Transpose', 'Conv', 'ReduceMean'],
 ['Conv', 'ReduceMax', 'Reshape'],
 ['Concat', 'ReduceProd', 'Mul', 'Add'],
 ['ReduceMax', 'Reshape', 'Conv'],
 ['Atan', 'Concat', 'Reshape'],
 ['And', 'And', 'Not'],
 ['ReduceSum', 'Min', 'Max', 'Gather'],
 ['Reshape', 'Conv', 'Conv', 'Relu'],
 ['Conv', 'Transpose', 'Transpose'],
 ['Reshape', 'Conv', 'Conv', 'Sub'],
 ['Mul', 'Add', 'ReduceProd'],
 ['ReverseSequence', 'Transpose', 'ReverseSequence', 'Transpose'],
 ['Conv', 'Squeeze', 'Transpose'],
 ['Gather', 'Expand', 'ReverseSequence', 'Transpose'],
 ['Concat', 'Reshape', 'ConvTranspose'],
 ['Transpose', 'Conv', 'Conv', 'Transpose'],
 ['Mul', 'Concat', 'Cast'],
 ['Reshape', 'Mat

In [123]:
filtered = list(
    filter(
        lambda x: (len(x) > 2),
        detokenize_seqs(token_node_type_dict, reduced_sequences),
    )
)
len(filtered)

871

In [124]:
filtered_ops = set()
for filt in filtered:
    filtered_ops = filtered_ops.union(filt)
filtered_ops

{'Abs',
 'Add',
 'And',
 'ArgMax',
 'Atan',
 'BatchNormalization',
 'Cast',
 'Ceil',
 'Concat',
 'Conv',
 'ConvTranspose',
 'Cos',
 'DepthToSpace',
 'Div',
 'Erf',
 'Expand',
 'Floor',
 'Gather',
 'GlobalAveragePool',
 'Greater',
 'LRN',
 'LeakyRelu',
 'MatMul',
 'Max',
 'Min',
 'Mul',
 'Neg',
 'Not',
 'Or',
 'ReduceMax',
 'ReduceMean',
 'ReduceMin',
 'ReduceProd',
 'ReduceSum',
 'Relu',
 'Reshape',
 'ReverseSequence',
 'Round',
 'Shape',
 'Sigmoid',
 'Slice',
 'Softmax',
 'SpaceToDepth',
 'Squeeze',
 'Sub',
 'Tan',
 'Transpose',
 'Unsqueeze'}

### Find Unique Sequence Distribution by Pairs

In [125]:
tokenized_filtered = tokenize_seqs(mismatched_seq['node_type_token_dict'], filtered)

In [126]:
sequence_counts_by_pair = {}
for i, (pair, sequences) in enumerate(mismatched_seq['results'].items()):
    a, b = pair.split(",")
    unq_seq = set(sequences)
    unq_seq_stripped = set()
    for seq in unq_seq:
        new_seq = seq.strip(mismatched_seq['node_type_token_dict']['input'])
        new_seq = new_seq.strip(mismatched_seq['node_type_token_dict']['output'])
        if len(new_seq) > 2:
            unq_seq_stripped.add(new_seq)
    
    for filt_seq in tokenized_filtered:
        if filt_seq in unq_seq_stripped:
            if a not in sequence_counts_by_pair:
                sequence_counts_by_pair[a] = set([filt_seq])
            else:
                sequence_counts_by_pair[a] |= set([filt_seq])
            if b not in sequence_counts_by_pair:
                sequence_counts_by_pair[b] = set([filt_seq])
            else:
                sequence_counts_by_pair[b] |= set([filt_seq])
        else:
            if a not in sequence_counts_by_pair:
                sequence_counts_by_pair[a] = set()
            if b not in sequence_counts_by_pair:
                sequence_counts_by_pair[b] = set()
filtered_sorted_pairs = dict(sorted({key: len(val) for key, val in sequence_counts_by_pair.items()}.items(), key=lambda x: x[1], reverse=True))
filtered_sorted_pairs

{'64': 54,
 '71': 54,
 '29': 50,
 '50': 48,
 '82': 43,
 '196': 43,
 '208': 43,
 '77': 37,
 '32': 36,
 '140': 36,
 '166': 36,
 '2': 34,
 '193': 34,
 '216': 33,
 '72': 32,
 '144': 32,
 '156': 32,
 '164': 32,
 '41': 31,
 '60': 31,
 '69': 31,
 '1': 29,
 '211': 29,
 '46': 28,
 '126': 28,
 '151': 27,
 '94': 25,
 '118': 25,
 '133': 25,
 '201': 25,
 '70': 24,
 '91': 24,
 '161': 24,
 '28': 23,
 '90': 23,
 '112': 23,
 '183': 23,
 '15': 21,
 '88': 21,
 '136': 21,
 '182': 21,
 '13': 20,
 '56': 20,
 '86': 20,
 '35': 19,
 '96': 19,
 '202': 19,
 '203': 19,
 '37': 18,
 '38': 18,
 '100': 18,
 '212': 18,
 '79': 17,
 '117': 17,
 '129': 17,
 '121': 16,
 '128': 16,
 '157': 16,
 '30': 15,
 '53': 15,
 '85': 15,
 '102': 15,
 '109': 15,
 '172': 15,
 '51': 14,
 '101': 14,
 '162': 14,
 '173': 14,
 '174': 14,
 '21': 13,
 '27': 13,
 '52': 13,
 '58': 13,
 '76': 13,
 '186': 13,
 '55': 12,
 '78': 12,
 '113': 12,
 '124': 12,
 '127': 12,
 '131': 12,
 '146': 12,
 '152': 12,
 '170': 12,
 '199': 12,
 '200': 12,
 '209': 12

In [127]:
len(sequence_counts_by_pair)

220

## Mismatched-Test Sequences

In [None]:
(
    unique_test_mismatched_sequences,
    unique_test_mismatched_sequences_decoded,
    total_test_mismatched_sequences,
) = get_unq_sequences(mismatched_test)

In [None]:
len(recursive_submatching(unique_test_mismatched_sequences, len(unique_test_mismatched_sequences)))

### Create New Tokenizers and Calculate Non-Overlapping

In [None]:
node_type_token_dict, token_node_type_dict = create_tokenizers(
    mismatched_test["node_type_token_dict"], mismatched_seq["node_type_token_dict"]
)

In [None]:
mismatched_sequences = set(tokenize_seqs(node_type_token_dict, unique_mismatched_sequences_decoded))
mismatched_test_sequences = set(tokenize_seqs(node_type_token_dict, unique_test_mismatched_sequences_decoded))

#### Nonoverlapping

In [None]:
len(mismatched_sequences)

In [None]:
len(mismatched_test_sequences)

In [None]:
len(mismatched_sequences - mismatched_test_sequences)

In [None]:
seqs =  mismatched_sequences - mismatched_test_sequences
len(seqs)

In [None]:
reduced_sequences = recursive_submatching(seqs, len(seqs))

In [None]:
list(
    filter(
        lambda x: (len(x) > 2) and ("input" not in x) and ("output" not in x),
        detokenize_seqs(token_node_type_dict, reduced_sequences),
    )
)