In [330]:
import json
import codecs
from functools import cmp_to_key
from inspect import getsource
from sys import argv
import re
import zlib

complete_match = ["""import json as a,sys
b=sys.argv
a.dump(dict(zip(*[sorted(x)for x in a.load(open(b[1]))])),open(b[2],'w'))""",
                      """import json as a,sys,re
b=sys.argv
a.dump(dict(zip(*[sorted(i,key=lambda y:[int(c)if c.isdigit()else c for c in re.split('(\d+)',y)])for i in a.load(open(b[1]))])),open(b[2],'w'))""",
                      '']
    
incomplete_match = ["""import json as a,sys
b=sys.argv
A=[sorted(x)for x in a.load(open(b[1]))]
t={}
A[1]=[A[1][x+y]for x,y in enumerate([t[i]for i in range(0,len(t),2)for j in range(t[i+1])])] 
a.dump(dict(zip(*A)),open(b[2],'w'))""",
                        '''import json as a,sys,re
b=sys.argv
A=[sorted(x,key=lambda y:[int(c)if c.isdigit()else c for c in re.split('(\d+)',y)])for x in a.load(open(b[1]))]
t={}
A[1]=[A[1][x+y]for x,y in enumerate([t[i]for i in range(0,len(t),2)for j in range(t[i+1])])] 
a.dump(dict(zip(*A)),open(b[2],'w'))''','']

order_key_scripts = ['''import json as a,sys,re
b=sys.argv
N=lambda x:[int(c)if c.isdigit()else c for c in re.split('(\d+)',x)]
def _(A):
    B=''
    D=re.findall('\$[a-zA-Z]\d+',A)
    if not D:
        return N(A)
    F=re.split('/',A) 
    O=[(i.find(D[0])!=-1)for i in F].index(True)
    T= '/'.join(F[:O-1])
    m=re.split('{0}',F[O-1])
    return N((T+'/'if T else'')+m[int(D[0][2:])-1])
a.dump(dict(zip(*[sorted(i,key=_)for i in a.load(open(b[1]))])),open(b[2],'w'))''',
'''import json as a,sys,re
b=sys.argv
N=lambda x:[int(c)if c.isdigit()else c for c in re.split('(\d+)',x)]
def _(A):
    B=''
    D=re.findall('\$[a-zA-Z]\d+',A)
    if not D:
        return N(A)
    F=re.split('/',A) 
    O=[(i.find(D[0])!=-1)for i in F].index(True)
    T= '/'.join(F[:O-1])
    m=re.split('{0}',F[O-1])
    C=re.split('{1}',m[0])
    m[0]=m[0]if len(C)==1 else C[1]
    return N((T+'/'if T else'')+m[int(D[0][2:])-1])
a.dump(dict(zip(*[sorted(i,key=_)for i in a.load(open(b[1]))])),open(b[2],'w'))''']

incomplete_order_key = ['''import json as a,sys,re
b=sys.argv
N=lambda x:[int(c)if c.isdigit()else c for c in re.split('(\d+)',x)]
def _(A):
    B=''
    D=re.findall('\$[a-zA-Z]\d+',A)
    if not D:
        return N(A)
    F=re.split('/',A) 
    O=[(i.find(D[0])!=-1)for i in F].index(True)
    T= '/'.join(F[:O-1])
    m=re.split('{0}',F[O-1])
    return N((T+'/'if T else'')+m[int(D[0][2:])-1])
t={1}
A=[sorted(i,key=_)for i in a.load(open(b[1]))]
A[1]=[A[1][x+y]for x,y in enumerate([t[i]for i in range(0,len(t),2)for j in range(t[i+1])])] 
a.dump(dict(zip(*A)),open(b[2],'w'))''',
'''import json as a,sys,re
b=sys.argv
N=lambda x:[int(c)if c.isdigit()else c for c in re.split('(\d+)',x)]
def _(A):
    B=''
    D=re.findall('\$[a-zA-Z]\d+',A)
    if not D:
        return N(A)
    F=re.split('/',A) 
    O=[(i.find(D[0])!=-1)for i in F].index(True)
    T= '/'.join(F[:O-1])
    m=re.split('{0}',F[O-1])
    C=re.split('{1}',m[0])
    m[0]=m[0]if len(C)==1 else C[1]
    return N((T+'/'if T else'')+m[int(D[0][2:])-1])
t={2}
A=[sorted(i,key=_)for i in a.load(open(b[1]))]
A[1]=[A[1][x+y]for x,y in enumerate([t[i]for i in range(0,len(t),2)for j in range(t[i+1])])] 
a.dump(dict(zip(*A)),open(b[2],'w'))''']

separator = ''
prefix = ''
need_swap = False

def remove_white_space(ori):
    return "".join(str(ori).split(' '))

def shift_count(ori,new):
    count = 0
    for i in range(len(ori)):
        if ori[new[0][i]] != new[1][i]:
            count += 1
    return count

def generate_shift_table(ori,new):
    idx=[]
    if not new:
        return []
    for i in range(len(ori)):
        if ori[new[0][i]] != new[1][i]:
            idx.append(new[1].index(ori[new[0][i]])-i)
        else:
            idx.append(0)
    return idx

def generate_run_length_table(idx):
    # [initialization]
    # add end flag
    idx += ['*']  
    # current run
    current = idx[0] 
    # current length
    run_length = 0
    # final output info
    res = []
    # readble output info
    readable = []
    
    # [run_length]
    for i in range(len(idx)):
        if idx[i] == current:
            run_length += 1
        else:
            res += [current,run_length]
            readable.append([current,run_length])
            current = idx[i]
            run_length = 1
    return res, readable

def two_level_run_length(table):
    flag = False
    temp = []
    tltable = []
    for i in range(0,len(table),2):
        if not flag and table[i+1] == 1:
            temp = [[table[i]],1]
            flag = True
        elif flag and table[i+1] == 1:
            temp[0] += [table[i]]
        else:
            flag = False
            tltable += temp
            tltable += table[i:i+2]
            temp = []
    tltable += temp
    return tltable

def swap_unsign_sign(oltable):
    return [oltable[i]*-1 if i % 2 == 0 else oltable[i] for i in range(len(oltable))]

def generate_tables(origin_table, input_table, method):
    global prefix
    global separator
    global need_swap
    methods = ['Default', 'Natural', 'Order']
    choise, idx, res, readable, tltable, output_table = -1, [], [], [], [],''
    ns = shift_count(origin_table, input_table)
    if ns == 0:
        print('%s key can sovle this case.' % method)
        choise = methods.index(method)
        if method == methods[2]:
            if prefix:
                complete_match[2] = order_key_scripts[1].format(separator,prefix)
            else:
                complete_match[2] = order_key_scripts[0].format(separator)
        print('Script size using %s key: %d.' % (method,len(complete_match[methods.index(method)])))
    else:
        print('%s key can\'t sovle this case.' % method)
        print('Shift count in this method: %d.' % ns)
        idx = generate_shift_table(origin_table,input_table)
        res, readable = generate_run_length_table(idx)
        tltable = two_level_run_length(res)
        cidx = codecs.encode(zlib.compress(remove_white_space(idx).encode()),'base64').decode()
        cres = codecs.encode(zlib.compress(remove_white_space(res).encode()),'base64').decode()
        cres = '''a.loads(zlib.decompress(codecs.decode('%s'.encode(),'base64')).decode())''' % cres.replace('\n','')
        rres = swap_unsign_sign(res)
        crres = codecs.encode(zlib.compress(remove_white_space(rres).encode()),'base64').decode()
        crres = '''a.loads(zlib.decompress(codecs.decode('%s'.encode(),'base64')).decode())''' % crres.replace('\n','')
        need_swap = True if len(crres) < len(cres) else False
        ctltable = codecs.encode(zlib.compress(remove_white_space(tltable).encode()),'base64').decode()
        ctltable = '''a.loads(zlib.decompress(codecs.decode('%s'.encode(),'base64')).decode())''' % ctltable.replace('\n','')
        print('The most large consecutive segment length: %d.' % max(list(zip(*readable))[1]))
        print('Shift table plain text size: %d.' % len(remove_white_space(idx)))
        print('Shift table cipher text size: %d.' % len(cidx))
        print('One level run-length table plain text size: %d.' % len(remove_white_space(res)))
        print('One level run-length table cipher text size: %d.' % len(cres))
        print('After swapping, one level run-length table plain text size: %d.' % len(remove_white_space(rres)))
        print('After swapping, one level run-length table cipher text size: %d.' % len(crres))
        print('Two level run-length table plain text size: %d.' % len(remove_white_space(tltable)))
        print('Two level run-length table cipher text size: %d.' % len(ctltable))
        output_table = crres if need_swap else res
        if method == methods[2]:
            if prefix:
                incomplete_match[2] = incomplete_order_key[1].format(separator,prefix,output_table)
            else:
                incomplete_match[2] = incomplete_order_key[0].format(separator,output_table)
        else:
            incomplete_match[methods.index(method)]=incomplete_match[methods.index(method)].format(output_table)
        incomplete_match[methods.index(method)] = incomplete_match[methods.index(method)].replace('\n',',codecs,zlib\n',1)
        if need_swap:
            incomplete_match[methods.index(method)] = incomplete_match[methods.index(method)].replace('x+y','x-y',1)
        print('Script size using %s key: %d.' % (method,len(incomplete_match[methods.index(method)])))
    return choise, idx, res, readable, tltable, output_table

def default_key_method(origin_mapping):
    keys, values = zip(*origin_mapping.items())
    keys, values = sorted(keys), sorted(values)
    return [keys,values]

def natural_key(x):
    #[int(c)if c.isdigit()else c for c in re.split('(\d+)',pattern)]
    #x = re.split('\/',x)
    #x = [[int(c)if c.isdigit()else c.replace('[','') for c in re.split('(\d+)',pattern) ]for pattern in x]
    return [int(c)if c.isdigit()else c for c in re.split('(\d+)',x)]

def natural_key_method(origin_mapping):
    ori = [list(origin_mapping.keys()),list(origin_mapping.values())]
    return [sorted(x,key=natural_key) for x in ori]

def dimensions(string,i):
    it=list(re.finditer('\[([0-9]+)\]'*i,string))
    return (0,0) if not i else (i,it) if len(it) else dimensions(string,i-1)

def order_key(ori):
    K=ori[:]
    order=re.findall('\$[a-zA-Z](\d+)',ori)
    if order:
        d,ds = dimensions(ori,2)
        o=int(order[0])
        if d:
            K = ori[:ds[0].start()]+ds[-1 if len(ds) < o else o-1].group(0)+order[0]
        else:
            x_dim = list(re.finditer('[a-zA-Z]\d+\_\d+',ori))
            K= ori[:x_dim[0].start()]+x_dim[-1 if len(x_dim) < o else o-1].group(0)+order[0] if x_dim else ori
    else:
        K = ori
    return [int(c)if c.isdigit()else c for c in re.split('(\d+)',K)]

def find_separator_prefix(origin_mapping):
    voting = dict()
    have_order = [(k,v) for k, v in origin_mapping.items() if re.findall('\$[a-zA-Z]\d+',k) or re.findall('\$[a-zA-Z]\d+',v)]
    ko = [(k,v) for k, v in origin_mapping.items() if re.findall('\$[a-zA-Z]\d+',k)] 
    vo = [(k,v) for k, v in origin_mapping.items() if re.findall('\$[a-zA-Z]\d+',v)]
    if not (ko or vo):
        return '', ''
    flag = True if ko else False 
    for k, v in have_order:
        Order=re.search('\$[a-zA-Z](\d+)', k if flag else v)
        ks = re.split('/',k)
        vs = re.split('/',v)
        opos =[(i.find(Order.group(0))!=-1)for i in (ks if flag else vs)].index(True)
        folders = ks[opos-1] if flag else vs[opos-1] 
        folders = re.sub('\d+','',folders)
        folder = re.sub('\d+','',vs[opos-1] if flag else ks[opos-1])
        patterns = re.split(re.escape(folder),folders)
        for pattern in [i for i in patterns if i]:
            if pattern in voting:
                voting[pattern] += 1
            else:
                voting.setdefault(pattern,1)      
    if not voting:
        return '', ''
    voting = sorted(list(voting.items()),key=lambda x:x[1],reverse=True)
    return voting[0][0], (voting[1][0] if len(voting) > 1 and voting[0][0] not in voting[1][0] else '')

def _order_key(ori,sep,pre):
    new  =  ori[:]
    match = re.findall('\$[a-zA-Z]\d+',ori)
    if not match:
        new=ori
    else:
        m1=re.split('/',  ori) 
        opos =[(i.find(match[0])!=-1)for i in m1].index(True)
        temp = '/'.join(m1[:opos-1])
        m=re.split(sep,m1[opos-1])
        new=(temp+'/' if temp else '')+m[int(match[0][2:])-1]
    #new = re.sub("\/h0\/i1\/i1\/l0",'',re.sub('\[','',new))
    return [int(c)if c.isdigit()else c for c in re.split('(\d+)',new)]

def order_key_(ori,sep,pre):
    new  =  ori[:]
    match = re.findall('\$[a-zA-Z]\d+',ori)
    if not match:
        new=ori
    else:       
        m1=re.split('/',  ori) 
        opos =[(i.find(match[0])!=-1)for i in m1].index(True)
        temp = '/'.join(m1[:opos-1])
        m=re.split(sep,m1[opos-1])
        cut=re.split(pre,m[0])
        m[0] = m[0]if len(cut) == 1 else cut[1]
        new=(temp+'/' if temp else '')+m[int(match[0][2:])-1]
    return [int(c)if c.isdigit()else c for c in re.split('(\d+)',new)]

def order_key_method(origin_mapping):
    global separator
    global prefix
    separator, prefix = find_separator_prefix(origin_mapping)
    print('Separator: %s. Prefix pattern: %s.' % ( (separator if separator else 'None'),(prefix if prefix else 'None') ) ) 
    if not(separator or prefix):
        return []
    ori=[sorted(list(origin_mapping.keys())),sorted(list(origin_mapping.values()))]
    func=order_key_ if prefix else _order_key
    _ori=[[func(j,separator,prefix)for j in i] for i in ori]
    KV=[list(zip(ori[i],_ori[i])) for i in[0,1]]
    KV=[sorted(x,key=lambda x:x[1]) for x in KV]
    return [[z[0]for z in y]for y in KV]

def check_type(origin_mapping):
    global complete_match
    global incomplete_match
    global order_key_scripts
    output_script = ""
    choise = -1
    methods = ['Default', 'Natural', 'Order']
    functions = [default_key_method, natural_key_method, order_key_method]
    data = [ [ [] for j in range(5) ] for i in range(len(methods)) ] # index, one level run-length, readable, two level run-length
    for i in range(len(methods)):
        if choise == -1:
            input_table = functions[i](origin_mapping)   
            if not input_table:
                print('%s key can\'t apply to this case.' % methods[i])
                continue
            flag, data[i][0], data[i][1], data[i][2], data[i][3], data[i][4]  = generate_tables(origin_mapping, input_table, methods[i])
            choise = i if flag == i else -1   
    if choise != -1:
        output_script = complete_match[choise]
    else:
        print('No method can solve this case.')
        total= lambda x,y: len("".join(str(x).split(' ')))+len(y)
        two_level = [(methods[i],len(incomplete_match[i]))if data[i][0] else (methods[i],-1) for i in range(len(methods))] 
        two_level = list(filter(lambda x: x[1] > -1,sorted(two_level,key=lambda x:x[1])))
        t_index = methods.index(two_level[0][0])
        script_candidate = [ (len("".join(str(data[i][1]).split(' '))),len(data[i][4]),len(incomplete_match[i]))for i in range(len(methods)) ]
        print('Script candidate: %s.' % str(dict(zip(methods,script_candidate))) )
        print('Using %s key table to record the original relationship.' % methods[t_index])
        index, one_level, readable, two_level, output_table = data[t_index][0], data[t_index][1], data[t_index][2], data[t_index][3], data[t_index][4] 
        output_script = incomplete_match[t_index]
    print('Script size: %d.' % len(output_script))
    return output_script
def test_all(case):
    print('case: {}'.format(case))
    map_in_name, script_name = '../cases/case{}.json'.format(case), 'test.py'
    map_in_file = open(map_in_name)
    origin_mapping = json.load(map_in_file)
    map_in_file.close()  
    #print('Script:\n%s\n'%check_type(origin_mapping))
    check_type(origin_mapping)
    print()
if __name__ == '__main__':
    ta = [0,1,2,3,4,5,6,7,8]
    tt=[6]
    for i in tt:
        test_all(i)
    


case: 6
Default key can't sovle this case.
Shift count in this method: 1225.
The most large consecutive segment length: 8423.
Shift table plain text size: 49042.
Shift table cipher text size: 341.
One level run-length table plain text size: 1827.
One level run-length table cipher text size: 262.
After swapping, one level run-length table plain text size: 1826.
After swapping, one level run-length table cipher text size: 262.
Two level run-length table plain text size: 2057.
Two level run-length table cipher text size: 278.
Script size using Default key: 2752.
Natural key can't sovle this case.
Shift count in this method: 122.
The most large consecutive segment length: 10176.
Shift table plain text size: 47980.
Shift table cipher text size: 199.
One level run-length table plain text size: 290.
One level run-length table cipher text size: 186.
After swapping, one level run-length table plain text size: 277.
After swapping, one level run-length table cipher text size: 182.
Two level run-l

In [331]:
map_in_file = open('../cases/case{}.json'.format(1))
origin_mapping = json.load(map_in_file)
map_in_file.close()  

def _find_separator_prefix(origin_mapping,order_sign):
    voting = dict()
    have_order = [(k,v) for k, v in origin_mapping.items() if re.findall(order_sign,k) or re.findall(order_sign,v)]
    ko = [(k,v) for k, v in origin_mapping.items() if re.findall(order_sign,k)] 
    vo = [(k,v) for k, v in origin_mapping.items() if re.findall(order_sign,v)]
    print(len(ko),len(vo))
    if not (ko or vo):
        return '', ''
    flag = ( True if len(ko)>len(vo) else False ) if ko and vo else (True if ko else False) 
    for k, v in have_order:
        Order=re.search(order_sign, k if flag else v)
        ks = re.split('/',k)
        vs = re.split('/',v)
        opos =[(i.find(Order.group(0))!=-1)for i in (ks if flag else vs)].index(True)
        folders = ks[opos-1] if flag else vs[opos-1] 
        folders = re.sub('\d+','',folders)
        folder = re.sub('\d+','',vs[opos-1] if flag else ks[opos-1])
        patterns = re.split(re.escape(folder),folders)
        for pattern in [i for i in patterns if i]:
            if pattern in voting:
                voting[pattern] += 1
            else:
                voting.setdefault(pattern,1)      
    if not voting:
        return '', ''
    voting = sorted(list(voting.items()),key=lambda x:x[1],reverse=True)
    separator = voting[0][0]
    prefix = voting[1][0] if len(voting) > 1 and voting[1][0].find(voting[0][0]) == -1 else ''
    fake_separators = {}
    for k, v in have_order:
        Order=re.search(order_sign, k if flag else v)
        if not Order:
            continue
        ks = re.split('/',k)
        vs = re.split('/',v)
        opos =[(i.find(Order.group(0))!=-1)for i in (ks if flag else vs)].index(True)
        folders = ks[opos-1] if flag else vs[opos-1] 
        folder = vs[opos-1] if flag else ks[opos-1]
        choise = int(Order.group(0)[2:])
        if folders.find(separator) > -1:
            if prefix and folders.find(prefix) == 0:
                folders = folders[len(prefix):]
            else:
                prefix = ''
            s_folders = re.split(separator,folders)
            choose_folder = s_folders[choise-1]
            if folder != choose_folder:
                fake = choose_folder+separator
                if fake in fake_separators:
                    fake_separators[fake] += 1
                else:
                    fake_separators.setdefault(fake,1)
        else:
            return '','',{}
    
    return separator, prefix, fake_separators


def _order_key_(ori,sep,pre,fake_separators):
    new  =  ori[:]
    match = re.findall('[a-zA-Z]\$\d+',ori)
    if not match:
        new=ori
    m1=re.split('/',  ori) 
    opos =[(i.find(match[0])!=-1)for i in m1].index(True)
    temp = '/'.join(m1[:opos-1])
    flag = True
    for fake in fake_separators:
        if ori.find(fake) > -1:
            flag = False
            break
    m=re.split(sep,m1[opos-1])
    if flag and m1[opos-1].find(sep) > -1:
        new=(temp+'/' if temp else '')+m[int(match[0][2:])-1]
    else:
        new = ori
    return [int(c)if c.isdigit()else c for c in re.split('(\d+)',new)]

def __order_key_(ori,sep,pre,fake_separators):
    new  =  ori[:]
    match = re.findall('[a-zA-Z]\$\d+',ori)
    if not match:
        new=ori
    m1=re.split('/',  ori) 
    opos =[(i.find(match[0])!=-1)for i in m1].index(True)
    temp = '/'.join(m1[:opos-1])
    flag = True
    for fake in fake_separators:
        if ori.find(fake) > -1:
            flag = False
            break
    m=re.split(sep,m1[opos-1])
    m[0] = m[0][len(pre):] if m[0].find(pre) > -1 else m[0]
    if flag and m1[opos-1].find(sep) > -1:
        new=(temp+'/' if temp else '')+m[int(match[0][2:])-1]
    else:
        new = ori
    return [int(c)if c.isdigit()else c for c in re.split('(\d+)',new)]

def _order_key_method(origin_mapping):
    sign = '[a-zA-Z]\$\d+'
    separator, prefix, fake_separators= _find_separator_prefix(origin_mapping,sign)
    print(separator, prefix, '!!')
    print('Separator: %s. Prefix pattern: %s.' % ( (separator if separator else 'None'),(prefix if prefix else 'None') ) ) 
    if not(separator or prefix):
        return []
    ori=[sorted(list(origin_mapping.keys())),sorted(list(origin_mapping.values()))]
    func=__order_key_ if prefix else _order_key_
    _ori=[[func(j,separator,prefix,fake_separators)for j in i] for i in ori]
    KV=[list(zip(ori[i],_ori[i])) for i in[0,1]]
    KV=[sorted(x,key=lambda x:x[1]) for x in KV]
    return [[z[0]for z in y]for y in KV]
test = _order_key_method(origin_mapping)
idx = generate_shift_table(origin_mapping,test)
res, readable = generate_run_length_table(idx)
print(res)

10732 10732
  !!
Separator: None. Prefix pattern: None.
[]


In [290]:
test = '_YZU_123'
re.split('_',test)

['', 'YZU', '123']