In [21]:
import itertools
import pandas as pd
import numpy as np
from IPython.display import display

In [2]:
def get_all_substrings(input_string):
  length = len(input_string)
  return set([input_string[i:j+1] for i in range(length) for j in range(i,length)])

In [3]:
def split_string(a_string):
    return ["x = x.split(\"{}\")".format(sep) for sep in get_all_substrings(a_string) if len(sep) <= 2]

In [4]:
split_string("abc d")

['x = x.split("b")',
 'x = x.split("d")',
 'x = x.split("bc")',
 'x = x.split("ab")',
 'x = x.split("c ")',
 'x = x.split(" ")',
 'x = x.split(" d")',
 'x = x.split("c")',
 'x = x.split("a")']

In [5]:
def get_select_combinations(a_list):
    list_of_list = [list(itertools.combinations(range(len(a_list)), k)) for k in range(1, len(a_list) + 1)]
    reverse_list_of_list = [list(itertools.combinations(np.linspace(-len(a_list), -1, len(a_list)).astype(int), k)) for k in range(1, len(a_list) + 1)]
    select_list = list(itertools.chain.from_iterable(list_of_list + reverse_list_of_list))
    func_str_list = ["x = [x[i] for i in {}]".format(selected) for selected in select_list if len(selected) <= 3]
    return func_str_list

In [6]:
get_select_combinations(["a", "c", "b"])

['x = [x[i] for i in (0,)]',
 'x = [x[i] for i in (1,)]',
 'x = [x[i] for i in (2,)]',
 'x = [x[i] for i in (0, 1)]',
 'x = [x[i] for i in (0, 2)]',
 'x = [x[i] for i in (1, 2)]',
 'x = [x[i] for i in (0, 1, 2)]',
 'x = [x[i] for i in (-3,)]',
 'x = [x[i] for i in (-2,)]',
 'x = [x[i] for i in (-1,)]',
 'x = [x[i] for i in (-3, -2)]',
 'x = [x[i] for i in (-3, -1)]',
 'x = [x[i] for i in (-2, -1)]',
 'x = [x[i] for i in (-3, -2, -1)]']

In [7]:
def get_permutations(a_list):
    permutations_list = list(itertools.permutations(range(len(a_list))))
    func_str_list = ["x = [x[i] for i in {}]".format(permutation) for permutation in permutations_list]
    return func_str_list

In [8]:
get_permutations(["a", "c", "b"])

['x = [x[i] for i in (0, 1, 2)]',
 'x = [x[i] for i in (0, 2, 1)]',
 'x = [x[i] for i in (1, 0, 2)]',
 'x = [x[i] for i in (1, 2, 0)]',
 'x = [x[i] for i in (2, 0, 1)]',
 'x = [x[i] for i in (2, 1, 0)]']

In [9]:
def join_list_to_string(a_list, output_string):
    return ["x = \"{}\".join(x)".format(sep) for sep in list(get_all_substrings(output_string)) + [""] if len(sep) <= 2]

In [10]:
join_list_to_string(["a", "c", "b"], "abds")

['x = "b".join(x)',
 'x = "d".join(x)',
 'x = "ab".join(x)',
 'x = "ds".join(x)',
 'x = "s".join(x)',
 'x = "bd".join(x)',
 'x = "a".join(x)',
 'x = "".join(x)']

In [11]:
def apply_meta_func(input_obj, func):
    func_list = func(input_obj)
    def apply_func(func):
        d = {"x": input_obj}
        exec(func, d)
        return d["x"]
    obj_list = list(map(apply_func, func_list))
    return func_list, obj_list 

In [12]:
def apply_meta_func_on_group(group, meta_func):
    input_obj = group.iloc[0]["obj"]
    func_str = group.iloc[0]["func_str"]
    func_list, obj_list = apply_meta_func(input_obj, meta_func)
    output_data_frame = pd.DataFrame([obj_list, func_list]).T
    output_data_frame.columns = ["obj", "func_str"]
    func_str += "\n" + output_data_frame["func_str"]
    output_data_frame["func_str"] = func_str    
    return output_data_frame

In [13]:
def apply_func_on_input_output_example(row, input_string, output_string):
    func_str = row
    d = {"x": input_string}
    try:
        exec(func_str, d)
        return d["x"] == output_string
    except:
        return False

In [14]:
def code(input_output_examples_list, is_log=False):
    input_string = input_output_examples_list[0][0]
    output_string = input_output_examples_list[0][1]
    input_obj_list = [input_string]
    input_func_str_list = [""]
    func_data_frame = pd.DataFrame([input_obj_list, input_func_str_list]).T
    func_data_frame.columns = ["obj", "func_str"]
    
    join_list_to_string_with_output = lambda a_list: join_list_to_string(a_list, output_string)
    
    if is_log:
        meta_func_list = [split_string, get_select_combinations, join_list_to_string_with_output]
    else:
        meta_func_list = [split_string, get_select_combinations, get_permutations, join_list_to_string_with_output]    
    
    for meta_func in meta_func_list:
        func_data_frame = func_data_frame.groupby(["func_str"]).apply(apply_meta_func_on_group, meta_func)
        func_data_frame.reset_index(drop=True, inplace=True)
        
    print("total number of functions: {}".format(len(func_data_frame)))
        
    possible_code_series = func_data_frame[func_data_frame.obj == output_string]["func_str"]
    
    for input_output_example in input_output_examples_list[1:]:
        is_good_func_series = possible_code_series.apply(apply_func_on_input_output_example, args=(input_output_example))
        possible_code_series = possible_code_series[is_good_func_series]
        
    return possible_code_series

In [15]:
input_output_examples_list = [("aa bb cc", "bbmmccmmaa"), ("ll dd ff", "ddmmffmmll")]
possible_code_series = code(input_output_examples_list)
possible_code_series.apply(print)

total number of functions: 2288

x = x.split(" ")
x = [x[i] for i in (-3, -2, -1)]
x = [x[i] for i in (1, 2, 0)]
x = "mm".join(x)

x = x.split(" ")
x = [x[i] for i in (0, 1, 2)]
x = [x[i] for i in (1, 2, 0)]
x = "mm".join(x)


146    None
263    None
Name: func_str, dtype: object

In [16]:
input_output_examples_list = [("ffllnn", "nnff"), ("kkllmm", "mmkk")]
possible_code_series = code(input_output_examples_list)
possible_code_series.apply(print)

total number of functions: 780

x = x.split("l")
x = [x[i] for i in (-3, -1)]
x = [x[i] for i in (1, 0)]
x = "".join(x)

x = x.split("l")
x = [x[i] for i in (-3, -2, -1)]
x = [x[i] for i in (1, 2, 0)]
x = "".join(x)

x = x.split("l")
x = [x[i] for i in (-3, -2, -1)]
x = [x[i] for i in (2, 0, 1)]
x = "".join(x)

x = x.split("l")
x = [x[i] for i in (-3, -2, -1)]
x = [x[i] for i in (2, 1, 0)]
x = "".join(x)

x = x.split("l")
x = [x[i] for i in (0, 1, 2)]
x = [x[i] for i in (1, 2, 0)]
x = "".join(x)

x = x.split("l")
x = [x[i] for i in (0, 1, 2)]
x = [x[i] for i in (2, 0, 1)]
x = "".join(x)

x = x.split("l")
x = [x[i] for i in (0, 1, 2)]
x = [x[i] for i in (2, 1, 0)]
x = "".join(x)

x = x.split("l")
x = [x[i] for i in (0, 2)]
x = [x[i] for i in (1, 0)]
x = "".join(x)

x = x.split("ll")
x = [x[i] for i in (-2, -1)]
x = [x[i] for i in (1, 0)]
x = "".join(x)

x = x.split("ll")
x = [x[i] for i in (0, 1)]
x = [x[i] for i in (1, 0)]
x = "".join(x)


311    None
347    None
353    None
359    None
401    None
407    None
413    None
425    None
473    None
491    None
Name: func_str, dtype: object

# logs

In [17]:
def get_max_length_common_string(string_1, string_2):
    union_list = [substring for substring in get_all_substrings(string_1) if substring in string_2]
    common_string = union_list[pd.Series(union_list).apply(len).argmax()]
    return common_string

In [18]:
def get_info_from_log(log_lines, output_strings_list):
    log_lines = [line.replace("\n", "") for line in log_lines]
    interesting_lines = []
    for line in log_lines:
        for output_string in output_strings_list:
            if output_string in line:        
                interesting_lines.append((line, output_string))
                
    common_string = get_max_length_common_string(interesting_lines[0][0], interesting_lines[1][0])
    
    common_string_no_numeric_trail = common_string[:(-pd.Series(list(common_string)).str.isnumeric().values[::-1].argmin())]
    
    input_output_examples_list = interesting_lines
    possible_code_series = code(input_output_examples_list, True)
    
    possible_code_series.apply(print)
    
    inner_func = "\n".join(["\t" + line for line in possible_code_series.iloc[0].split("\n") if line != ""] )
    
    output_list = []
    for line in log_lines:
        if common_string_no_numeric_trail in line:
            d = {"x": line}
            exec(possible_code_series.iloc[0], d)            
            output_list.append(d["x"])
            
    func = """
output_list = []
for line in log_lines:
    if \"{}\" in line:
        x = line
{}
        output_list.append(x)
""".format(common_string_no_numeric_trail, inner_func)
    
    return output_list, func

In [19]:
file_path = r".\log_example.txt"
log_lines = open(file_path, "r").readlines()
log_lines

['Aug  1 18:27:45 knight sshd[20325]: Illegal user test from 218.49.183.17\n',
 'Aug  1 18:27:46 knight sshd[20325]: Failed password for illegal user test from 218.49.183.17 port 48849 ssh2\n',
 'Aug  1 18:27:46 knight sshd[20325]: error: Could not get shadow information for NOUSER\n',
 'Aug  1 18:27:48 knight sshd[20327]: Illegal user guest from 218.49.183.17\n',
 'Aug  1 18:27:49 knight sshd[20327]: Failed password for illegal user guest from 218.49.183.17 port 49090 ssh2\n',
 'Aug  1 18:27:49 knight sshd[20327]: error: Could not get shadow information for NOUSER\n',
 'Aug  1 18:27:52 knight sshd[20329]: Failed password for admin from 218.49.183.17 port 49266 ssh2\n',
 'Aug  1 18:27:56 knight sshd[20331]: Failed password for admin from 218.49.183.17 port 49468 ssh2\n',
 'Aug  1 18:27:58 knight sshd[20334]: Illegal user user from 218.49.183.17\n',
 'Aug  1 18:27:59 knight sshd[20334]: Failed password for illegal user user from 218.49.183.17 port 49680 ssh2\n',
 'Aug  1 18:27:59 knight

In [22]:
output_strings_list = ["port 40009", "port 48849"]

output_list, func = get_info_from_log(log_lines, output_strings_list)

display(output_list)
print(func)

total number of functions: 63936

x = x.split(" ")
x = [x[i] for i in (-3, -2)]
x = " ".join(x)


['port 48849',
 'port 49090',
 'port 49266',
 'port 49468',
 'port 49680',
 'port 49869',
 'port 50063',
 'port 50245',
 'port 50671',
 'port 52244',
 'port 52416',
 'port 52558',
 'port 52818',
 'port 52851',
 'port 53014',
 'port 53040',
 'port 53192',
 'port 53230',
 'port 53404',
 'port 53425',
 'port 53571',
 'port 53615',
 'port 54033',
 'port 54078',
 'port 54243',
 'port 54285',
 'port 54423',
 'port 39604',
 'port 39811',
 'port 40009',
 'port 40217',
 'port 40470',
 'port 40973',
 'port 41159',
 'port 41541',
 'port 41630']


output_list = []
for line in log_lines:
    if " from 218.49.183.17 port " in line:
        x = line
	x = x.split(" ")
	x = [x[i] for i in (-3, -2)]
	x = " ".join(x)
        output_list.append(x)

