In [None]:
import random

## Generate Queries implementation

In [None]:
stmt_numbers = list(range(1, 11))
# Variable names: each letter of "abcxyz" followed by a digit 0-4 (a0, a1, ..., z4).
var_names = [letter + str(digit) for letter in "a b c x y z".split() for digit in range(5)]
constants = [2, 4, 6, 8, 10]

def generate_queries(num_queries=5, tuple_size=1, num_clauses=5, fname="queries.txt", stmt_numbers=stmt_numbers, var_names=var_names, constants=constants):
    """
    Write ``num_queries`` randomly generated PQL queries to ``fname``.

    Each query contributes five lines to the file: a ``"<i> - Query <i>"``
    header (``i`` starts at 0), the declarations, the query text, an empty
    line and the literal ``"5000"``.
    NOTE(review): the empty line and "5000" are presumably the expected-answer
    slot and a time limit for the test harness -- confirm against its format.

    :param num_queries: int, number of queries to generate. Defaults to 5.
    :param tuple_size: non-negative int or None, number of synonyms in the
                       Select clause. 0 selects BOOLEAN; None picks a random
                       size between 1 and 10. Defaults to 1.
    :param num_clauses: non-negative int or None, number of clauses per query.
                        None picks a random count between 1 and 100.
                        Defaults to 5.
    :param fname: str, path of the output file (overwritten).
    :param stmt_numbers: list of statement numbers clauses may refer to.
    :param var_names: list of variable names clauses may refer to.
    :param constants: list of constants clauses may refer to.
    :return: None. The result is the file written to ``fname``.

    The list defaults are the module-level lists as they existed when this
    function was defined.
    """
    lines = []
    for index in range(num_queries):
        declarations, query = generate_query(tuple_size, num_clauses, stmt_numbers, var_names, constants)
        lines += [f"{index} - Query {index}", declarations, query, "", "5000"]
    with open(fname, "w") as out_file:
        out_file.write("\n".join(lines))
    

In [None]:
def generate_query(tuple_size, num_clauses, stmt_numbers, var_names, constants):
    """
    Build one random query and the declarations it needs.

    :param tuple_size: number of synonyms in the Select clause; 0 selects
                       BOOLEAN, None picks a random size between 1 and 10.
    :param num_clauses: number of clauses to generate; None picks a random
                        count between 1 and 100.
    :param stmt_numbers: statement numbers available to the clauses.
    :param var_names: variable names available to the clauses.
    :param constants: constants available to the clauses.
    :return: ``(declarations, query)`` -- two strings.
    """
    if tuple_size is None:
        tuple_size = random.randint(1, 10)
    if num_clauses is None:
        num_clauses = random.randint(1, 100)

    synonyms = set()
    clause_parts = []
    for _ in range(num_clauses):
        clause_synonyms, clause_text = generate_clause(stmt_numbers, var_names, constants)
        synonyms |= clause_synonyms
        clause_parts.append(clause_text)

    # generate_select falls back to these stmt synonyms when no clause
    # produced one. Install them here so they are declared as well; otherwise
    # the query would select synonyms that never appear in the declarations.
    if tuple_size != 0 and not synonyms:
        synonyms = {("stmt", "stm1"), ("stmt", "stm2"), ("stmt", "stm3")}

    query = generate_select(tuple_size, synonyms) + "".join(clause_parts)
    return get_declarations(synonyms), query
    

def generate_clause(stmt_numbers, var_names, constants):
    """
    Generate one clause, choosing its kind with equal probability.

    A single uniform draw selects between a such-that clause, a pattern
    clause and a with clause.

    :return: whatever the chosen generator returns -- a
             ``(synonyms, clause_text)`` pair.
    """
    roll = random.random()
    if roll >= 2/3:
        return generate_with(var_names, constants)
    if roll >= 1/3:
        return generate_pattern(var_names, constants)
    return generate_such_that(var_names, stmt_numbers, constants)

def generate_select(tuple_size, synonyms):
    """
    Build the Select part of a query.

    :param tuple_size: number of synonyms to select; 0 yields
                       ``"Select BOOLEAN "``.
    :param synonyms: set of ``(entity, name)`` pairs to choose from. When
                     empty, three stmt synonyms (stm1..stm3) are used instead.
    :return: ``"Select <n1, n2, ...> "`` with ``tuple_size`` names drawn with
             replacement, so a name may repeat. The string ends with a space.
    """
    if tuple_size == 0:
        return "Select BOOLEAN "
    if not synonyms:
        synonyms = {("stmt", "stm1"), ("stmt", "stm2"), ("stmt", "stm3")}
    # Materialise the candidates once instead of on every draw. Sorting makes
    # the draw independent of set iteration order, so a seeded ``random``
    # reproduces the same selection.
    candidates = sorted(synonyms)
    chosen = [random.choice(candidates)[1] for _ in range(tuple_size)]
    return f"Select <{', '.join(chosen)}> "

def get_declarations(synonyms):
    """
    Render the declaration string for a set of synonyms.

    :param synonyms: iterable of ``(entity, name)`` pairs.
    :return: one ``"<entity> <name>, <name>; "`` segment per distinct entity,
             concatenated; an empty string when there are no synonyms.
    """
    names_by_entity = {}
    for syn in synonyms:
        names_by_entity.setdefault(syn[0], set()).add(syn[1])
    return "".join(
        f"{entity} {', '.join(names)}; "
        for entity, names in names_by_entity.items()
    )

def generate_such_that(var_names, stmt_numbers, constants):
    """
    Generate one random such-that clause.

    A relation name is picked at random from a fixed table; the table gives,
    for each of the two arguments, the lists of literal values it may take
    and the entity types a synonym for it may have. Both arguments are then
    produced with generate_ref.

    :param var_names: names used (wrapped in double quotes) as literal
                      arguments.
    :param stmt_numbers: statement numbers used as literal arguments.
    :param constants: accepted but not used by this function.
    :return: ``(synonyms, clause)`` where ``synonyms`` is the set of
             ``(entity, name)`` pairs introduced and ``clause`` is
             ``" such that <Rel>(<arg1>, <arg2>) "``.
    """
    quoted_names = ['"' + name + '"' for name in var_names]
    stmt_entities = ['stmt', 'read', 'print', 'while', 'if', 'assign', "call"]

    # Argument specs: [lists of literal choices, entity types for a synonym].
    stmt_arg = [[stmt_numbers, ["_"]], stmt_entities]
    user_arg = [[stmt_numbers, quoted_names, ["_"]], stmt_entities + ["procedure"]]
    variable_arg = [[quoted_names, ["_"]], ['variable']]
    affects_arg = [[stmt_numbers, ["_"]], ['stmt', 'assign']]
    procedure_arg = [[quoted_names, ["_"]], ['procedure']]

    # Key order matters: the relation is drawn by position from this dict.
    relation_table = {
        "Parent": (stmt_arg, stmt_arg),
        "Parent*": (stmt_arg, stmt_arg),
        "Follows": (stmt_arg, stmt_arg),
        "Follows*": (stmt_arg, stmt_arg),
        "Next": (stmt_arg, stmt_arg),
        "Next*": (stmt_arg, stmt_arg),
        "Uses": (user_arg, variable_arg),
        "Modifies": (user_arg, variable_arg),
        "Affects": (affects_arg, affects_arg),
        "Affects*": (affects_arg, affects_arg),
        "Calls": (procedure_arg, procedure_arg),
        "Calls*": (procedure_arg, procedure_arg),
    }

    relation = random.choice(list(relation_table))
    first_spec, second_spec = relation_table[relation]
    first = generate_ref(first_spec[0], first_spec[1])
    second = generate_ref(second_spec[0], second_spec[1])

    # A ref whose first element is empty is a literal, not a synonym.
    introduced = {ref for ref in (first, second) if ref[0]}
    return introduced, f" such that {relation}({first[1]}, {second[1]}) "

def generate_pattern(var_names, constants):
    """
    Generate one random pattern clause (assign, while or if, equally likely).

    The first argument is a variable synonym, ``_`` or a literal variable
    name. Literal names are wrapped in double quotes, as generate_such_that
    and generate_with do; an unquoted name would read as a synonym that is
    never declared.

    :param var_names: plain (unquoted) variable names.
    :param constants: constants passed on to generate_expression.
    :return: ``(synonyms, clause)`` where ``synonyms`` is the set of
             ``(entity, name)`` pairs introduced and ``clause`` is one of
             ``" pattern a(<ref>, <expr>) "``, ``" pattern w(<ref>, _) "`` or
             ``" pattern i(<ref>, _, _) "``.
    """
    quoted_names = ['"' + name + '"' for name in var_names]
    entref = generate_ref([quoted_names, ["_"]], ["variable"])

    synonyms = set()
    if entref[0]:
        synonyms.add(entref)

    unif = random.random()
    if unif < 1/3:
        pattern_syn = generate_synonym(["assign"])
        # generate_expression does its own quoting, so it gets plain names.
        arguments = f"{entref[1]}, {generate_expression(var_names, constants)}"
    elif unif < 2/3:
        pattern_syn = generate_synonym(["while"])
        arguments = f"{entref[1]}, _"
    else:
        pattern_syn = generate_synonym(["if"])
        arguments = f"{entref[1]}, _, _"

    synonyms.add(pattern_syn)
    return synonyms, f" pattern {pattern_syn[1]}({arguments}) "

def generate_with(var_names, constants):
    """
    Generate one random with clause comparing two refs of the same kind.

    With probability 0.5 both sides are name-valued (quoted variable names
    or ``synonym.varName`` / ``synonym.procName``); otherwise both are
    integer-valued (constants or ``synonym.stmt#`` / ``synonym.value``).

    :param var_names: plain variable names; quoted here when used as literals.
    :param constants: values used as integer literals.
    :return: ``(synonyms, clause)`` where ``synonyms`` is the set of
             ``(entity, name)`` pairs introduced and ``clause`` is
             ``"with <lhs>=<rhs> "``.
    """
    # Key order matters in both tables: entities are drawn by position.
    name_attributes = {
        "read": "varName",
        "print": "varName",
        "call": "procName",
        "variable": "varName",
        "procedure": "procName"
    }
    integer_attributes = {
        "stmt": "stmt#",
        "read": "stmt#",
        "print": "stmt#",
        "call": "stmt#",
        "while": "stmt#",
        "if": "stmt#",
        "assign": "stmt#",
        "constant": "value",
    }

    if random.random() < 0.5:
        attr_dict = name_attributes
        literals = ['"' + name + '"' for name in var_names]
    else:
        attr_dict = integer_attributes
        literals = constants

    synonyms = set()
    sides = []
    for _ in range(2):
        entity, text = generate_ref([literals], list(attr_dict.keys()))
        if entity:
            # A synonym: declare it and qualify it with its attribute.
            synonyms.add((entity, text))
            text = text + "." + attr_dict[entity]
        sides.append(text)

    return synonyms, f"with {sides[0]}={sides[1]} "



def generate_ref(reftypes, entities):
    """
    Produce either a literal value or a fresh synonym.

    One slot is drawn uniformly from the ``len(reftypes)`` literal lists plus
    one extra slot that stands for "use a synonym".

    :param reftypes: list of lists of literal values.
    :param entities: entity types a synonym may take.
    :return: ``("", literal)`` for a literal, otherwise the
             ``(entity, name)`` pair from generate_synonym.
    """
    slot = random.randint(0, len(reftypes))
    if slot < len(reftypes):
        return "", random.choice(reftypes[slot])
    return generate_synonym(entities)

def generate_synonym(entities):
    """
    Pick an entity type and invent a synonym name for it.

    :param entities: sequence of entity type names to choose from.
    :return: ``(entity, name)`` where ``name`` is the first three characters
             of ``entity`` followed by a random number from 1 to 10.
    """
    entity = random.choice(entities)
    suffix = random.randint(1, 10)
    return entity, f"{entity[:3]}{suffix}"

def generate_expression(var_names, constants):
    """
    Generate the expression argument of an assign pattern.

    A binary expression over the given names and constants is built with the
    operators + - * /; with probability 0.1 per round it is wrapped in
    parentheses and extended by another operator and operand.

    :param var_names: list of variable names usable as operands.
    :param constants: values usable as operands (converted with ``str``).
    :return: with equal probability ``"_"``, the quoted expression
             (exact match) or the quoted expression wrapped in underscores
             (partial match).
    """
    operands = var_names + list(map(str, constants))
    operators = ["+", "-", "*", "/"]

    left = random.choice(operands)
    operator = random.choice(operators)
    right = random.choice(operands)
    expression = left + operator + right

    while random.random() > 0.9:
        operator = random.choice(operators)
        operand = random.choice(operands)
        expression = "(" + expression + ")" + operator + operand

    roll = random.random()
    if roll < 1/3:
        return "_"
    quoted = '"' + expression + '"'
    if roll < 2/3:
        return quoted
    return "_" + quoted + "_"

In [None]:
# generate_queries(100, 2, 10, "Auto_100_2_10_queries.txt")

In [None]:
# var_names = ["x", "y", "z"]
# constants = [2,4,6,8,10]
# for i in range(20):
    # print(generate_pattern(var_names, constants))
    # print(generate_such_that())
    # print(generate_with(var_names, constants))
    # print(generate_query(0, 5, stmt_numbers, var_names, constants))
    # print(generate_queries())

## Generate source implementation

In [None]:
# Current nesting counters, updated by the generator functions via ``global``.
statement_depth = 0
expression_depth = 0
# Nesting limits checked before a generator recurses further.
max_depth = 8
expression_max_depth = 3

# Variable names: each letter of "abcxyz" followed by a digit 0-4 (a0, a1, ..., z4).
variable_names = [letter + str(digit) for letter in "a b c x y z".split() for digit in range(5)]

def get_variable():
    """Return one entry of the module-level ``variable_names``, chosen at random."""
    pool = variable_names
    return random.choice(pool)


def get_expression():
    """
    Generate a random arithmetic expression.

    One of three forms is drawn: ``expression + term``, ``expression - term``
    or a bare term. The two recursive forms are only taken while the global
    ``expression_depth`` is below ``expression_max_depth``; otherwise a bare
    term is produced. ``expression_depth`` is incremented for the duration of
    the call.

    :return: the expression as a string.
    """
    global expression_depth
    expression_depth += 1
    selector = random.randint(0, 2)
    operator = {0: "+", 1: "-"}.get(selector)
    if operator is not None and expression_depth < expression_max_depth:
        left = get_expression()
        gap_before = get_optional_whitespace()
        gap_after = get_optional_whitespace()
        right = get_term()
        text = left + gap_before + operator + gap_after + right
    else:
        text = get_term()
    expression_depth -= 1
    return text


def get_term():
    """
    Generate a random term.

    One of four forms is drawn: ``term * factor``, ``term / factor``,
    ``term % factor`` or a bare factor. The recursive forms are only taken
    while the global ``expression_depth`` is below ``expression_max_depth``;
    otherwise a bare factor is produced. ``expression_depth`` is incremented
    for the duration of the call.

    :return: the term as a string.
    """
    global expression_depth
    expression_depth += 1
    selector = random.randint(0, 3)
    operator = {0: "*", 1: "/", 2: "%"}.get(selector)
    if operator is not None and expression_depth < expression_max_depth:
        left = get_term()
        gap_before = get_optional_whitespace()
        gap_after = get_optional_whitespace()
        right = get_factor()
        text = left + gap_before + operator + gap_after + right
    else:
        text = get_factor()
    expression_depth -= 1
    return text


def get_factor():
    """
    Generate a random factor.

    Draws between a parenthesised expression (only while the global
    ``expression_depth`` is below ``expression_max_depth``), a variable name
    and a value from the module-level ``constants``. A draw for the
    parenthesised form that is blocked by the depth limit yields a constant.

    :return: the factor as a string.
    """
    selector = random.randint(0, 2)
    if selector == 0 and expression_depth < expression_max_depth:
        opening_gap = get_optional_whitespace()
        inner = get_expression()
        closing_gap = get_optional_whitespace()
        return "(" + opening_gap + inner + closing_gap + ")"
    if selector == 1:
        return get_variable()
    return str(random.choice(constants))


def get_logical_expression():
    """
    Generate a random condition.

    One of four forms is drawn: ``!(cond)``, ``(cond) && (cond)``,
    ``(cond) || (cond)`` or a relational expression. The three recursive
    forms are only taken while the global ``expression_depth`` is below
    ``expression_max_depth``; otherwise a relational expression is produced.
    ``expression_depth`` is incremented for the duration of the call.

    :return: the condition as a string.
    """
    global expression_depth

    def parenthesised():
        # "(" <ws> <nested condition> <ws> ")"
        lead = get_optional_whitespace()
        inner = get_logical_expression()
        trail = get_optional_whitespace()
        return "(" + lead + inner + trail + ")"

    selector = random.randint(0, 3)
    expression_depth += 1
    can_nest = expression_depth < expression_max_depth
    if selector == 0 and can_nest:
        text = "!" + get_optional_whitespace() + parenthesised()
    elif selector in (1, 2) and can_nest:
        connective = "&&" if selector == 1 else "||"
        left = parenthesised()
        gap_before = get_optional_whitespace()
        gap_after = get_optional_whitespace()
        right = parenthesised()
        text = left + gap_before + connective + gap_after + right
    else:
        text = get_relational_expression()
    expression_depth -= 1
    return text


def get_relational_expression():
    """
    Generate ``<factor> <op> <factor>`` with a random comparison operator.

    The operator is drawn uniformly from > >= < <= == != and each side comes
    from get_relational_factor, with optional whitespace around the operator.

    :return: the comparison as a string.
    """
    comparators = (">", ">=", "<", "<=", "==", "!=")
    comparator = comparators[random.randint(0, 5)]
    lhs = get_relational_factor()
    gap_before = get_optional_whitespace()
    gap_after = get_optional_whitespace()
    rhs = get_relational_factor()
    return lhs + gap_before + comparator + gap_after + rhs


def get_relational_factor():
    """
    Generate one side of a comparison.

    :return: with equal probability a variable name, an integer from 0 to 100
             as a string, or an arithmetic expression.
    """
    selector = random.randint(0, 2)
    if selector == 2:
        return get_expression()
    if selector == 1:
        return str(random.randint(0, 100))
    return get_variable()


def get_whitespace():
    """Return the mandatory separator: exactly one space."""
    separator = " "
    return separator


def get_optional_whitespace():
    """Return a run of zero, one or two spaces, chosen at random."""
    count = random.randint(0, 2)
    return " " * count


def get_statement(call_procedure_names):
    """
    Generate one random statement.

    A selector from 0 to 12 decides the kind: 0 is an if and 1 a while (both
    only while the global ``statement_depth`` is below ``max_depth``), values
    up to 3 a read, up to 5 a print, up to 7 a call (only when
    ``call_procedure_names`` is non-empty); everything else is an assignment.
    A draw that is blocked falls through to the next kind that applies.

    :param call_procedure_names: procedure names a call statement may target.
    :return: the statement as a string.
    """
    selector = random.randint(0, 12)
    may_nest = statement_depth < max_depth
    if may_nest and selector == 0:
        return get_if_statement(call_procedure_names)
    if may_nest and selector == 1:
        return get_while_statement(call_procedure_names)
    if selector <= 3:
        return get_read_statement(call_procedure_names)
    if selector <= 5:
        return get_print_statement(call_procedure_names)
    if selector <= 7 and len(call_procedure_names) != 0:
        return get_call_statement(call_procedure_names)
    return get_assign_statement(call_procedure_names)


def get_read_statement(call_procedure_names):
    """
    Generate ``read <variable>;`` followed by a newline.

    :param call_procedure_names: accepted but not used by this function.
    """
    return f"read{get_whitespace()}{get_variable()}{get_optional_whitespace()};\n"


def get_print_statement(call_procedure_names):
    """
    Generate ``print <variable>;`` followed by a newline.

    :param call_procedure_names: accepted but not used by this function.
    """
    return f"print{get_whitespace()}{get_variable()}{get_optional_whitespace()};\n"


def get_assign_statement(call_procedure_names):
    """
    Generate ``<variable> = <expression>;`` followed by a newline.

    :param call_procedure_names: accepted but not used by this function.
    """
    target = get_variable()
    gap_before_eq = get_optional_whitespace()
    gap_after_eq = get_optional_whitespace()
    value = get_expression()
    gap_before_semicolon = get_optional_whitespace()
    return target + gap_before_eq + "=" + gap_after_eq + value + gap_before_semicolon + ";\n"

def get_call_statement(call_procedure_names):
    """
    Generate ``call <name>;`` followed by a newline.

    :param call_procedure_names: non-empty sequence of procedure names; one
                                 is chosen at random.
    """
    callee = random.choice(call_procedure_names)
    return "call {};\n".format(callee)

def get_while_statement(call_procedure_names):
    """
    Generate a while loop with a random condition and 1 to 5 body statements.

    The global ``statement_depth`` is incremented while the loop is being
    built. The returned text ends with the closing brace, with no newline
    after it.

    :param call_procedure_names: procedure names the body may call.
    :return: the while statement as a string.
    """
    global statement_depth
    statement_depth += 1
    pieces = ["while", get_optional_whitespace()]
    pieces += [
        "(",
        get_optional_whitespace(),
        get_logical_expression(),
        get_optional_whitespace(),
        ")",
        get_optional_whitespace(),
    ]
    pieces += ["{\n", get_optional_whitespace()]
    for _ in range(random.randint(1, 5)):
        pieces.append(get_statement(call_procedure_names))
        pieces.append(get_optional_whitespace())
    pieces.append("}")
    statement_depth -= 1
    return "".join(pieces)


def get_if_statement(call_procedure_names):
    """
    Generate an if/then/else with a random condition.

    Each branch holds 1 to 5 statements. The global ``statement_depth`` is
    incremented while the statement is being built. The returned text ends
    with the closing brace of the else branch, with no newline after it.

    :param call_procedure_names: procedure names the branches may call.
    :return: the if statement as a string.
    """
    global statement_depth

    def branch_statements():
        # 1-5 statements, each followed by optional whitespace.
        parts = []
        for _ in range(random.randint(1, 5)):
            parts.append(get_statement(call_procedure_names))
            parts.append(get_optional_whitespace())
        return parts

    statement_depth += 1
    pieces = ["if", get_optional_whitespace()]
    pieces += [
        "(",
        get_optional_whitespace(),
        get_logical_expression(),
        get_optional_whitespace(),
        ")",
        get_optional_whitespace(),
    ]
    pieces += ["then", get_optional_whitespace(), "{\n"]
    pieces += branch_statements()
    pieces += [
        "}",
        get_optional_whitespace(),
        "else",
        get_optional_whitespace(),
        "{\n",
        get_optional_whitespace(),
    ]
    pieces += branch_statements()
    pieces.append("}")
    statement_depth -= 1
    return "".join(pieces)


def get_procedure(name="generated", statement_count=10, call_procedure_names=[]):
    """
    Generate the text of one procedure.

    :param name: procedure name.
    :param statement_count: number of top-level statements in the body.
    :param call_procedure_names: procedure names that call statements in the
                                 body may target; it is only read here.
    :return: ``procedure <name> {`` ... ``}`` as a string, with no newline
             after the closing brace.
    """
    header = "procedure" + get_whitespace() + name + get_optional_whitespace() + "{\n"
    body = "".join(
        get_statement(call_procedure_names) + get_optional_whitespace()
        for _ in range(statement_count)
    )
    return header + body + "}"

def get_procedures(statement_count, call_procedure_names, fname="source.txt"):
    """
    Generate one procedure per name and write them all to ``fname``.

    Each procedure may only call the procedures listed after it in
    ``call_procedure_names``, so the last one gets an empty callee list.

    :param statement_count: number of top-level statements per procedure.
    :param call_procedure_names: names of the procedures to generate, in order.
    :param fname: path of the output file (overwritten).
    :return: None. The source text is written to the file, not returned.
    """
    chunks = []
    for position, proc_name in enumerate(call_procedure_names):
        callees = call_procedure_names[position + 1:]
        chunks.append(get_procedure(proc_name, statement_count, callees) + "\n\n")
    with open(fname, "w") as out_file:
        out_file.write("".join(chunks))


# print(get_procedures(5, ["a", "b", "c"]))

## To generate a pair of test files:

In [None]:
# Inputs for this run.
stmt_numbers = list(range(1, 30))
# Variable names: each letter of "abcxyz" followed by a digit 0-4 (a0, a1, ..., z4).
variable_names = [letter + str(digit) for letter in "a b c x y z".split() for digit in range(5)]
constants = list(range(100))

# Reset the nesting counters and limits read by the source generator.
statement_depth = 0
expression_depth = 0
max_depth = 8
expression_max_depth = 3

# Procedures are named after the variables; each gets 10 top-level statements.
procedures_in_source = variable_names
procedure_statement_count = 10
get_procedures(
    statement_count=procedure_statement_count,
    call_procedure_names=procedures_in_source,
    fname="auto3_source.txt",
)
generate_queries(
    num_queries=100,
    tuple_size=5,
    num_clauses=500,
    fname="auto3_queries.txt",
    stmt_numbers=stmt_numbers,
    var_names=variable_names,
    constants=constants,
)

In [None]:
# Inputs for this run.
stmt_numbers = list(range(1, 30))
# Variable (and procedure) names here are the keyword-like words below.
variable_names = ["while", "if", "then", "else", "read", "print", "call", "procedure"]
constants = list(range(11))

# Reset the nesting counters and limits read by the source generator.
statement_depth = 0
expression_depth = 0
max_depth = 8
expression_max_depth = 2

# Procedures are named after the variables; each gets 20 top-level statements.
procedures_in_source = variable_names
procedure_statement_count = 20
get_procedures(
    statement_count=procedure_statement_count,
    call_procedure_names=procedures_in_source,
    fname="auto5_source.txt",
)
generate_queries(
    num_queries=100,
    tuple_size=2,
    num_clauses=10,
    fname="auto5_queries.txt",
    stmt_numbers=stmt_numbers,
    var_names=variable_names,
    constants=constants,
)