## Finding Missing Requirements (with Attributes)

In this notebook, I try to put together an iterative way to:
1. Add *unbound variables* until the requirement is satisfied
2. Get the *relationships* of the unbound variables
3. Filter these relationships by adding them one at a time as a negated constraint
   and checking the model again.
4. When we find the one that makes the model unsat, it means we've found the right one.

#### Goals
- Make the search for the correct relationship as fast as possible:
  there are usually many relationships, and we need to iterate over them at least a
  couple of times. We can progressively add only the relationships that remain
  after each pass, always one at a time, checking the model again each time.

In [1]:
from itertools import product
from operator import itemgetter

import yaml
from z3 import (And, Const, Consts, DatatypeSortRef, Exists, ForAll,
                FuncDeclRef, Implies, Model, Not, Or, Solver, sat, unsat)

from mc_openapi.doml_mc import DOMLVersion
from mc_openapi.doml_mc.intermediate_model.metamodel import (
    parse_inverse_associations, parse_metamodel)
from mc_openapi.doml_mc.xmi_parser.doml_model import (parse_doml_model,
                                                      parse_xmi_model)
from mc_openapi.doml_mc.z3encoding.im_encoding import (
    assert_im_associations, assert_im_attributes,
    def_elem_class_f_and_assert_classes, mk_attr_data_sort, mk_elem_sort_dict,
    mk_stringsym_sort_dict)
from mc_openapi.doml_mc.z3encoding.metamodel_encoding import (
    def_association_rel, def_attribute_rel, mk_association_sort_dict,
    mk_attribute_sort_dict, mk_class_sort_dict)
from mc_openapi.doml_mc.z3encoding.types import Refs

In [2]:
# Load the DOML v2.0 metamodel description from YAML and parse it into `mm`.
# NOTE(review): yaml.Loader is PyYAML's unrestricted loader. That is acceptable for
# this repository-local asset, but prefer yaml.safe_load if the file could ever
# come from an untrusted source — confirm the asset needs no custom tags first.
with open("../assets/doml_meta_v2.0.yaml") as mmf:
    mmdoc = yaml.load(mmf, yaml.Loader)
mm = parse_metamodel(mmdoc)

**You can change here the input DOML file**

In [3]:
# Relative path of the DOML document to analyse. The alternatives are kept
# commented out; only the uncommented assignment takes effect.
# NOTE(review): the active path contains a `v2.0/` directory component that the
# commented-out paths lack — verify those still resolve before enabling them.
# doml_document_path = "../../tests/doml/nginx-openstack_v2.0.domlx"
doml_document_path = "../../tests/doml/v2.0/nginx-openstack_v2.0_wrong_vm_iface.domlx"
# doml_document_path = "../../tests/doml/nginx-openstack_v2.0_wrong_iface_uniq.domlx"
# doml_document_path = "../../tests/doml/saas_no_https_rule.domlx"
# doml_document_path = "../../tests/doml/saas_https_no_attrs.domlx"
# doml_document_path = "../../tests/doml/openstack_template.domlx"

In [4]:
# Read the selected DOML document as raw bytes.
with open(doml_document_path, "rb") as xmif:
    doc = xmif.read()

# Parse the document (as DOML v2.0) into the intermediate model `im`; the second
# element of the returned pair is not used in this notebook.
im, _ = parse_doml_model(doc, DOMLVersion.V2_0)

We need to re-initialize the solver context before each iteration,
since an unbound variable is an element, elements are values of an EnumSort,
and an EnumSort cannot be modified once created and is tied to the solver context.

The following code is stuff that is already present in the `IntermediateModelChecker`.

In [5]:
from typing import Dict

# Alias used to annotate the solver-state dictionary built by `initialize_solver`
# and passed to the requirement functions.
Context = Dict

In [6]:
from mc_openapi.doml_mc.intermediate_model.metamodel import get_mangled_attribute_defaults


def initialize_solver(
    unbound_elems_quantity: int = 0,
    unbound_values_quantity: int = 0,
    requirements: list = []
) -> Context:
    """Create a fresh solver encoding the metamodel, the model and the requirements.

    Reads the module-level metamodel ``mm`` and intermediate model ``im``.

    Args:
        unbound_elems_quantity: how many extra element names
            (``unbound_elem_<i>``) are handed to ``mk_elem_sort_dict``
            together with ``im``.
        unbound_values_quantity: how many ``unbound_val_<i>`` entries to
            create; each one maps to the attribute-data sort's ``placeholder``.
        requirements: callables invoked as ``req(ctx)`` after everything else
            has been set up.

    Returns:
        A dict with the keys ``solver``, ``class_sort``, ``class_``,
        ``assoc_sort``, ``assoc``, ``attr_sort``, ``attr``, ``str_sort``,
        ``str``, ``attr_data_sort``, ``unbound_elems``, ``elem_sort``,
        ``elem``, ``unbound_values``, ``elem_class_f``, ``attr_rel`` and
        ``assoc_rel``.
    """
    solver = Solver()
    z3_ctx = solver.ctx

    # Sorts (and their value dictionaries) derived from the metamodel / model.
    class_sort, class_ = mk_class_sort_dict(mm, z3_ctx)
    assoc_sort, assoc = mk_association_sort_dict(mm, z3_ctx)
    attr_sort, attr = mk_attribute_sort_dict(mm, z3_ctx)
    str_sort, str_symbols = mk_stringsym_sort_dict(im, mm, z3_ctx)
    attr_data_sort = mk_attr_data_sort(str_sort, z3_ctx)

    unbound_elems = [f"unbound_elem_{i}" for i in range(unbound_elems_quantity)]

    # The element sort is built from the model's elements plus the unbound names.
    elem_sort, elem = mk_elem_sort_dict(im, z3_ctx, unbound_elems)

    # Every unbound value is represented by the attribute-data `placeholder`.
    # Other values that could go here:
    #   attr_data_sort.int(42)            -> ok
    #   attr_data_sort.bool(True)         -> ok
    #   attr_data_sort.str("x")           -> not possible: it expects a ctx["str"][<str_key>]
    #   Const("x", attr_data_sort)        -> not possible: symbolic value, cannot be
    #                                        converted to a BoolRef expression
    unbound_values = {
        f"unbound_val_{i}": attr_data_sort.placeholder
        for i in range(unbound_values_quantity)
    }

    elem_class_f = def_elem_class_f_and_assert_classes(
        im, solver, elem_sort, elem, class_sort, class_
    )

    # attr_rel :: (elem_sort, attr_sort, attr_data_sort) -> BoolRef
    attr_rel = def_attribute_rel(attr_sort, elem_sort, attr_data_sort)
    assert_im_attributes(
        attr_rel, solver, im, mm, elem, attr_sort, attr, attr_data_sort, str_symbols
    )

    # assoc_rel :: (elem_sort, assoc_sort, elem_sort) -> BoolRef
    assoc_rel = def_association_rel(assoc_sort, elem_sort)
    # Only entries of `im` whose key is not an unbound element name are asserted.
    bound_im = {
        name: element for name, element in im.items() if name not in unbound_elems
    }
    assert_im_associations(assoc_rel, solver, bound_im, elem, assoc_sort, assoc)

    ctx = {
        "solver": solver,
        "class_sort": class_sort,
        "class_": class_,
        "assoc_sort": assoc_sort,
        "assoc": assoc,
        "attr_sort": attr_sort,
        "attr": attr,
        "str_sort": str_sort,
        "str": str_symbols,
        "attr_data_sort": attr_data_sort,
        "unbound_elems": unbound_elems,
        "elem_sort": elem_sort,
        "elem": elem,
        "unbound_values": unbound_values,
        "elem_class_f": elem_class_f,
        "attr_rel": attr_rel,
        "assoc_rel": assoc_rel,
    }

    # Let each requirement register itself against the completed context.
    for requirement in requirements:
        requirement(ctx)

    return ctx

In [7]:
def req_every_vm_has_iface(ctx: Context):
    """Track, under the label "vm_iface", that every VM has a network interface.

    The formula states: for every element whose class is
    ``infrastructure_VirtualMachine`` there exists an element linked to it
    through ``infrastructure_ComputingNode::ifaces``.
    """
    elem_sort = ctx["elem_sort"]
    vm = Const("vm", elem_sort)
    iface = Const("iface", elem_sort)

    is_vm = ctx["elem_class_f"](vm) == ctx["class_"]["infrastructure_VirtualMachine"]
    has_iface = ctx["assoc_rel"](
        vm, ctx["assoc"]["infrastructure_ComputingNode::ifaces"], iface
    )

    ctx["solver"].assert_and_track(
        ForAll([vm], Implies(is_vm, Exists([iface], And(has_iface)))),
        "vm_iface",
    )

In [8]:
def req_every_vm_has_cpucount(ctx: Context):
    """Track, under the label "vm_cpucount", that every VM has a cpu_count value.

    The formula states: for every element whose class is
    ``infrastructure_VirtualMachine`` there exists an attribute value related
    to it through ``infrastructure_ComputingNode::cpu_count``.
    """
    vm = Const("vm", ctx["elem_sort"])
    # The quantified value must be of the attribute-data sort: `attr_rel` is
    # declared over (elem_sort, attr_sort, attr_data_sort), so a plain Int
    # constant does not match the sort of its third argument.
    cpu_count = Const("cpucount", ctx["attr_data_sort"])
    print("SORT: ", cpu_count.sort())
    cpu_count_assertion = ForAll(
        [vm],
        Implies(
            ctx["elem_class_f"](vm) == ctx["class_"]["infrastructure_VirtualMachine"],
            Exists(
                [cpu_count],
                And(
                    ctx["attr_rel"](vm, ctx["attr"]["infrastructure_ComputingNode::cpu_count"], cpu_count)
                )
            )
        )
    )
    ctx["solver"].assert_and_track(cpu_count_assertion, "vm_cpucount")

In [9]:
def req_every_iface_has_a_secgroup(ctx):
    """Track, under the label "vm_secgroup", the interface/security-group requirement.

    The formula states: for every element whose class is
    ``infrastructure_SecurityGroup`` there exists an element linked to it
    through ``infrastructure_NetworkInterface::associated``.

    NOTE(review): the function name reads as "every interface has a security
    group", while the formula quantifies over security groups — confirm which
    direction is intended.
    """
    elem_sort = ctx["elem_sort"]
    sg = Const("sg", elem_sort)
    iface = Const("iface", elem_sort)

    is_sec_group = ctx["elem_class_f"](sg) == ctx["class_"]["infrastructure_SecurityGroup"]
    iface_in_group = ctx["assoc_rel"](
        iface, ctx["assoc"]["infrastructure_NetworkInterface::associated"], sg
    )

    ctx["solver"].assert_and_track(
        ForAll([sg], Implies(is_sec_group, Exists([iface], iface_in_group))),
        "vm_secgroup",
    )

In [10]:
def req_iface_uniq(ctx):
    """Track, under the label "unique_iface", the interface endPoint constraint.

    The asserted formula uses three free constants and states that two
    distinct elements share the same ``infrastructure_NetworkInterface::endPoint``
    value.

    NOTE(review): as written this is satisfiable when two interfaces share an
    endPoint, i.e. it looks like the negated form of a uniqueness requirement,
    unlike the other requirements in this notebook — confirm this is intended.
    """
    elem_sort = ctx["elem_sort"]
    end_point = ctx["attr"]["infrastructure_NetworkInterface::endPoint"]
    has_attr = ctx["attr_rel"]

    ni1 = Const("ni1", elem_sort)
    ni2 = Const("ni2", elem_sort)
    value = Const("value", ctx["attr_data_sort"])

    conjuncts = [
        has_attr(ni1, end_point, value),
        has_attr(ni2, end_point, value),
        ni1 != ni2,
    ]
    ctx["solver"].assert_and_track(And(*conjuncts), "unique_iface")

In [11]:
def req_external_services_must_have_https(ctx):
    """Track, under the label "ext_service_https", the HTTPS requirement.

    For every chain
    SaaS -> exposed software interface <- consuming software component
    <- deployment -> node -> network interface -> security group,
    the security group must own a rule whose ``fromPort`` and ``toPort`` are
    443 and whose ``kind`` is ``INGRESS``.

    The link between the security group and the rule lives inside the
    ``Exists`` so that the quantified ``rule`` is the one the port/kind
    constraints talk about. With the link in the antecedent, ``rule`` there is
    a free constant unrelated to the existentially bound one.
    """
    # `Consts` splits the name string on spaces, so names are space-separated
    # (commas would become part of the constant names).
    saas, sw_iface, sw_comp, deployment, ielem, net_iface, sec_group, rule = Consts(
        "saas sw_iface sw_comp deployment ielem net_iface sec_group rule",
        ctx["elem_sort"]
    )
    assertion = ForAll(
        [saas, sec_group, sw_iface, sw_comp, deployment, ielem, net_iface],
        Implies(
            And(
                ctx["elem_class_f"](saas) == ctx["class_"]["application_SaaS"],
                ctx["elem_class_f"](sec_group) == ctx["class_"]["infrastructure_SecurityGroup"],
                ctx["assoc_rel"](saas, ctx["assoc"]["application_SaaS::exposedInterfaces"], sw_iface),
                ctx["assoc_rel"](sw_comp, ctx["assoc"]["application_SoftwareComponent::consumedInterfaces"], sw_iface),
                ctx["assoc_rel"](deployment, ctx["assoc"]["commons_Deployment::component"], sw_comp),
                ctx["assoc_rel"](deployment, ctx["assoc"]["commons_Deployment::node"], ielem),
                ctx["assoc_rel"](ielem, ctx["assoc"]["infrastructure_ComputingNode::ifaces"], net_iface),
                ctx["assoc_rel"](net_iface, ctx["assoc"]["infrastructure_NetworkInterface::associated"], sec_group),
            ),
            Exists([rule],
                And(
                    # The rule must belong to the security group reached above.
                    ctx["assoc_rel"](sec_group, ctx["assoc"]["infrastructure_SecurityGroup::rules"], rule),
                    # Every node that contacts an external service should belong to a security group implementing HTTPS
                    ctx["attr_rel"](rule, ctx["attr"]["infrastructure_Rule::fromPort"], ctx["attr_data_sort"].int(443)),
                    ctx["attr_rel"](rule, ctx["attr"]["infrastructure_Rule::toPort"], ctx["attr_data_sort"].int(443)),
                    ctx["attr_rel"](rule, ctx["attr"]["infrastructure_Rule::kind"], ctx["attr_data_sort"].str(ctx["str"]["INGRESS"]))
                )
            )
        )
    )
    ctx["solver"].assert_and_track(assertion, "ext_service_https")

### REMEMBER TO ADD REQUIREMENTS TO THE LIST!

In [12]:
# Requirement functions handed to the solver; uncomment an entry to enable it.
# Every entry ends with a comma so lines can be toggled independently.
REQUIREMENTS = [
    # req_every_vm_has_iface,
    req_every_vm_has_cpucount,
    # req_every_iface_has_a_secgroup,
    # req_iface_uniq,
    # req_external_services_must_have_https,
]

In [13]:
def check_iteratively(ubelems_n: int = 0, ubvals_n: int = 0, requirements: list = [], curr_try: int = 0, max_tries: int = 10) -> Context:
    """Grow the number of unbound elements/values until the model becomes sat.

    A new solver is built on every attempt via ``initialize_solver``. After an
    unsat result, the smaller of the two counts is grown (doubled, or set to 1
    when it is 0); when both are equal the element count is grown.

    Args:
        ubelems_n: initial number of unbound elements.
        ubvals_n: initial number of unbound values.
        requirements: requirement callables forwarded to ``initialize_solver``.
        curr_try: index of the first attempt.
        max_tries: an attempt whose index exceeds this value is not made.

    Returns:
        The context of the first satisfiable attempt.

    Raises:
        RuntimeError: when the attempt index exceeds ``max_tries`` or the
            solver result is neither sat nor unsat.
    """
    while True:
        if curr_try > max_tries:
            raise RuntimeError("Max tries exceeded.")

        ctx = initialize_solver(ubelems_n, ubvals_n, requirements)
        outcome = ctx["solver"].check()

        if outcome == sat:
            print("\033[92m" + f"<Sat>\tubelems_n={ubelems_n}, ubvals_n={ubvals_n}" + "\033[0m")
            return ctx

        if outcome == unsat:
            print("\033[91m" + f"<Unsat>\tubelems_n={ubelems_n}, ubvals_n={ubvals_n}" + "\033[0m")
            # TODO: Choose which goes first
            if ubelems_n > ubvals_n:
                ubvals_n = ubvals_n * 2 if ubvals_n >= 1 else 1
            else:
                ubelems_n = ubelems_n * 2 if ubelems_n >= 1 else 1
            curr_try += 1
            continue

        # Neither sat nor unsat: the solver could not decide.
        raise RuntimeError("It took too long to decide.")

In [14]:
# Start the search with no unbound elements and a single unbound value.
solved_ctx = check_iteratively(ubelems_n=0, ubvals_n=1, requirements=REQUIREMENTS)

# Model of the first satisfiable attempt; the cells below inspect it.
solved_model = solved_ctx["solver"].model()

SORT:  AttributeData
[92m<Sat>	ubelems_n=0, ubvals_n=1[0m


In [15]:
def get_ubelems_and_assoc(ctx: Context, model: Model):
    """List the associations of the model that involve an unbound element.

    Args:
        ctx: context providing ``elem``, ``assoc``, ``assoc_rel`` and
            ``unbound_elems``.
        model: model used to evaluate each candidate association.

    Returns:
        A list of ``((name_1, elem_1), assoc, (name_2, elem_2))`` tuples, one
        for every combination in which at least one of the two element names
        is in ``unbound_elems`` and the association evaluates to true.
    """
    elem, assoc, assoc_rel, unbound_elems = itemgetter("elem", "assoc", "assoc_rel", "unbound_elems")(ctx)

    return [
        ((e1n, e1), a, (e2n, e2))
        for (e1n, e1), a, (e2n, e2) in product(elem.items(), assoc.values(), elem.items())
        if (e1n in unbound_elems or e2n in unbound_elems)
        # model_completion=True makes the evaluation yield a concrete value
        # even for terms the model leaves uninterpreted; a symbolic result
        # cannot be used as a Python truth value.
        and model.eval(assoc_rel(e1, a, e2), model_completion=True)
    ]

In [16]:
def get_ubvals_and_attr(ctx: Context, model: Model):
    """List the attributes that the model relates to an unbound value.

    Args:
        ctx: context providing ``elem``, ``attr``, ``attr_rel`` and
            ``unbound_values``.
        model: model used to evaluate each candidate attribute relation.

    Returns:
        A list of ``((elem_name, elem), attr, (value_name, value))`` tuples,
        one for every element/attribute/unbound-value combination whose
        attribute relation evaluates to true.
    """
    elem, attr, attr_rel, unbound_values = itemgetter("elem", "attr", "attr_rel", "unbound_values")(ctx)

    return [
        ((elem_k, elem_v), a, (ubval_k, ubval_v))
        for (elem_k, elem_v), a, (ubval_k, ubval_v) in product(elem.items(), attr.values(), unbound_values.items())
        # model_completion=True makes the evaluation yield a concrete value
        # even for terms the model leaves uninterpreted; a symbolic result
        # cannot be used as a Python truth value.
        if model.eval(attr_rel(elem_v, a, ubval_v), model_completion=True)
    ]

In [17]:
# Utility code to print the names of associations and attributes in a human-readable way
def _pretty_elem_name(elem_key):
    """Return a display name for the element stored under `elem_key` in `im`."""
    element = im.get(elem_key)
    if not element:
        # Not part of the intermediate model (e.g. an unbound element): show the key.
        return elem_key
    if elem_key[0:4] == "elem":
        return f"{element.class_} ({element.user_friendly_name})"
    return f"<'{elem_key}' not found>"


def pretty_ubelems_assoc(assoc):
    """Format an ``((name, elem), association, (name, elem))`` tuple as one padded row."""
    (source_key, _), association, (target_key, _) = assoc

    source_name = _pretty_elem_name(source_key)
    target_name = _pretty_elem_name(target_key)
    assoc_name = str(association)

    return f"{source_name:<50s} {assoc_name:<60s} {target_name:<30s}"


def pretty_ubvals_attrs(attrs):
    """Format an ``((name, elem), attribute, (value_name, value))`` tuple as one padded row."""
    (owner_key, _), attribute, (value_key, _) = attrs

    owner_name = _pretty_elem_name(owner_key)
    attr_name = str(attribute)
    val_name = str(value_key)

    return f"{owner_name:<50s} {attr_name:<60s} {val_name:<30s}"

In [18]:
# Associations of the solved model that involve an unbound element.
ubelems_and_assoc = get_ubelems_and_assoc(solved_ctx, solved_model)
print("\n".join(pretty_ubelems_assoc(entry) for entry in ubelems_and_assoc))

print(120 * "-")

# Attributes that the solved model relates to an unbound value.
ubvals_and_attr = get_ubvals_and_attr(solved_ctx, solved_model)
print("\n".join(pretty_ubvals_attrs(entry) for entry in ubvals_and_attr))


------------------------------------------------------------------------------------------------------------------------
application_SoftwareComponent (nginx)              application_SoftwareComponent::licenseCost                   unbound_val_0                 
application_SoftwareComponent (nginx)              application_SoftwareComponent::configFile                    unbound_val_0                 
infrastructure_VMImage (v_img)                     infrastructure_ComputingNodeGenerator::uri                   unbound_val_0                 
infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::architecture                   unbound_val_0                 
infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::memory_mb                      unbound_val_0                 
infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::memory_kb                      unbound_val_0                 
infrastructure_Virtu

In [19]:
def thin_ubelems_and_assoc(ctx: Context, ubelems_and_assoc: list):
    """Keep only the candidate associations whose negation the solver rejects.

    The first candidate is negated inside a new solver scope and the model is
    checked again:

    * if the result is sat, the negation stays on the solver, the candidates
      are recomputed from the new model and the search continues on them;
    * otherwise the scope is popped, the candidate is kept and the search
      continues on the remaining candidates.

    Args:
        ctx: context providing ``solver`` and ``assoc_rel``.
        ubelems_and_assoc: candidate tuples as returned by
            ``get_ubelems_and_assoc``.

    Returns:
        The kept candidates without duplicates, in no particular order.
    """
    from difflib import context_diff

    if not ubelems_and_assoc:
        return []

    solver = ctx["solver"]
    candidate = ubelems_and_assoc[0]
    remaining = ubelems_and_assoc[1:]
    (_, source), association, (_, target) = candidate
    candidate_holds = ctx["assoc_rel"](source, association, target)

    # Try the model again with the candidate forbidden.
    solver.push()
    print(f"\tAdd constraint Not({pretty_ubelems_assoc(candidate)})")
    solver.add(Not(candidate_holds))

    if solver.check() == sat:
        print("SAT:\tAdding one more constraint and trying again")
        # The model changed: recompute the candidates from it.
        thinned = get_ubelems_and_assoc(ctx, solver.model())

        # Show what changed between the two candidate lists.
        before_rows = [pretty_ubelems_assoc(entry) for entry in ubelems_and_assoc]
        after_rows = [pretty_ubelems_assoc(entry) for entry in thinned]
        print("\n".join(context_diff(before_rows, after_rows, lineterm="", fromfile='Before', tofile="After")))

        return thin_ubelems_and_assoc(ctx, thinned)

    print("UNSAT\tLast constraint was the association we are looking for!")
    solver.pop()

    if remaining:
        print("\tIterating over")
        print("\t\t" + "\n\t\t".join(pretty_ubelems_assoc(entry) for entry in remaining))
    return list(set([candidate] + thin_ubelems_and_assoc(ctx, remaining)))


# Reduce the candidate associations to the ones whose negation the solver rejects.
# This call modifies the solver held by `solved_ctx`.
assoc_to_implement = thin_ubelems_and_assoc(solved_ctx, ubelems_and_assoc)

In [20]:
# One formatted row per association that has to be added to the model.
print("\n".join(pretty_ubelems_assoc(entry) for entry in assoc_to_implement))




In [21]:
def thin_ubvals_and_attr(ctx: Context, ubvals_and_attr: list):
    """Keep only the candidate attributes whose negation the solver rejects.

    The first candidate is negated inside a new solver scope and the model is
    checked again:

    * if the result is sat, the negation stays on the solver, the candidates
      are recomputed from the new model and the search continues on them;
    * otherwise the scope is popped, the candidate is kept and the search
      continues on the remaining candidates.

    Args:
        ctx: context providing ``solver`` and ``attr_rel``.
        ubvals_and_attr: candidate tuples as returned by
            ``get_ubvals_and_attr``.

    Returns:
        The kept candidates without duplicates, in no particular order.
    """
    if not ubvals_and_attr:
        return []

    (_, elem_v), a, (_, attr_v) = attr = ubvals_and_attr[0]
    attr_rel = ctx["attr_rel"](elem_v, a, attr_v)

    # Add negated constraint
    ctx["solver"].push()
    print(f"\tAdd constraint Not({pretty_ubvals_attrs(attr)})")
    ctx["solver"].add(Not(attr_rel))

    res = ctx["solver"].check()

    if res == sat:
        print("SAT:\tAdding one more constraint and trying again")
        # Get new ubvals_and_attr
        model = ctx["solver"].model()
        thinned_ubvals_and_attr = get_ubvals_and_attr(ctx, model)

        # Print table showing the diff between the candidates passed in and
        # the recomputed ones (the "Before" side must be this function's own
        # input, not the association list from another cell).
        from difflib import context_diff
        before_rows = [pretty_ubvals_attrs(entry) for entry in ubvals_and_attr]
        after_rows = [pretty_ubvals_attrs(entry) for entry in thinned_ubvals_and_attr]
        print("\n".join(context_diff(before_rows, after_rows, lineterm="", fromfile='Before', tofile="After")))

        # Iterate
        return thin_ubvals_and_attr(ctx, thinned_ubvals_and_attr)
    else:
        print("UNSAT\tLast constraint was the attribute we are looking for!")
        ctx["solver"].pop()

        if ubvals_and_attr[1:]:
            print("\tIterating over")
            print("\t\t" + "\n\t\t".join([pretty_ubvals_attrs(attr) for attr in ubvals_and_attr[1:]]))
        return [*set([attr] + thin_ubvals_and_attr(ctx, ubvals_and_attr[1:]))]


# Reduce the candidate attributes to the ones whose negation the solver rejects.
# NOTE(review): this reuses `solved_ctx` after the association pass; any negated
# constraint that pass added on a SAT step is never popped, so it is still on the
# solver here — confirm this is intended.
attrs_to_implement = thin_ubvals_and_attr(solved_ctx, ubvals_and_attr)

	Add constraint Not(application_SoftwareComponent (nginx)              application_SoftwareComponent::licenseCost                   unbound_val_0                 )
UNSAT	Last constraint was the attribute we are looking for!
	Iterating over
		application_SoftwareComponent (nginx)              application_SoftwareComponent::configFile                    unbound_val_0                 
		infrastructure_VMImage (v_img)                     infrastructure_ComputingNodeGenerator::uri                   unbound_val_0                 
		infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::architecture                   unbound_val_0                 
		infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::memory_mb                      unbound_val_0                 
		infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::memory_kb                      unbound_val_0                 
		infrastructure_VirtualMachine (vm

In [22]:
# One formatted row per attribute that has to be added to the model.
print("\n".join(pretty_ubvals_attrs(entry) for entry in attrs_to_implement))

infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::memory_mb                      unbound_val_0                 
application_SoftwareComponent (nginx)              application_SoftwareComponent::configFile                    unbound_val_0                 
infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::architecture                   unbound_val_0                 
infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::cost                           unbound_val_0                 
infrastructure_VirtualMachine (vm1)                infrastructure_ComputingNode::memory_kb                      unbound_val_0                 
concrete_VirtualMachine (concrete_vm)              concrete_ConcreteElement::configurationScript                unbound_val_0                 
concrete_VMImage (concrete_vm_image)               concrete_ConcreteElement::configurationScript                unbound_val_0                 

From here, we should then add this in the Intermediate Model, and then in the ECore (?) to generate the DOML file somehow.

We can then provide the new file, or a diff to be patched onto the original?