# Authors
Ikram Kohil, 2019115 \
Johnatan Gao, 2013298

# 1. DataFlow extraction

In this part, we must extract the intra-procedural dataflow [1] of the example code that was provided:

- For each variable reference, we must determine the corresponding definition(s) [2]
- For each definition, we must determine the corresponding reference(s) [3]

*For this lab, we must only consider the simple definitions of the form (Variable = Expression ^ Literal)* [4]

We must verify our implementation by ensuring that the set of pairs (definition - reference) corresponds to the set of pairs (reference - definition). We must then extract the sets for the files in the part_1 folder.

[1] Intra-procedural dataflow refers to the analysis of how variables and values flow within a single function or procedure. In other words, it looks at how information moves from one point to another within a piece of code that is defined by a set of procedures or functions. This type of analysis can be useful for identifying common programming errors such as uninitialized variables, null pointer references, and other issues related to the flow of data through a program.
''''
For each variable reference, we must determine the corresponding definition(s)

Since it's been instructed to only consider simple definitions:
Variable = Expression ^ Literal

''''

In [10]:
# This is Ikram's code (reference, definition)



from code_analysis import CFG, CFGReader


class VariableVisitor:
    """Walk a CFG and sort its ``Variable`` nodes into definitions and references.

    A ``Variable`` node is recorded in ``defs`` when at least one of the nodes
    returned by ``cfg.get_children`` for it is a ``BinOP`` whose image is
    ``=``; otherwise it is recorded in ``refs``.  While walking, details about
    the first ``Variable`` operand of every ``=`` node are printed.

    Attributes:
        cfg: The CFG currently being visited (``None`` until ``visit`` runs).
        filename: Name of the file the CFG was read from; only used in the
            progress message printed by ``visit``.
        test: Scratch list; not used by the traversal itself.
        visited_nodes: Ids of the nodes already traversed.
        refs: Ids of ``Variable`` nodes classified as references.
        defs: Ids of ``Variable`` nodes classified as definitions.
    """

    def __init__(self, filename):
        self.cfg = None
        self.filename = filename
        self.test = []

        # A set, so that membership checks during the traversal stay cheap and
        # no node is processed twice.
        self.visited_nodes = set()
        # Ids of Variable nodes classified as references.
        self.refs = []
        # Ids of Variable nodes classified as definitions.
        self.defs = []

    def visit(self, cfg: "CFG"):
        """Traverse ``cfg`` from its root, then from every function entry node.

        Args:
            cfg: The graph to analyse; results accumulate in ``self.defs``
                and ``self.refs``.
        """
        self.cfg = cfg
        print(f"Visiting CFG from file {self.filename}")

        # Visit main program
        self.__visit(self.cfg.get_root())

        # Visit all other procedures in file.  Entry nodes already reached
        # from the root are skipped so they are not processed a second time.
        for entry_node in self.cfg.get_func_entry_nodes():
            if entry_node not in self.visited_nodes:
                self.__visit(entry_node)

    def __is_assignment(self, node_id: int) -> bool:
        """Return True when ``node_id`` is a ``BinOP`` node whose image is ``=``."""
        return (
            self.cfg.get_type(node_id) == "BinOP"
            and self.cfg.get_image(node_id) == "="
        )

    def __visit(self, node_id: int):
        """Process ``node_id`` and recurse into its not-yet-visited children."""
        self.visited_nodes.add(node_id)

        if self.cfg.get_type(node_id) == "Variable":
            # A Variable with an "=" node among its children is a definition;
            # any other Variable is a reference.
            is_definition = any(
                self.__is_assignment(child_id)
                for child_id in self.cfg.get_children(node_id)
            )
            if is_definition:
                self.defs.append(node_id)
            else:
                self.refs.append(node_id)

        if self.__is_assignment(node_id):
            operands = self.cfg.get_op_hands(node_id)
            variable_operands = [
                operand_id
                for operand_id in operands
                if self.cfg.get_type(operand_id) == "Variable"
            ]
            print(operands)
            # An assignment may have no direct Variable operand; only report
            # variable details when one exists instead of indexing blindly.
            if variable_operands:
                target_id = variable_operands[0]
                print(self.cfg.get_var_id(target_id))
                print(self.cfg.get_type(target_id))
                print(self.cfg.get_position(target_id))

        # Visit children
        for child_id in self.cfg.get_any_children(node_id):
            # Only visit if not previously visited, for performance
            if child_id not in self.visited_nodes:
                self.__visit(child_id)

# Driver for this cell: read the CFG stored in the JSON file below and run
# VariableVisitor over it.
cfg_reader = CFGReader()
filename = '../tp4/part_1/wordcount.php.cfg.json'
cfg = cfg_reader.read_cfg(filename)

# Visit currently loaded cfg; the visitor prints its findings while walking
# and leaves the collected node ids in visitor.defs / visitor.refs.
visitor = VariableVisitor(filename)
visitor.visit(cfg)

Visiting CFG from file ../tp4/part_1/wordcount.php.cfg.json
[10703, 10702]
466
Variable
[7, 7, 2, 7, 20, 20]
[10706, 10705]
467
Variable
[8, 8, 2, 3, 25, 25]
[10709, 10708]
468
Variable
[9, 9, 2, 3, 30, 30]
[10712, 10711]
469
Variable
[10, 10, 2, 3, 35, 35]
[10715, 10714]
470
Variable
[11, 11, 2, 3, 40, 40]
[10727, 10726]
471
Variable
[13, 13, 2, 3, 45, 45]
[10737, 10736]
472
Variable
[14, 14, 2, 2, 59, 59]
[10748, 10745]
469
Variable
[16, 16, 4, 5, 76, 76]
[10759, 10756]
467
Variable
[18, 18, 6, 7, 93, 93]
[10782, 10779]
470
Variable
[22, 22, 10, 11, 128, 128]
[10814, 10813]
466
Variable
[31, 31, 8, 13, 191, 191]
[10819, 10816]
468
Variable
[32, 32, 8, 9, 196, 196]
[10829, 10828]
472
Variable
[34, 34, 4, 4, 205, 205]
[10785, 10784]
466
Variable
[24, 24, 10, 15, 137, 137]
[10788, 10787]
466
Variable
[26, 26, 8, 13, 143, 143]
[10791, 10790]
466
Variable
[28, 28, 6, 11, 149, 149]


In [40]:
# This is Gao's code (reference, definition)
from code_analysis import CFG, CFGReader

class DefinitionVisitor:
    """Walk a CFG from its root and collect GEN, KILL and reference records.

    For every ``BinOP`` node whose image is ``=``, the first assignment seen
    for a variable label is stored in ``gen`` and every later assignment to
    the same label is appended to ``kills``.  Every ``Variable`` node whose
    first child is not such an ``=`` node is appended to ``refs``.

    Attributes:
        cfg: The CFG currently being visited (``None`` until ``visit`` runs).
        filename: Name of the file the CFG was read from; only used in the
            progress message printed by ``visit``.
        test: Scratch list; not used by the traversal itself.
        visited_nodes: Ids of the nodes already traversed.
        refs: Maps a variable label to the list of its reference records.
        gen: Maps a variable label to the record of its first assignment.
        kills: Maps a variable label to the list of its later assignments.
    """

    def __init__(self, filename):
        self.cfg = None
        self.filename = filename
        self.test = []

        # A set, so that no node is processed twice during the traversal.
        self.visited_nodes = set()
        # label -> list of reference records
        self.refs = dict()
        # label -> record of the first assignment found (GEN)
        self.gen = dict()
        # label -> list of records for subsequent assignments (KILL)
        self.kills = dict()

    def visit(self, cfg: "CFG"):
        """Traverse ``cfg`` from its root and print the collected tables.

        Args:
            cfg: The graph to analyse; results accumulate in ``self.gen``,
                ``self.kills`` and ``self.refs``.
        """
        self.cfg = cfg
        print(f"Visiting CFG from file {self.filename}")

        # Visit main program
        self.__visit(self.cfg.get_root())

        # NOTE: traversal of the other procedures in the file is currently
        # disabled; only the nodes reachable from the root are analysed.
        # defined_func_entry_nodes = self.cfg.get_func_entry_nodes()
        # if len(defined_func_entry_nodes) > 0:
        #     for entry_node in defined_func_entry_nodes:
        #         self.__visit(entry_node)

        print("GEN: ", self.gen)
        print("KILLS: ", self.kills)
        print("REFS: ", self.refs)

    def __visit(self, node_id: int):
        """Process ``node_id`` and recurse into its not-yet-visited children."""
        self.visited_nodes.add(node_id)
        self.__get_gen_and_kills(node_id)
        self.__get_references(node_id)

        # Visit children
        for child_id in self.cfg.get_any_children(node_id):
            # Only visit if not previously visited, for performance
            if child_id not in self.visited_nodes:
                self.__visit(child_id)

    def __get_gen_and_kills(self, node_id: int):
        """Record the assignment at ``node_id`` in ``gen`` or ``kills``.

        Does nothing unless ``node_id`` is an ``=`` node.  The first operand
        is taken as the assigned variable, the second as the assigned value.
        """
        if not self.__is_binOp_equal(node_id):
            return

        variable_node_id, definition_node_id = self.cfg.get_op_hands(node_id)
        label = self.cfg.get_image(variable_node_id)
        element = {
            "label": label,
            # First entry of the position record (presumably the source line
            # -- confirm against the CFG library).
            "label_pos": self.cfg.get_position(variable_node_id)[0],
            "definition_val": self.cfg.get_image(definition_node_id),
            "definition_pos": self.cfg.get_position(definition_node_id)[0],
        }
        if label not in self.gen:
            self.gen[label] = element
        else:
            # list.append returns None, so its result must never be stored
            # back in the dict; setdefault creates the list on first use and
            # append then grows it in place.
            self.kills.setdefault(label, []).append(element)

    def __get_references(self, node_id: int):
        """Record ``node_id`` in ``refs`` when it is a variable reference.

        A ``Variable`` node counts as a reference unless its first child is
        an ``=`` node.  A ``Variable`` node without any child is treated as a
        reference as well.
        """
        if self.cfg.get_type(node_id) != "Variable":
            return

        children = self.cfg.get_any_children(node_id)
        # Guard the [0] lookup: a Variable node may have no child at all.
        if children and self.__is_binOp_equal(children[0]):
            return

        label = self.cfg.get_image(node_id)
        element = {
            "label": label,
            "label_pos": self.cfg.get_position(node_id)[0],
        }
        # Same in-place append as for kills, so repeated references to one
        # label accumulate instead of replacing the list with None.
        self.refs.setdefault(label, []).append(element)

    def __is_binOp_equal(self, node_id: int):
        """Return True when ``node_id`` is a ``BinOP`` node whose image is ``=``."""
        return self.cfg.get_type(node_id) == "BinOP" and self.cfg.get_image(node_id) == "="


# Driver for this cell: read the CFG stored in the JSON file below and run
# DefinitionVisitor over it.
cfg_reader = CFGReader()
filename = '../tp4/part_1/test.php.cfg.json'
cfg = cfg_reader.read_cfg(filename)

# Visit currently loaded cfg; visit() prints the GEN, KILLS and REFS tables
# once the traversal is finished.
visitor = DefinitionVisitor(filename)
visitor.visit(cfg)

Visiting CFG from file ../tp4/part_1/test.php.cfg.json
{'b': {'label': 'b', 'label_pos': 2, 'definition_val': '1', 'definition_pos': 2}, 'a': {'label': 'a', 'label_pos': 3, 'definition_val': '0', 'definition_pos': 3}}
{'a': [{'label': 'a', 'label_pos': 6, 'definition_val': '1', 'definition_pos': 6}]}
{'b': [{'label': 'b', 'label_pos': 5}], 'a': [{'label': 'a', 'label_pos': 8}]}
