# Authors
Ikram Kohil, 2019115 \
Johnatan Gao

# Part 1 - CFG Construction

## 1.1 Utility functions

In [None]:
import os
from pathlib import Path
from code_analysis import ASTReader
from code_analysis import ASTException, CFG, AST

# Directory where the cfg.json and .dot files generated by this notebook are stored.
# The trailing "/" matters: paths below are built by plain string concatenation.
output_directory = "output/part_1/"
# Shared reader used further down to load each *.ast.json file into memory.
reader = ASTReader()

# Utility functions taken from TP1
def get_ast_json_files(directory):
    """Return the paths (as strings) of every '*.ast.json' file under `directory`.

    The search is recursive, so files in nested sub-folders are included.
    """
    matches = Path(directory).rglob('*.ast.json')
    return list(map(str, matches))

def create_output_file(filename, directory=None):
    """Create a fresh, empty output file and return it opened for appending.

    Args:
        filename: Name of the file to create inside the output directory.
        directory: Folder that receives the file. Defaults to the module-level
            `output_directory` when omitted.

    Returns:
        The open file object. The caller is responsible for closing it.
    """
    if directory is None:
        directory = output_directory

    # Make sure the destination folder exists (no-op when it already does)
    if directory:
        os.makedirs(directory, exist_ok=True)

    output_path = os.path.join(directory, filename)

    # Remove any previous version of the *output* file. The check has to target
    # the real destination path; otherwise a stale file survives and the
    # append-mode handle below keeps adding to the old content.
    if os.path.exists(output_path):
        os.remove(output_path)

    # Open in 'append' mode to avoid overwriting the whole file after each modification
    return open(output_path, "a")

def close_output_file(file):
    """Close the given file object; nothing is returned."""
    file.close()
    return None

def get_filename_from_path(full_filepath):
    """Return the file name found in `full_filepath`, cut at its first dot.

    Everything up to and including the last '/' is dropped (a path without
    any '/' is used as is), then everything from the first '.' onwards is
    removed, e.g. "../part_1/if/test.ast.json" -> "test".
    """
    # rpartition yields the whole string as the tail when no '/' is present
    _, _, basename = full_filepath.rpartition('/')

    # Keep only what precedes the first dot (all extensions are stripped)
    stem, _, _ = basename.partition('.')
    return stem


# 1.2 Create visitor

## 1.2.1 Function call
For this section, we simply followed the provided example to generate the relevant nodes in the `visit_FUNCTION_CALL` method.

## 1.2.2 IfThenElse Statement
For this section, no particular difficulty was encountered, but the provided `visit_BINOP` method had to be modified so that it draws the appropriate relationship arrows during the visit, which avoids having to add them manually in the `visit_IF_THEN_ELSE` method.

In [None]:
# Part of this code was provided by the teaching assistant.
# Functions written by ourselves will be clearly identified

class ASTtoCFGVisitor:
    """Walk an AST node by node and build the corresponding CFG.

    Attributes (documented here as comments):
      ast       -- AST currently being visited (set by `visit`).
      cfg       -- CFG under construction (recreated by every `visit` call).
      iNextNode -- last CFG node id handed out by `get_new_node`.

    Every visit_* method receives a `ctx` dict. Keys used by this class:
      'parent' -- CFG node from which the next created node is linked.
      'endId'  -- last CFG node produced so far; callers read it back after
                  a visit to know where to continue the chain.
      'scope', 'stopId' -- stored by `visit_ROOT`; not read inside this class.
    """

    def __init__(self):
        self.ast = None
        self.cfg = CFG()
        self.iNextNode = 0

    def get_new_node(self) -> int:
        """Return a fresh CFG node id (the counter is incremented first, so ids start at 1)."""
        self.iNextNode += 1
        return self.iNextNode

    def visit(self, ast: AST):
        """Build and return a new CFG for `ast`.

        Note: the node counter is not reset here, so reusing one visitor for
        several ASTs keeps increasing the node ids.
        """
        self.ast = ast
        self.cfg = CFG()
        print(f"Visit AST from file {self.ast.get_filename()}")
        self.visit_ROOT()
        return self.cfg

    def visit_ROOT(self):
        """Create the Entry/Exit nodes, visit the AST root and link the last node to Exit."""
        ctx = {}
        entryNodeId = self.get_new_node()
        stopNodeId = self.get_new_node()
        rootAST = self.ast.get_root()
        self.cfg.set_root(entryNodeId)

        self.cfg.set_type(entryNodeId, "Entry")
        self.cfg.set_image(entryNodeId, "main")
        self.cfg.set_type(stopNodeId, "Exit")

        ctx['parent'] = entryNodeId
        ctx['scope'] = entryNodeId
        ctx['stopId'] = stopNodeId
        # Until a node is produced, the entry node is the last node of the flow.
        # Seeding it here lets a root without children yield Entry -> Exit
        # instead of failing with a KeyError on 'endId'.
        ctx['endId'] = entryNodeId

        if self.ast.get_type(rootAST) == "Start":
            self.cfg.set_node_ptr(rootAST, entryNodeId)

        self.visit_node(rootAST, ctx)
        self.cfg.add_edge(ctx['endId'], stopNodeId)

    # chain nodes
    def visit_GENERIC(self, ast_node_id: int, ctx: dict) -> int:
        """Create a CFG node mirroring the AST node, then chain its children after it.

        The new node is linked from ctx['parent']; each child is visited with
        the end of the previous child as its parent. On return ctx['endId']
        holds the last node produced. Returns the id of the node created for
        `ast_node_id` itself.
        """
        cfg_node = self.get_new_node()
        self.cfg.set_node_ptr(ast_node_id, cfg_node)
        self.cfg.set_type(cfg_node, self.ast.get_type(ast_node_id))
        self.cfg.set_image(cfg_node, self.ast.get_image(ast_node_id))
        self.cfg.add_edge(ctx["parent"], cfg_node)

        # Set before visiting children so a childless node ends the chain itself
        ctx["endId"] = cfg_node

        new_ctx = dict(ctx) # clone ctx
        new_ctx["parent"] = cfg_node
        for child_id in self.ast.get_children(ast_node_id):
            self.visit_node(child_id, new_ctx)
            new_ctx["parent"] = new_ctx["endId"]
        ctx["endId"] = new_ctx["endId"]

        return cfg_node

    def visit_GENERIC_BLOCK(self, ast_node_id: int, ctx: dict):
        """Visit the children in sequence without creating a CFG node for the block itself.

        Each child is chained after the end of the previous one; the end of the
        last child is written back into ctx['endId']. Always returns None.
        """
        new_ctx = dict(ctx) # clone ctx
        for child_id in self.ast.get_children(ast_node_id):
            self.visit_node(child_id, new_ctx)
            new_ctx["parent"] = new_ctx["endId"]
        ctx["endId"] = new_ctx["endId"]

        return None

    def visit_BINOP(self, ast_node_id: int, ctx: dict) -> int:
        """Handle a binary operator node (dispatched for BinOP, RelOP and LogicOP).

        Resulting chain: ctx['parent'] -> right operand -> left operand -> operator.
        The operator node becomes ctx['endId'] and its id is returned.
        """
        # Create the operator node (it is linked once both operands are visited)
        cfg_node = self.get_new_node()
        self.cfg.set_node_ptr(ast_node_id, cfg_node)
        self.cfg.set_type(cfg_node, self.ast.get_type(ast_node_id))
        self.cfg.set_image(cfg_node, self.ast.get_image(ast_node_id))

        # Visit right child (second AST child), linked from the incoming parent
        new_ctx = dict(ctx) # clone ctx
        self.visit_node(self.ast.get_children(ast_node_id)[1], new_ctx)
        right = new_ctx['endId']

        # Visit left child (first AST child), chained after the right operand
        new_ctx = dict(ctx) # clone ctx
        new_ctx["parent"] = right
        self.visit_node(self.ast.get_children(ast_node_id)[0], new_ctx)
        left = new_ctx['endId']

        # Link left child with the operator node
        self.cfg.add_edge(left, cfg_node)

        ## Added section to draw the appropriate relationship arrows for each type of binary operator
        cfg_node_type = self.cfg.get_type(cfg_node)
        if cfg_node_type == "BinOP":
            self.cfg.set_op_hands(cfg_node, left, right)
        elif cfg_node_type == "RelOP":
            self.cfg.add_call_arg(cfg_node, left)
            self.cfg.add_call_arg(cfg_node, right)
        # LogicOP nodes get no additional relationship here

        ctx["endId"] = cfg_node
        return cfg_node

    def visit_FUNCTION_CALL(self, ast_node_id: int, ctx: dict) -> int:
        """Handle a function call.

        The call node and its children are first produced by `visit_GENERIC`;
        then Argument, CallBegin, CallEnd and RetValue nodes are appended.
        ctx['endId'] ends up on the RetValue node. Returns the id of the node
        created for the call itself.
        """
        new_ctx = dict(ctx) # clone ctx
        cfg_node = self.visit_GENERIC(ast_node_id, ctx)
        self.cfg.set_node_ptr(ast_node_id, cfg_node)
        # Get function id (first child)
        cfg_function_child = self.cfg.get_children(cfg_node)[0]

        # Create argument node
        cfg_argument_node = self.get_new_node()
        self.cfg.set_type(cfg_argument_node, "Argument")
        # Link it to last cfg node (end of the generic visit above)
        self.cfg.add_edge(ctx["endId"], cfg_argument_node)
        # Update context
        self.__update_context(cfg_node, cfg_argument_node, new_ctx)

        # Create node representing the call to the function
        cfg_begin_node = self.get_new_node()
        self.cfg.set_type(cfg_begin_node, "CallBegin")
        self.cfg.set_image(cfg_begin_node, self.cfg.get_image(cfg_node))
        self.cfg.add_edge(cfg_argument_node, cfg_begin_node)
        self.__update_context(cfg_argument_node, cfg_begin_node, new_ctx)

        # Set calls to the function and parameters
        self.cfg.set_call_expr(cfg_begin_node, cfg_function_child)
        self.cfg.add_call_arg(cfg_begin_node, ctx["endId"])

        # Create node representing the end of the call to the function.
        # CallBegin and CallEnd are associated through set_call, not add_edge.
        cfg_end_node = self.get_new_node()
        self.cfg.set_type(cfg_end_node, "CallEnd")
        self.cfg.set_image(cfg_end_node, self.cfg.get_image(cfg_node))
        self.cfg.set_call(cfg_begin_node, cfg_end_node)
        self.__update_context(cfg_begin_node, cfg_end_node, new_ctx)

        # Create node representing the return value
        cfg_ret_value_node = self.get_new_node()
        self.cfg.set_type(cfg_ret_value_node, "RetValue")
        self.cfg.add_edge(cfg_end_node, cfg_ret_value_node)
        self.__update_context(cfg_end_node, cfg_ret_value_node, new_ctx)

        # Update the original context's last node id (endId)
        ctx["endId"] = new_ctx["endId"]
        return cfg_node

    def visit_IF_THEN_ELSE(self, ast_node_id: int, ctx: dict) -> int:
        """Handle an if/then/else statement.

        Resulting shape: parent -> If -> (condition expression) -> Condition,
        then two branches starting at the Condition node, each closed by an
        Argument node, both joined on a single IfEnd node which becomes
        ctx['endId']. Returns the id of the If node.

        NOTE(review): assumes the AST node has exactly one "Condition" child
        and two "StatementBody" children, the first being the branch taken
        when the condition is true -- confirm against the AST producer.
        """
        # Get if/then/else node
        cfg_if_node = self.get_new_node()
        self.cfg.set_node_ptr(ast_node_id, cfg_if_node)

        # Generate If node first
        self.cfg.set_type(cfg_if_node, "If")
        self.cfg.set_image(cfg_if_node, self.ast.get_image(ast_node_id))
        self.cfg.add_edge(ctx["parent"], cfg_if_node)

        # Get children from ast to retrieve the condition node
        children = self.ast.get_children(ast_node_id)
        ast_condition_node = [child_id for child_id in children if self.ast.get_type(child_id) == "Condition"][0]

        # Create condition node
        cfg_condition_node = self.get_new_node()
        self.cfg.set_node_ptr(ast_condition_node, cfg_condition_node)
        self.cfg.set_type(cfg_condition_node, self.ast.get_type(ast_condition_node))
        self.cfg.set_image(cfg_condition_node, self.ast.get_image(ast_condition_node))

        # Visit child
        # Set If node as parent
        new_ctx = dict(ctx) # clone ctx
        new_ctx['parent'] = cfg_if_node
        # The AST must be queried with the AST id of the condition node; the CFG
        # id created above lives in a different numbering space.
        condition_first_child = self.ast.get_children(ast_condition_node)[0]
        self.visit_node(condition_first_child, new_ctx)
        cfg_relOp_node = new_ctx['endId']

        # Link bottom child (relOp) with condition node and update context to point to condition as last node for now
        self.cfg.add_edge(cfg_relOp_node, cfg_condition_node)
        self.__update_context(cfg_relOp_node, cfg_condition_node, new_ctx)

        # Retrieve statement bodies (what gets executed if true/false)
        ast_statement_nodes = [child_id for child_id in children if self.ast.get_type(child_id) == "StatementBody"]

        # Visit first statement body (true branch)
        true_stmt_ctx = dict(new_ctx) # clone modified ctx and set the condition node as parent
        true_stmt_ctx['parent'] = cfg_condition_node
        # Ignore StatementBody node and visit its child directly
        true_statement_body_child = self.ast.get_children(ast_statement_nodes[0])[0]
        self.visit_node(true_statement_body_child, true_stmt_ctx)
        # Create argument node and update ctx accordingly
        cfg_true_argument_node = self.get_new_node()
        self.cfg.set_type(cfg_true_argument_node, "Argument")
        self.cfg.add_edge(true_stmt_ctx['endId'], cfg_true_argument_node)
        self.__update_context(true_stmt_ctx['endId'], cfg_true_argument_node, true_stmt_ctx)

        # Visit second statement body (false branch)
        false_stmt_ctx = dict(new_ctx) # clone modified ctx and set the condition node as parent
        false_stmt_ctx['parent'] = cfg_condition_node
        # Ignore StatementBody node and visit its child directly
        false_statement_body_child = self.ast.get_children(ast_statement_nodes[1])[0]
        self.visit_node(false_statement_body_child, false_stmt_ctx)
        # Create argument node and update ctx accordingly
        cfg_false_argument_node = self.get_new_node()
        self.cfg.set_type(cfg_false_argument_node, "Argument")
        self.cfg.add_edge(false_stmt_ctx['endId'], cfg_false_argument_node)
        self.__update_context(false_stmt_ctx['endId'], cfg_false_argument_node, false_stmt_ctx)

        # Create end of statement node, where both branches join
        cfg_if_end_node = self.get_new_node()
        new_ctx["endId"] = cfg_if_end_node
        self.cfg.set_type(cfg_if_end_node, "IfEnd")
        self.cfg.add_edge(cfg_true_argument_node, cfg_if_end_node)
        self.cfg.add_edge(cfg_false_argument_node, cfg_if_end_node)

        ctx["endId"] = new_ctx["endId"]
        return cfg_if_node


    def __update_context(self, parent_node_id, current_node_id, ctx_to_update):
        """Overwrite the 'parent' and 'endId' entries of `ctx_to_update` in place."""
        ctx_to_update["parent"] = parent_node_id
        ctx_to_update["endId"] = current_node_id


    def visit_node(self, ast_node_id: int, ctx: dict):
        """Dispatch to the visit_* method matching the AST node type.

        Raises:
            ASTException: if the AST node has no type.
        """
        cur_type = self.ast.get_type(ast_node_id)
        if cur_type is None:
            raise ASTException("Missing type in a node")

        if cur_type in ["BinOP", "RelOP", "LogicOP"]:
            self.visit_BINOP(ast_node_id, ctx)
        elif cur_type in ["Block", "Start"]:
            self.visit_GENERIC_BLOCK(ast_node_id, ctx)
        elif cur_type in ["FunctionCall"]:
            self.visit_FUNCTION_CALL(ast_node_id, ctx)
        elif cur_type in ["IfThenElseStatement"]:
            self.visit_IF_THEN_ELSE(ast_node_id, ctx)
        elif cur_type in ["PLACEHOLDER"]: # Node to ignore
            self.visit_passthrough(ast_node_id, ctx)
        else:
            self.visit_GENERIC(ast_node_id, ctx)

    def visit_passthrough(self, ast_node_id: int, ctx: dict):
        """Skip the node itself and visit its children with the caller's ctx.

        The ctx is neither cloned nor is 'parent' advanced between children.
        """
        for child_id in self.ast.get_children(ast_node_id):
            self.visit_node(child_id, ctx)


## 1.3 For each ast file, load ast in memory and visit it to generate a cfg

In [None]:
def generate_CFG_for_AST_in_Folder(directory):
    """Generate a CFG for every '*.ast.json' file found under `directory`.

    For each AST file, the CFG is serialized to '<name>.php.cfg.json' in the
    output directory and `cfg.show` is called with the matching
    '<name>.php.cfg.dot' filename.

    Args:
        directory: Folder searched (recursively) for AST files.
    """
    # Iterate over the filenames of all ASTs in the specified directory
    for ast_path in get_ast_json_files(directory):
        # Load ast in memory
        ast = reader.read_ast(ast_path)
        base_name = get_filename_from_path(ast_path)

        # Visit the loaded ast before touching the disk, so a failing visit does
        # not leave an empty output file behind. A new visitor is used for each
        # file because the visitor's node counter is never reset.
        cfg = ASTtoCFGVisitor().visit(ast)

        # Write the cfg.json file; the handle is closed even if the write fails
        output_file = create_output_file(base_name + ".php.cfg.json")
        try:
            output_file.write(cfg.to_json())
        finally:
            close_output_file(output_file)

        # Generate and show graph
        cfg.show(filename=output_directory + base_name + ".php.cfg.dot")

## 1.4 Specify directory to analyze

In [None]:
# NOTE(review): this instance is not used by generate_CFG_for_AST_in_Folder, which
# creates its own visitor for every file; confirm no later cell needs it before removing.
visitor = ASTtoCFGVisitor()

# Currently limited to the "if" samples; for the final run, point this at "../part_1"
# so that every sample folder is processed.
directory_to_analyze = "../part_1/if/"
generate_CFG_for_AST_in_Folder(directory_to_analyze)
