# Authors
Ikram Kohil, 2019115 \
Johnatan Gao, 2013298

# Part 1 - CFG Construction

## 1.1 Utility functions

In [46]:
import os
from pathlib import Path
from code_analysis import ASTReader
from code_analysis import ASTException, CFG, AST

# Output directories, relative to the current working directory.
# Part 1 receives the generated .cfg.json and .dot files, part 2 the dead-code report.
# Keep the trailing slash: some paths below are built by plain string concatenation.
part1_output_directory = "output/part_1/"
part2_output_directory = "output/part_2/"
# Reader shared by every call that loads an *.ast.json file.
ast_reader = ASTReader()

# Utility functions taken from TP1
def get_json_files(extension, directory):
    """Recursively list the files under `directory` whose name matches `extension`.

    `extension` is a glob pattern such as '*.ast.json'. The result is a list of
    path strings, in whatever order `Path.rglob` yields them.
    """
    matches = Path(directory).rglob(extension)
    return list(map(str, matches))

def create_output_file(filename, directory):
    """Create (or reset) `filename` inside `directory` and return it opened for appending.

    The directory is created when it does not exist yet. A file with the same name
    already present in that directory is deleted first, so the returned handle always
    starts from an empty file. Append mode lets the caller issue several writes
    without truncating its own earlier output.

    The caller is responsible for closing the returned file object.
    """
    os.makedirs(directory, exist_ok=True)

    # The stale-file check has to target the file inside `directory`. Checking the
    # bare filename would look in the current working directory instead, leave the
    # previous output in place, and append mode would then extend it on every run.
    output_path = os.path.join(directory, filename)
    if os.path.exists(output_path):
        os.remove(output_path)

    return open(output_path, "a")

def close_output_file(file):
    """Close `file`, an open file object such as the one returned by create_output_file."""
    file.close()

def get_filename_from_path(full_filepath):
    """Return the file name of `full_filepath` without directories and without any extension.

    Everything up to the last "/" is dropped (a path without "/" is used as is),
    then everything from the first "." of the remaining name onward is removed,
    e.g. "dir/code.ast.json" -> "code".
    """
    # rpartition yields the whole string as the last element when there is no "/"
    basename = full_filepath.rpartition("/")[2]
    stem, _, _ = basename.partition(".")
    return stem


# 1.2 Create visitor

## 1.2.1 Function call
<div style="text-align: justify;">

For this section, we simply followed the provided example to generate the relevant nodes in the `visit_FUNCTION_CALL` method. A private function `__update_context()` was created to easily update a cloned context after the creation of a new node. A modification had to be done in the `visit_GENERIC` function in order to automatically create an argument node after an argument is visited in the AST. Further details are provided in the comments of each function.

</div>

## 1.2.2 IfThenElse Statement
<div style="text-align: justify;">

For this section, no particular difficulty was encountered, but the provided `visit_BINOP` function had to be modified so that it draws the appropriate relationship arrows during the visit, which avoids having to draw them manually in the `visit_IF_THEN_ELSE` method. It is important to note that we operated under the assumption that, in the AST, the right child (excluding the condition child) of an `IfThen/Else` statement is always the statement executed when the condition is true, and the left child the one executed when it is false.

Two additional private functions were also created, `__visit_CONDITION` and `__visit_STATEMENT_BODY`, to minimize code duplication.

</div>

## 1.2.3 While
<div style="text-align: justify;">

Visited using the `visit_WHILE` method. Given that `__visit_CONDITION` and `__visit_STATEMENT_BODY` had already been created, this function was quite straightforward.

</div>

## 1.2.4 While/Continue Break
<div style="text-align: justify;">

Given the way our previous functions were structured, as well as the fact that there is no specific tag for a while/continue/break, no particular function was made for this case. Instead, the handling of continue/break nodes is done in `visit_GENERIC`. Since a break/continue can only be called inside a `StatementBody`, and since our `__visit_STATEMENT_BODY` simply calls the `visit_node` function, the logic for break/continue had to be done in `visit_GENERIC`.

In the case of a break, the while is immediately ended, and thus we needed the id of the `WhileEnd` node. To do so, we went back up the AST until the id of the `While` node was retrieved. From there, we retrieved the id of the `Condition` node. From the AST id of the `Condition` node, we retrieved its CFG id, and then retrieved the id of the `WhileEnd` node (all of this could have been done from the CFG, but we judged that it was much easier and less likely to fail if done from the AST).

In the case of a continue, the current iteration is interrupted and the while restarted, and thus we needed the id of the `While` node. Therefore, we went back up the AST until the id of the `While` node was retrieved, and retrieved its CFG id.

The context had to be updated accordingly after each case, by setting `ctx["endId"] = 0` to ignore the code following these statements (to avoid drawing unnecessary edges).

</div>

In [47]:
# Part of this code was provided by the teaching assistant.

class ASTtoCFGVisitor:
    def __init__(self):
        # Counter behind get_new_node(); the first id handed out is 1.
        self.iNextNode = 0
        # Graph being built; visit() replaces it with a fresh CFG.
        self.cfg = CFG()
        # AST currently being converted; assigned by visit().
        self.ast = None

    def get_new_node(self) -> int:
        self.iNextNode += 1
        return self.iNextNode

    def visit(self, ast: AST):
        """Convert `ast` into a brand new CFG and return that CFG.

        The node id counter is not reset here, so ids keep increasing if the same
        visitor instance is used for several ASTs.
        """
        self.ast = ast
        self.cfg = CFG()

        source_name = self.ast.get_filename()
        print(f"Visit AST from file {source_name}")

        self.visit_ROOT()
        return self.cfg

    def visit_ROOT(self):
        ctx = {}
        entryNodeId = self.get_new_node()
        stopNodeId = self.get_new_node()
        rootAST = self.ast.get_root()
        self.cfg.set_root(entryNodeId)

        self.cfg.set_type(entryNodeId, "Entry")
        self.cfg.set_image(entryNodeId, "main")
        self.cfg.set_type(stopNodeId, "Exit")

        ctx['parent'] = entryNodeId
        ctx['scope'] = entryNodeId
        ctx['stopId'] = stopNodeId

        if self.ast.get_type(rootAST) == "Start":
            self.cfg.set_node_ptr(rootAST, entryNodeId)

        self.visit_node(rootAST, ctx)
        self.cfg.add_edge(ctx['endId'], stopNodeId)

    # chain nodes
    def visit_GENERIC(self, ast_node_id: int, ctx: dict) -> int:
        """Create one CFG node for `ast_node_id`, chain it after ctx["parent"], then visit its children.

        The new node copies the AST node's type and image. Three cases follow:
          - "Break": an edge is added to the WhileEnd node of the enclosing While,
            ctx["endId"] is set to 0 and the children are not visited.
          - "Continue": an edge is added to the CFG node of the enclosing While,
            ctx["endId"] is set to 0 and the children are not visited.
          - anything else: the children are visited in order, each one chained after
            the end of the previous one, and ctx["endId"] receives the last end id.

        Returns the id of the CFG node created for `ast_node_id`.
        """
        cfg_node = self.get_new_node()
        self.cfg.set_node_ptr(ast_node_id, cfg_node)
        self.cfg.set_type(cfg_node, self.ast.get_type(ast_node_id))
        self.cfg.set_image(cfg_node, self.ast.get_image(ast_node_id))
        self.cfg.add_edge(ctx["parent"], cfg_node)


        # Break handling
        if self.cfg.get_type(cfg_node) == "Break":
            # Climb the AST through the first parent of each node until a "While" is found.
            # NOTE(review): this assumes an enclosing While exists; a break elsewhere would
            # presumably fail on the [0] lookup once the root is passed - confirm.
            parent = self.ast.get_parents(ast_node_id)[0]
            while not self.ast.get_type(parent) == "While":
                parent = self.ast.get_parents(parent)[0]

            # From the While, go AST Condition -> CFG Condition -> its "WhileEnd" child.
            # This relies on visit_WHILE adding the Condition -> WhileEnd edge before it
            # visits the loop body.
            while_children = self.ast.get_children(parent)
            ast_condition_node = [child_id for child_id in while_children if self.ast.get_type(child_id) == "Condition"][0]
            cfg_condition_node = self.cfg.get_node_cfg_ptr(ast_condition_node)
            condition_children = self.cfg.get_children(cfg_condition_node)
            while_end_node = [child_id for child_id in condition_children if self.cfg.get_type(child_id) == "WhileEnd"][0]
            # A break leaves the loop: jump straight to the end of the while
            self.cfg.add_edge(cfg_node, while_end_node)

            # Reset end id since no code after the break will be executed
            ctx["endId"] = 0
            return cfg_node

        # Continue handling
        if self.cfg.get_type(cfg_node) == "Continue":
            # Same climb as for Break (same assumption that an enclosing While exists)
            parent = self.ast.get_parents(ast_node_id)[0]
            while not self.ast.get_type(parent) == "While":
                parent = self.ast.get_parents(parent)[0]

            # A continue restarts the loop: jump back to the CFG node of the While
            cfg_while_node = self.cfg.get_node_cfg_ptr(parent)
            self.cfg.add_edge(cfg_node, cfg_while_node)

            # Reset end id since no code after the continue will be executed
            ctx["endId"] = 0
            return cfg_node

        ctx["endId"] = cfg_node

        # In the case of an argument being called, add the argument node at the end of the call
        # Verify that the parent of the current node is an ArgumentList
        # Also verify that the current node isnt an argument itself, to account for the different parser used in the while_continue_break
        if self.ast.get_type(self.ast.get_parents(ast_node_id)[0]) == "ArgumentList" and not self.cfg.get_type(cfg_node) == "Argument":
            # Create argument node and update ctx accordingly
            cfg_argument_node = self.get_new_node()
            self.cfg.set_type(cfg_argument_node, "Argument")
            self.cfg.add_edge(ctx['endId'], cfg_argument_node)
            # After this call the caller's ctx has parent = cfg_node and endId = argument node
            self.__update_context(ctx['endId'], cfg_argument_node, ctx)

        # Children are chained starting from cfg_node (not from the Argument node, if any)
        new_ctx = dict(ctx) # clone ctx
        new_ctx["parent"] = cfg_node
        for child_id in self.ast.get_children(ast_node_id):
            self.visit_node(child_id, new_ctx)
            new_ctx["parent"] = new_ctx["endId"]

        # Without children this keeps the value set above (cfg_node or the Argument node)
        ctx["endId"] = new_ctx["endId"]
        return cfg_node

    def visit_GENERIC_BLOCK(self, ast_node_id: int, ctx: dict):
        new_ctx = dict(ctx) # clone ctx
        for child_id in self.ast.get_children(ast_node_id):      
            self.visit_node(child_id, new_ctx)
            new_ctx["parent"] = new_ctx["endId"]
        ctx["endId"] = new_ctx["endId"]

        return None

    def visit_BINOP(self, ast_node_id: int, ctx: dict) -> int:
        #Create BinOP node
        cfg_node = self.get_new_node()
        self.cfg.set_node_ptr(ast_node_id, cfg_node)
        self.cfg.set_type(cfg_node, self.ast.get_type(ast_node_id))
        self.cfg.set_image(cfg_node, self.ast.get_image(ast_node_id))

        #Visit right child
        new_ctx = dict(ctx) # clone ctx
        self.visit_node(self.ast.get_children(ast_node_id)[1], new_ctx)
        right = new_ctx['endId']

        #Visit right left
        new_ctx = dict(ctx) # clone ctx
        new_ctx["parent"] = right
        self.visit_node(self.ast.get_children(ast_node_id)[0], new_ctx)
        left = new_ctx['endId']

        #Link left child with BinOp
        self.cfg.add_edge(left, cfg_node)

        ## Added  section to draw the appropriate relationship arrows for each type of of binary operator
        ## If parent is also binOP, then its an operation (ie x = 1 + 1) and the call arg function needs to be called instead
        cfg_node_type = self.cfg.get_type(cfg_node)
        node_parent = self.ast.get_parents(ast_node_id)

        if cfg_node_type == "BinOP":
            if node_parent and not self.ast.get_type(node_parent[0]) == "BinOP":
                self.cfg.set_op_hands(cfg_node, left, right)
            elif node_parent and self.ast.get_type(node_parent[0]) == "BinOP":
                self.cfg.add_call_arg(cfg_node, left)
                self.cfg.add_call_arg(cfg_node, right)
        elif cfg_node_type == "RelOP":
            self.cfg.add_call_arg(cfg_node, left)
            self.cfg.add_call_arg(cfg_node, right)


        ctx["endId"] = cfg_node
        return cfg_node
    
    def visit_FUNCTION_CALL(self, ast_node_id: int, ctx: dict) -> int:
        """Convert a FunctionCall AST node into FunctionCall ... CallBegin, CallEnd, RetValue nodes.

        The FunctionCall node and its children (name, arguments) are produced by
        visit_GENERIC. A CallBegin node is then chained after the last node produced,
        bound to the function's Id node and to the CFG node of each argument; a CallEnd
        node is attached to it with set_call, followed by a RetValue node.
        ctx["endId"] ends up pointing at the RetValue node.

        Returns the id of the FunctionCall CFG node.
        """
        # Get function node
        cfg_node = self.visit_GENERIC(ast_node_id, ctx)
        # visit_GENERIC has already recorded this AST -> CFG mapping; it is set again here
        self.cfg.set_node_ptr(ast_node_id, cfg_node)

        # Get function id (its name)
        # NOTE(review): assumes the FunctionCall CFG node has a direct child of type "Id";
        # the [0] lookup fails otherwise - confirm against the parser output.
        children = self.cfg.get_children(cfg_node)
        cfg_function_id = [child_id for child_id in children if self.cfg.get_type(child_id) == "Id"][0]

        # Create node representing the call to the function and link it to last cfg node
        cfg_begin_node = self.get_new_node()
        self.cfg.set_type(cfg_begin_node, "CallBegin")
        self.cfg.set_image(cfg_begin_node, self.cfg.get_image(cfg_node))
        self.cfg.add_edge(ctx["endId"], cfg_begin_node)

        # We created a new node; Clone and update context
        new_ctx = dict(ctx) # clone ctx
        self.__update_context(ctx["endId"], cfg_begin_node, new_ctx)

        # Set calls to the function
        self.cfg.set_call_expr(cfg_begin_node, cfg_function_id)

        # Set call to arguments
        # Current ast node is functioncall, so get argumentlist child from ast
        ast_children = self.ast.get_children(ast_node_id)
        argument_list = [child_id for child_id in ast_children if self.ast.get_type(child_id) == "ArgumentList"]

        if len(argument_list) > 0:
            arguments = self.ast.get_children(argument_list[0])

            # Each argument was visited by visit_GENERIC above, so its CFG node already exists
            for argument in arguments:
                cfg_argument_node = self.cfg.get_node_cfg_ptr(argument)
                self.cfg.add_call_arg(cfg_begin_node, cfg_argument_node)

        # Create node representing the end of the call to the function
        # CallBegin and CallEnd are tied together with set_call; no add_edge is issued between them
        cfg_end_node = self.get_new_node()
        self.cfg.set_type(cfg_end_node, "CallEnd")
        self.cfg.set_image(cfg_end_node, self.cfg.get_image(cfg_node))
        self.cfg.set_call(cfg_begin_node, cfg_end_node)
        self.__update_context(cfg_begin_node, cfg_end_node, new_ctx)

        # Create node representing the return value
        cfg_ret_value_node = self.get_new_node()
        self.cfg.set_type(cfg_ret_value_node, "RetValue")
        self.cfg.add_edge(cfg_end_node, cfg_ret_value_node)
        self.__update_context(cfg_end_node, cfg_ret_value_node, new_ctx)

        # Update the original context's last node id (endId)
        ctx["endId"] = new_ctx["endId"]
        return cfg_node

    def visit_IF_THEN_ELSE(self, ast_node_id: int, ctx: dict) -> int:
        """Convert an IfThen / IfThenElse AST node into If -> condition -> branches -> IfEnd.

        An "If" node is chained after ctx["parent"], followed by the condition
        expression and a Condition node. Every StatementBody child is visited starting
        from the Condition node and its end is linked to a shared "IfEnd" node. When
        there is no second StatementBody, the Condition node itself is linked to IfEnd.
        ctx["endId"] ends up pointing at the IfEnd node.

        Returns the id of the "If" CFG node.
        """
        # Get IfThen/Else node
        cfg_if_node = self.get_new_node()
        self.cfg.set_node_ptr(ast_node_id, cfg_if_node)

        # Generate If node first and link it to parent
        self.cfg.set_type(cfg_if_node, "If")
        self.cfg.set_image(cfg_if_node, self.ast.get_image(ast_node_id))
        self.cfg.add_edge(ctx["parent"], cfg_if_node)

        # Get children from ast to retrieve the condition node
        children = self.ast.get_children(ast_node_id)
        ast_condition_node = [child_id for child_id in children if self.ast.get_type(child_id) == "Condition"][0]

        # Create and visit condition node
        # After this call new_ctx["endId"] is the Condition node
        new_ctx = dict(ctx) # clone ctx
        cfg_condition_node = self.__visit_CONDITION(cfg_if_node, ast_condition_node, new_ctx)

        # Retrieve statement bodies (what gets executed if true/false)
        ast_statement_nodes = [child_id for child_id in children if self.ast.get_type(child_id) == "StatementBody"]

        # Create end of statement node in advance
        cfg_if_end_node = self.get_new_node()
        self.cfg.set_type(cfg_if_end_node, "IfEnd")

        true_stmt_ctx = dict(new_ctx) # clone modified ctx and set the condition node as parent
        # First StatementBody: treated as the branch executed when the condition holds
        # NOTE(review): if the branch ends with Break/Continue, visit_GENERIC reports endId 0
        # and 0 is passed to add_edge as the source - confirm how CFG.add_edge treats it.
        if len(ast_statement_nodes) > 0:
            cfg_true_statement_body_node = self.__visit_STATEMENT_BODY(cfg_condition_node, ast_statement_nodes[0], true_stmt_ctx)
            self.cfg.add_edge(cfg_true_statement_body_node, cfg_if_end_node)

        false_stmt_ctx = dict(new_ctx) # clone modified ctx and set the condition node as parent
        # Second StatementBody, if any: treated as the else branch
        if len(ast_statement_nodes) > 1:
            cfg_false_statement_body_node = self.__visit_STATEMENT_BODY(cfg_condition_node, ast_statement_nodes[1], false_stmt_ctx)
            self.cfg.add_edge(cfg_false_statement_body_node, cfg_if_end_node)
        else:
            # If no else statement, link the Condition node (new_ctx["endId"]) directly to the end node
            self.cfg.add_edge(new_ctx["endId"], cfg_if_end_node)

        # The IfEnd node becomes the last node of this statement
        self.__update_context(new_ctx["endId"], cfg_if_end_node, new_ctx)
        ctx["endId"] = new_ctx["endId"]

        return cfg_if_node
    
    def visit_WHILE(self, ast_node_id: int, ctx: dict):
        """Convert a While AST node into While -> condition -> (body -> While | WhileEnd).

        A While node is chained after ctx["parent"], followed by the condition
        expression and a Condition node. The Condition node is linked to a new
        "WhileEnd" node, the first StatementBody is visited starting from the Condition
        node, and the end of the body is linked back to the While node.
        ctx["endId"] ends up pointing at the WhileEnd node.

        Returns the id of the While CFG node.
        """
        # Get while node
        cfg_node = self.get_new_node()
        self.cfg.set_node_ptr(ast_node_id, cfg_node)

        # Generate while node and link it to parent
        self.cfg.set_type(cfg_node, self.ast.get_type(ast_node_id))
        self.cfg.set_image(cfg_node, self.ast.get_image(ast_node_id))
        self.cfg.add_edge(ctx["parent"], cfg_node)

        # Get children from ast to retrieve the condition node
        children = self.ast.get_children(ast_node_id)
        ast_condition_node = [child_id for child_id in children if self.ast.get_type(child_id) == "Condition"][0]

        # Create and visit condition node
        new_ctx = dict(ctx) # clone ctx
        cfg_condition_node = self.__visit_CONDITION(cfg_node, ast_condition_node, new_ctx)

        # Create node referring to end of while and update context accordingly
        # The Condition -> WhileEnd edge must exist before the body is visited: the Break
        # handling in visit_GENERIC looks the WhileEnd node up among the Condition's children.
        cfg_while_end_node = self.get_new_node()
        self.cfg.set_type(cfg_while_end_node, "WhileEnd")
        self.__update_context(cfg_condition_node, cfg_while_end_node, new_ctx)
        self.cfg.add_edge(new_ctx["parent"], cfg_while_end_node)

        # Visit the first StatementBody (the loop body, executed while the condition is true)
        # NOTE(review): assumes the While has at least one StatementBody child; the [0]
        # lookup fails otherwise.
        ast_statement_nodes = [child_id for child_id in children if self.ast.get_type(child_id) == "StatementBody"]
        true_stmt_ctx = dict(new_ctx) # clone modified ctx and set the condition node as parent
        cfg_statement_body_node = self.__visit_STATEMENT_BODY(cfg_condition_node, ast_statement_nodes[0], true_stmt_ctx)
        # Loop back: the end of the body returns to the While node.
        # NOTE(review): when the body ends with Break/Continue the reported end id is 0,
        # so 0 is passed as the edge source - confirm how CFG.add_edge treats it.
        self.cfg.add_edge(cfg_statement_body_node, cfg_node)

        # Code following the loop continues from the WhileEnd node
        ctx["endId"] = new_ctx["endId"]
        return cfg_node

    def __visit_CONDITION(self, cfg_parent_node, ast_condition_node, condition_ctx):
        cfg_condition_node = self.get_new_node()
        self.cfg.set_node_ptr(ast_condition_node, cfg_condition_node)
        self.cfg.set_type(cfg_condition_node, self.ast.get_type(ast_condition_node))

        # Visit child
        # Update context to the condition's parent
        condition_ctx['parent'] = cfg_parent_node
        condition_first_child = self.ast.get_children(ast_condition_node)[0]
        
        self.visit_node(condition_first_child, condition_ctx)
        condition_content = condition_ctx['endId']

        # Link bottom child of the condition content with condition node and update context to point to condition as last node for now
        self.cfg.add_edge(condition_content, cfg_condition_node)
        self.__update_context(condition_content, cfg_condition_node, condition_ctx)
    
        # Return last visited cfg node
        return cfg_condition_node
    
    def __visit_STATEMENT_BODY(self, cfg_parent_node, ast_stmt_node, stmt_ctx):
        stmt_ctx['parent'] = cfg_parent_node
        new_ctx = dict(stmt_ctx)

        # Ignore StatementBody node and visit its child directly
        statement_body_child = self.ast.get_children(ast_stmt_node)[0]
        self.visit_node(statement_body_child, new_ctx)

        # Return last visited cfg node
        return new_ctx['endId']

    def __update_context(self, parent_node_id, current_node_id, ctx_to_update):
        ctx_to_update["parent"] = parent_node_id
        ctx_to_update["endId"] = current_node_id


    def visit_node(self, ast_node_id: int, ctx: dict):
        cur_type = self.ast.get_type(ast_node_id)
        if cur_type is None:
            raise ASTException("Missing type in a node")

        if cur_type in ["BinOP", "RelOP", "LogicOP"]:
            self.visit_BINOP(ast_node_id, ctx)
        elif cur_type in ["Block", "Start"]:
            self.visit_GENERIC_BLOCK(ast_node_id, ctx)
        elif cur_type in ["FunctionCall"]:
            self.visit_FUNCTION_CALL(ast_node_id, ctx)
        elif cur_type in ["IfThenElseStatement", "IfThenStatement"]:
            self.visit_IF_THEN_ELSE(ast_node_id, ctx)
        elif cur_type in ["While"]:
            self.visit_WHILE(ast_node_id, ctx)
        elif cur_type in ["PLACEHOLDER"]: # Node to ignore
            self.visit_passthrough(ast_node_id, ctx)
        else:
            self.visit_GENERIC(ast_node_id, ctx)

    def visit_passthrough(self, ast_node_id: int, ctx: dict):
        for child_id in self.ast.get_children(ast_node_id):
            self.visit_node(child_id, ctx)


## 1.3 For each AST file, load AST in memory and visit it to generate a CFG

In [48]:
def generate_CFG_for_AST_in_folder(directory):
    """Build a CFG for every '*.ast.json' file found recursively under `directory`.

    For each AST file the CFG is written as JSON to
    `part1_output_directory/<name>.php.cfg.json`, then rendered with `cfg.show`
    into `<name>.php.cfg.dot`, where `<name>` comes from get_filename_from_path.

    NOTE(review): `<name>` ignores the sub-directory, so two AST files with the same
    base name in different folders write to the same output files - confirm this is
    acceptable for the analysed data set.
    """
    for filename in get_json_files('*.ast.json', directory):
        # Load ast in memory
        ast = ast_reader.read_ast(filename)
        base_name = get_filename_from_path(filename)

        # Convert first and open the output file afterwards, so that a failing
        # conversion does not leave an empty file with a dangling open handle.
        cfg = ASTtoCFGVisitor().visit(ast)

        output_file = create_output_file(base_name + ".php.cfg.json", part1_output_directory)
        try:
            output_file.write(cfg.to_json())
        finally:
            # Always release the handle, even if serialisation or the write fails
            close_output_file(output_file)

        # Generate and show graph
        cfg.show(filename=part1_output_directory + base_name + ".php.cfg.dot")

## 1.4 Specify directory to analyze

In [49]:
# Folder searched recursively for *.ast.json files; one CFG is generated per file found.
directory_to_analyze = "../part_1/"
generate_CFG_for_AST_in_folder(directory_to_analyze)


Visit AST from file code.php
Visit AST from file code.php
Visit AST from file code.php
Visit AST from file code.php
Visit AST from file code.php
Visit AST from file code.php
Visit AST from file code.php
Visit AST from file code.php


# Part 2 - Dead Code Detection
<div style="text-align: justify;">

The results of this search will be stored in the `output/part_2/dead_code_results.txt` file.

</div>

## 2.1 Create visitor
<div style="text-align: justify;">

The `CFGFunctionVisitor` starts visiting a CFG from its main entry node and visits every child until the bottom of the graph. This ensures that only the "active" (executed) code is visited and leaves out the dead nodes, since they do not have an entry node in their ancestry. The result is kept in the `dead_nodes` global dictionary, where the key is the filename and the value is an array containing the node ids of the dead code.

As for the interprocedural dead code, while visiting the main code (starting from the main entry node) of each file, we add the names of all the functions called to `function_calls`. This set is updated with each function called in each file. The function definitions are also visited, using the `self.cfg.get_func_entry_nodes()` function. They are stored in the `function_definitions` dictionary, where the filename is the key and the value is a set containing the names of the functions defined in that file. The filtering of the interprocedural dead code is then done in section 2.2.

Further details are provided in the comments of the code.

</div>

In [50]:
from code_analysis import CFGReader
import sys

# Results shared by all visited files; filled in by CFGFunctionVisitor.visit().
# filename -> list of CFG node ids that were never reached during the visit
dead_nodes = {}
# filename -> set of the function names defined in that file
function_definitions = {}
# Names of the function calls recorded so far. It is never reset, so it accumulates across files and directories.
function_calls = set() # Does not need to be a dictionary since we keep track of all function calls across the whole directory without distinction

# Create a visitor that keeps track of all function statements and function calls
class CFGFunctionVisitor:
    def __init__(self, filename):
        self.cfg = None
        self.filename = filename

        # Stored in array
        self.visited_nodes = set() # Using a set to avoid adding nodes that have already been visited
        self.function_calls_names = set()
        self.defined_func_names = set()
        self.intraprocedural_dead_nodes = []

    def visit(self, cfg: CFG):
        """Analyse `cfg` and publish the results in the module-level collections.

        The graph is walked from its root, then from the entry node of every defined
        function. Nodes reached by neither walk are the intraprocedural dead code.
        On return, `function_calls`, `dead_nodes[filename]` and
        `function_definitions[filename]` have been updated.
        """
        self.cfg = cfg
        print(f"Visiting CFG from file {self.filename}")

        # Walk 1: whatever the program itself executes, starting at the root (entry node).
        # Dead nodes have no entry node in their ancestry, so they are never reached.
        self.__visit(self.cfg.get_root())

        # Walk 2: the body of each defined function, starting at its own entry node.
        # called_fn_flag=False: the entry node itself is not inspected for a function call.
        for entry_node in self.cfg.get_func_entry_nodes():
            self.defined_func_names.add(self.cfg.get_entry_func_name(entry_node))
            self.__visit(entry_node, called_fn_flag=False)

        # Whatever neither walk reached is intraprocedural dead code
        reached = self.visited_nodes
        self.intraprocedural_dead_nodes = [
            node_id for node_id in self.cfg.get_node_ids() if node_id not in reached
        ]

        # Publish the results for later processing
        function_calls.update(self.function_calls_names)
        dead_nodes[self.filename] = self.intraprocedural_dead_nodes
        function_definitions[self.filename] = self.defined_func_names

    def __visit(self, node_id: int, called_fn_flag = True):
        self.visited_nodes.add(node_id)

        # The called_fn_flag helps with filtering the dead functions
        # Set to true while visiting main code, false when visiting function definition
        if called_fn_flag:
            # Retrieve the functions that were called
            if self.cfg.get_type(node_id) == "FunctionCall":
                self.function_calls_names.add(self.cfg.get_image(node_id))

        children = self.cfg.get_any_children(node_id)
        for child_id in children:
            # Only visit if not previously visited, for performance
            if child_id not in self.visited_nodes:
                self.__visit(child_id)

## 2.2 For each CFG file, load CFG in memory and visit it.
<div style="text-align: justify;">

In `find_dead_code_from_cfg_in_directory`, each cfg file is visited once to detect all intraprocedural dead code in each file, and to keep track of ALL function definitions and function calls. Then, once all code is visited, we iterate over the array of files (not over the files themselves) to remove all the called functions from the defined functions set. This will ensure that only the functions that were never called remain, which gives us the interprocedural dead code.

</div>

In [51]:
# Prepare output file and reader
# The report file is created as soon as this cell runs and stays open until
# close_output_file(dead_code_output_file) is called, so that the results of
# several directories can be written to the same file.
cfg_reader = CFGReader()
dead_code_output_file = create_output_file("dead_code_results.txt", part2_output_directory)

def find_dead_code_from_cfg_in_directory(directory):
    """Analyse every '*.cfg.json' file under `directory` and append the findings to the report.

    Pass 1 visits each CFG once, which fills the shared `dead_nodes`,
    `function_definitions` and `function_calls` collections. Pass 2 removes every
    called function name from each file's set of defined names, then writes, per
    file, the unreached node ids and the remaining (never called) function names.
    """
    cfgFilenames = get_json_files('*.cfg.json', directory)
    report = dead_code_output_file

    report.write(f"________________________ Directory: {directory} ________________________\n")

    # Pass 1: visit every cfg of the directory
    for filename in cfgFilenames:
        cfg = cfg_reader.read_cfg(filename)
        CFGFunctionVisitor(filename).visit(cfg)
        # Graph rendering is intentionally skipped here: some graphs are far too
        # large and greatly reduced performance.

    # Pass 2: all calls are known now, so the uncalled functions can be isolated
    for filename in cfgFilenames:
        if filename not in function_definitions or filename not in dead_nodes:
            continue

        # Drop the called functions in place; only the uncalled function names remain
        function_definitions[filename] -= function_calls

        report.writelines([
            f"------------------------ File: {filename} ------------------------\n",
            f"Intraprocedural dead code (node ids): {dead_nodes[filename] or 'None'}\n",
            f"Interprocedural dead code (fn names): {function_definitions[filename] or 'None'}\n\n",
        ])
        



## 2.3 Specify directory to analyze
<div style="text-align: justify;">
Important note: we had to increase Python's default recursion limit, since some files in the wordpress folder are large enough that visiting their CFG exceeded the default limit.
</div>

In [52]:
# Raise Python's recursion limit before processing the large WordPress CFGs.
sys.setrecursionlimit(10000)
directories_to_analyze = ["../part_2/code_mort/", "../part_2/wordpress/wordpress_ast_cfg/"]

for directory in directories_to_analyze:
    find_dead_code_from_cfg_in_directory(directory)
# All directories write to the same report file, so it is closed only after the last one.
close_output_file(dead_code_output_file)

Visiting CFG from file ../part_2/code_mort/example2.php.cfg.json
Visiting CFG from file ../part_2/code_mort/example3.php.cfg.json
Visiting CFG from file ../part_2/code_mort/example1.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-login.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-signup.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-cron.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-blog-header.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-settings.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-links-opml.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/readme.html.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/license.txt.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-trackback.php.cfg.json
Visiting CFG from file ../part_2/wordpress


(eog:17978): EOG-CRITICAL **: 00:02:44.646: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:17978): GLib-GIO-CRITICAL **: 00:02:44.646: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:17978): EOG-CRITICAL **: 00:02:44.646: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:17978): GLib-GIO-CRITICAL **: 00:02:44.646: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:17978): EOG-CRITICAL **: 00:02:44.646: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:17978): GLib-GIO-CRITICAL **: 00:02:44.646: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:17978): EOG-CRITICAL **: 00:02:44.646: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:17978): GLib-GIO-CRITICAL **: 00:02:44.646: g_file_equal: assertion 'G_IS_FILE (file1)' failed

(eog:17978): EOG-CRITICAL **: 00:02:44.646: eog_image_get_file: assertion 'EOG_IS_IMAGE (img)' failed

(eog:17978): GLib-GIO-CRITICAL **: 00:02:44.646: g_file_equal: assertion 'G_IS_F

Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-config-sample.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-activate.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-mail.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/index.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-comments-post.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-includes/class-wp-metadata-lazyloader.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-includes/class-feed.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-includes/class-wp-post-type.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-includes/query.php.cfg.json
Visiting CFG from file ../part_2/wordpress/wordpress_ast_cfg/wp-includes/class-wp-user-meta-session-tokens.php.cfg.json
Visiting CFG from file ../part_2/wordpres