## 1. Pattern Detection

### 1.1 Retrieve the names of all ast files

In [17]:
from code_analysis import AST, ASTReader
from pathlib import Path
import os

## Global variables
reader = ASTReader()

# Start from a clean slate: drop any output file left over from a previous run
filename = "calls_to_db_output.txt"
if Path(filename).exists():
    Path(filename).unlink()

# Opened in 'append' mode, so every write is added at the end of the file
output_file = open(filename, "a")


## In specified directory, find all files ending with '.ast.json' in order to retrieve the list of all asts
## Store the file names satisfying the condition in an array in order to iterate over it when calling the visitor
def get_ast_json_files(directory):
    """Return the paths of all '.ast.json' files found under *directory*.

    The directory is searched recursively. Paths are returned as strings and
    sorted, so the files are visited (and reported) in the same order on every
    run instead of in whatever order the filesystem happens to list them.
    Directories whose name ends in '.ast.json' are skipped, since only regular
    files can be loaded as an AST.
    """
    root = Path(directory)
    return sorted(
        str(path) for path in root.rglob('*.ast.json') if path.is_file()
    )


### 1.2 Create visitor that will visit a given ast in search of calls to an SQL Database

In [18]:
from typing import List

class ASTMethodCallVisitor:
    """Walk an AST and report every call to one of a given set of functions.

    For each AST, every node of type 'FunctionCall' or 'MethodCall' whose
    image (the name of the called function/method) is one of the searched
    names produces one line in the output stream. The first such line is
    preceded by a header naming the file the AST comes from, and an empty line
    is written once the whole AST has been visited. Nothing is written for an
    AST that contains no matching call.
    """

    # Node types considered to be a call (see the note in section 1.4)
    _CALL_NODE_TYPES = ("FunctionCall", "MethodCall")

    def __init__(self, node_image_arr: List[str], output=None):
        """Create a visitor searching for the given function names.

        Args:
            node_image_arr: Names of the functions/methods to search for.
                Keeping this a parameter makes the class reusable for
                different use cases. A single name passed as a bare string is
                treated as a one-element list.
            output: Writable text stream that receives the report. When
                omitted, the module-level `output_file` is used.
        """
        self.ast = None

        # Used to determine if the ast is being visited for the first time, in order to print the name of the file only once in the output
        self.isFirstVisitToFile = True

        # With a bare string, `name in node_image_arr` would be a substring
        # test ("open" in "fsockopen" is True) and report unrelated calls.
        if isinstance(node_image_arr, str):
            node_image_arr = [node_image_arr]

        # Used to store all functions to search for while visiting the ast
        self.node_image_arr = node_image_arr

        # Used to print an empty line after an ast in which a function call has been found
        self.hasFoundCall = False

        self._output = output

    def _write(self, text: str):
        """Write *text* to the explicit output stream, or to `output_file`."""
        # The global is looked up at write time because the notebook rebinds
        # `output_file` to a different file between sections.
        stream = output_file if self._output is None else self._output
        stream.write(text)

    # The annotation is quoted so the class can be defined before AST is imported.
    def visit(self, ast: "AST"):
        """Visit every node of *ast* and report the matching calls."""
        self.ast = ast

        # Reset the per-file state, so a previous visit that was interrupted
        # by an exception cannot influence the report of this file.
        self.isFirstVisitToFile = True
        self.hasFoundCall = False

        # State on stdout which file is being visited, whether a call to one of the specified functions (in node_image_arr) is found or not
        print(f"----- Visiting AST from file {self.ast.get_filename()} -----\n")

        # Start by visiting the root node of the ast
        # The private '__visit' function recursively visits the children of each node
        self.__visit(self.ast.get_root())

        # After the entire ast has been visited, separate it from the next one
        if self.hasFoundCall:
            self._write("\n")

            # Set the flag back to false for the next file
            self.hasFoundCall = False

    def __visit(self, node_id: int):
        """Report *node_id* if it is a searched call, then visit its children."""
        # The node's image refers to the name of the function/variable (ex: mysql_query, exec, etc)
        node_image = self.ast.get_image(node_id)
        is_call = self.ast.get_type(node_id) in self._CALL_NODE_TYPES

        if is_call and node_image in self.node_image_arr:
            # Remember that a call was found, in order to add an empty line once the ast has been fully visited. Purely for formatting purposes.
            self.hasFoundCall = True

            # State which file is being visited - only written once per ast, and only if a call to a specified function has been detected.
            if self.isFirstVisitToFile:
                self._write(f"----- Visiting AST from file {self.ast.get_filename()} -----\n")
                self.isFirstVisitToFile = False

            # Report the line at which the call has been made
            self._write(f"Function '{node_image}' is called "
                f"at line {self.ast.get_position(node_id)[0]}\n")

        # Visit the node's children
        for child_id in self.ast.get_children(node_id):
            self.__visit(child_id)

### 1.3 For each ast file, load ast in memory and visit nodes in search of function calls

In [19]:
def findMethodCallInFolder(directory: str, methods_to_find: List[str]):
    """Search every AST file found in *directory* for calls to *methods_to_find*.

    Each AST file is loaded in memory with the global `reader` and walked by
    its own `ASTMethodCallVisitor`, which reports the calls it finds.
    """
    for ast_path in get_ast_json_files(directory):
        # Load the ast first, then hand it to a fresh visitor
        loaded_ast = reader.read_ast(ast_path)
        ASTMethodCallVisitor(methods_to_find).visit(loaded_ast)


### 1.4 Specify directory to analyze and functions to find
_**Note:** In PHP, calls to an SQL database can be made in the following ways:_
```
    mysql_query ( * ) ;
    mysqli_query ( * ) ;
    $object - > execute () ;
    $object - > mysql - > exec ( * ) ;
```  

In [None]:
directory_to_analyze = "../code_to_analyze/wordpress_ast/"

# List of all possible db calls in php (based on the above note)
methods_to_find = ["mysql_query", "mysqli_query", "execute", "exec"]

# Close the output file even if the analysis fails part-way, so that what has
# been written so far reaches the disk and the file handle is not leaked.
try:
    findMethodCallInFolder(directory_to_analyze, methods_to_find)
finally:
    output_file.close()

# 2. CVE Detection

## 2.1 CVEs to detect (gr.04): 
- [CVE-2017-7189](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2017-7189):
    - **Description**: main/streams/xp_socket.c in PHP 7.x before 2017-03-07 misparses **<u>fsockopen</u>** calls, such as by interpreting fsockopen('127.0.0.1:80', 443) as if the address/port were 127.0.0.1:80:443, which is later truncated to 127.0.0.1:80. This behavior has a security risk if the explicitly provided port number (i.e., 443 in this example) is hardcoded into an application as a security policy, but the hostname argument (i.e., 127.0.0.1:80 in this example) is obtained from untrusted input. 
- [CVE-2019-11039](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-11039):
    - **Description**: Function **<u>iconv_mime_decode_headers()</u>** in PHP versions 7.1.x below 7.1.30, 7.2.x below 7.2.19 and 7.3.x below 7.3.6 may perform out-of-buffer read due to integer overflow when parsing MIME headers. This may lead to information disclosure or crash. 
- [CVE-2021-21707](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-21707):
    - **Description**: In PHP versions 7.3.x below 7.3.33, 7.4.x below 7.4.26 and 8.0.x below 8.0.13, certain XML parsing functions, like **<u>simplexml_load_file()</u>**, URL-decode the filename passed to them. If that filename contains URL-encoded NUL character, this may cause the function to interpret this as the end of the filename, thus interpreting the filename differently from what the user intended, which may lead it to reading a different file than intended. 

## 2.2 Create new file to store output

In [21]:
# Start from a clean slate: drop any output file left over from a previous run
filename = "cve_detection_output.txt"
if Path(filename).exists():
    Path(filename).unlink()

# Opened in 'append' mode, so every write is added at the end of the file
output_file = open(filename, "a")

## 2.3 Find all problematic functions in specified folder: 

Based on the descriptions provided in 2.1, each CVE pertains to a specific function. Since the `findMethodCallInFolder` function implemented in the previous section is generic, it can be reused in this section as well, by passing the names of the relevant functions as its `methods_to_find` list.

In [None]:
directory_to_analyze = "../code_to_analyze/test_cve/"

# Each CVE of section 2.1 with the PHP function(s) it pertains to.
# The names must be given as a list: with a bare string, the visitor's
# `name in methods_to_find` check becomes a substring test, so that e.g. a
# call to `open` would be reported as a call to `fsockopen`.
cve_functions = {
    "CVE-2017-7189": ["fsockopen"],
    "CVE-2019-11039": ["iconv_mime_decode_headers"],
    "CVE-2021-21707": ["simplexml_load_file"],
}

# Close the output file even if the analysis of one CVE fails, so that what
# has been written so far reaches the disk and the file handle is not leaked.
try:
    for cve_id, functions_to_find in cve_functions.items():
        output_file.write(f"----- {cve_id} -----\n")
        findMethodCallInFolder(directory_to_analyze, functions_to_find)
finally:
    output_file.close()