In [1]:
import os
import clang
from clang.cindex import *
from copy import deepcopy
import re
import time

In [2]:
# Location of the libclang shared library. The path is machine-specific, so it
# can be overridden through the LIBCLANG_PATH environment variable; the
# original hard-coded location is kept as the fallback.
LIBCLANG_PATH = os.environ.get(
    "LIBCLANG_PATH",
    "/home/dipu/anaconda3/lib/python3.9/site-packages/clang/native/libclang.so",
)

# Config.set_library_file() raises once libclang has already been loaded, so
# skip the call when this cell is re-run in a live kernel.
if not Config.loaded:
    Config.set_library_file(LIBCLANG_PATH)

In [3]:
def generate_ast(code):
    """Write ``code`` to ``evaluation.c`` and parse that file with libclang.

    Args:
        code: C source text to analyse.

    Returns:
        The root cursor of the translation unit parsed from ``evaluation.c``.
    """
    source_name = 'evaluation.c'

    # libclang is given a file name here, so the snippet is written to disk
    # (in the current working directory) before parsing.
    with open(source_name, 'w') as source_file:
        source_file.write(code)

    parser_index = clang.cindex.Index.create()
    translation_unit = parser_index.parse(source_name)
    return translation_unit.cursor

In [8]:
def generate_expression_details(root_cursor):
    """Collect operator expressions that mix several operators from a clang AST.

    The tree under ``root_cursor`` is walked depth-first. A BINARY_OPERATOR or
    UNARY_OPERATOR cursor is reported when it passes the token-based filter
    below; a reported cursor is not descended into, so its sub-expressions are
    not listed separately. Every other cursor is only traversed.

    Args:
        root_cursor: cursor to start from; it must provide ``kind``,
            ``get_children()``, ``get_tokens()`` and ``extent``.

    Returns:
        A list with one entry per reported expression:
        ``[text, start_line, start_column, end_line, end_column]`` where
        ``text`` is the token spellings joined by single spaces and the four
        positions are strings.

    An exception raised while handling a cursor is printed and handling of
    that cursor stops; it is not propagated to the caller.
    """
    # A token is acceptable only if it is built from identifier characters,
    # digits, parentheses, '.', ',' and operator characters (no ternary, no
    # brackets, no quotes).
    allowed_token = re.compile(r"^[a-zA-Z0-9_().,+\-*/%<>=!&|~\^]+$")

    # '*' and '&' are counted as operators only when the neighbouring tokens
    # look like operands, which leaves out pointer dereference / address-of.
    operand_before = re.compile(r"^[a-zA-Z0-9_)]+$")
    operand_after = re.compile(r"^[a-zA-Z0-9_(!~]+$")

    # Operators that are counted wherever they appear ('*' and '&' excluded).
    plain_operators = ["+", "-", "/", "%", "++", "--", "<", "<=", ">", ">=", "==", "!=", "&&", "||", "!", "|", "<<", ">>", "~", "^"]

    def qualifies(node):
        """Return True when ``node`` is an expression worth reporting."""
        # An operator whose first operand is a plain variable reference is
        # treated as an assignment-like node and rejected.
        first_child = list(node.get_children())[0]
        if first_child.kind == CursorKind.DECL_REF_EXPR:
            return False

        spellings = [str(token.spelling) for token in node.get_tokens()]
        last_position = len(spellings) - 1
        seen_operators = set()

        for position, spelling in enumerate(spellings):
            # Reject on the first '=' or on any token outside the allowed set.
            if spelling == "=" or not allowed_token.match(spelling):
                return False

            if spelling in plain_operators:
                seen_operators.add(spelling)
            elif (
                spelling in ("*", "&")
                and 0 < position < last_position
                and operand_before.match(spellings[position - 1])
                and operand_after.match(spellings[position + 1])
            ):
                seen_operators.add(spelling)

        # At least two different operators are required.
        if len(seen_operators) < 2:
            return False

        # Exactly '+' with '-', or exactly '*' with '/', share one precedence
        # level and are therefore excluded.
        return seen_operators not in ({"+", "-"}, {"*", "/"})

    def walk(node, collected):
        """Append the details of qualifying expressions under ``node``."""
        try:
            is_operator = (
                node.kind == CursorKind.BINARY_OPERATOR
                or node.kind == CursorKind.UNARY_OPERATOR
            )
            if not (is_operator and qualifies(node)):
                # Not reported itself: keep looking further down the tree.
                for child in node.get_children():
                    walk(child, collected)
                return

            token_texts = [token.spelling for token in list(node.get_tokens())]
            span = node.extent
            collected.append([
                " ".join(token_texts),
                str(span.start.line),
                str(span.start.column),
                str(span.end.line),
                str(span.end.column),
            ])
        except Exception as e:
            print("***Exception***:", e)

    details = []
    walk(root_cursor, details)
    return details

In [17]:
# Sample C program used to exercise the expression extractor.
# .strip() removes the blank first and last lines of the literal, so the
# "#include" line is line 1 of the text handed to the parser.
code = """
#include <stdio.h>

int main() {
    int a = 5, b = 2, c, d=7;

    // Arithmetic operators
    c = a / ((b * a) % b);
    d = a * b + c * d;

    return 0;
}

""".strip()

In [18]:
# Parse the sample program into a clang AST and keep its root cursor.
ast_root_cursor = generate_ast(code)
# Left as the cell's last expression so the notebook displays the returned list.
generate_expression_details(ast_root_cursor)

[['a / ( ( b * a ) % b )', '7', '9', '7', '26'],
 ['a * b + c * d', '8', '9', '8', '22']]