# ONLY USE THIS UTILITY WITH XML FILES THAT CONTAIN MULTIPLE TABLES

In [0]:
pip install xmltodict

In [0]:
%restart_python

In [0]:
import shutil
import xmltodict
import re
from collections import OrderedDict
import sys
import time
import os
from pathlib import Path


In [0]:
# --- SQL fragments used when assembling the generated queries ---------------
SELECT_STR = "SELECT "
GROUP_STR = "GROUP BY ALL"

# --- Keys used to navigate the parsed XML dictionaries ----------------------
# Keys starting with "@" are looked up as attributes of the parsed elements;
# the remaining ones are looked up as child element names.
AGG_BEHAV_KEY = "@aggregationType"
FILTER_EXP_KEY = "filter"
ELEM_FILTER_KEY = "elementFilter"
LAYOUT_KEY = "layout"  # NOTE(review): not referenced in the code visible here
ELEM_KEY = "viewAttributes"
INPUT_KEY = "input"
MAPPING_KEY = "mapping"
NAME_KEY = "@id"
TEXT_KEY = "#text"  # NOTE(review): not referenced in the code visible here
# VIEWNODE_KEY and ENTITY_KEY currently hold the same value ("@node").
VIEWNODE_KEY = "@node"
ENTITY_KEY = "@node"
LEFTINP_KEY = "@leftInput"  # NOTE(review): not referenced in the code visible here
RIGHTINP_KEY = "@rightInput"  # NOTE(review): not referenced in the code visible here
JOINTYP_KEY = "@joinType"
JOINATR_KEY = "joinAttribute"
SRC_KEY = "@source"
TRGT_KEY = "@target"
CALCS_KEY = "calculatedViewAttributes"
CALC_KEY = "calculatedViewAttribute"
FORMULA_KEY = "formula"
YCOORD_KEY = "@yCoordinate"  # NOTE(review): not referenced in the code visible here
XSITYP_KEY = "@xsi:type"
DATATYPE_KEY = "@datatype"

# --- Module-level state shared by the extractor functions -------------------
# node id -> {"query": <sql text>, "pos": <int>}; cleared per input file in main().
all_queries = dict()
# join node id -> {table/object name: alias}; filled by generate_full_join().
join_node_tbl_alias = dict()


def has_inline_type(elem):
    """Tell whether *elem* carries a ``@datatype`` entry.

    Returns the result of the membership test ``DATATYPE_KEY in elem``.
    """
    declares_datatype = DATATYPE_KEY in elem
    return declares_datatype


def is_num_type(elem):
    """Tell whether *elem* declares one of the numeric datatypes.

    Returns ``False`` when *elem* has no ``@datatype`` entry at all; otherwise
    returns whether that entry is one of the names listed below.
    """
    numeric_names = (
        "NUMBER",
        "INTEGER",
        "DECIMAL",
        "FLOAT",
        "DOUBLE",
        "REAL",
    )
    return has_inline_type(elem) and elem[DATATYPE_KEY] in numeric_names


def cast_type(elem):
    """Return the formula of a typed calculated attribute, cast to its datatype.

    * ``DATE``: unless the formula already contains a ``date(...)`` call, it
      is wrapped in ``try_to_date(..., 'yyyyMMdd')`` and every ``+`` is
      rewritten to `` || `` (string concatenation).
    * ``TIMESTAMP``: always wrapped in
      ``TO_TIMESTAMP_NTZ(..., 'yyyyMMddHHmmss')`` with the same ``+`` rewrite.
    * any other datatype: the formula is returned unchanged.

    Args:
        elem: mapping holding at least the ``formula`` and ``@datatype`` keys.

    Returns:
        The (possibly wrapped) formula string.
    """
    val = elem[FORMULA_KEY]
    datatype = elem[DATATYPE_KEY]
    if datatype == "DATE":
        # Raw string: "\s" and "\(" are regex escapes, not Python string
        # escapes (a plain literal triggers an invalid-escape warning).
        already_a_date = re.search(
            r"date[\r\n\s]*\(.*?\)", val, flags=re.IGNORECASE
        )
        if not already_a_date:
            val = "try_to_date(" + val.replace("+", " || ") + ", 'yyyyMMdd')"
    elif datatype == "TIMESTAMP":
        val = "TO_TIMESTAMP_NTZ(" + val.replace("+", " || ") + ", 'yyyyMMddHHmmss')"
    return val

def lower_except_quotes(text):
    """Lower-case *text* while leaving single-quoted literals untouched.

    Non-string input is returned unchanged.

    Characters that belong to neither a quoted literal nor a run of
    non-quote characters (i.e. an unpaired ``'``) are copied through as-is
    instead of being dropped, and a quoted literal may span line breaks.

    Args:
        text: the SQL text to normalise (or any non-string value).

    Returns:
        The normalised string, or *text* itself when it is not a string.
    """
    if not isinstance(text, str):
        return text

    def keep_or_lower(match):
        segment = match.group(0)
        # Only the first alternative can start with a quote.
        return segment if segment.startswith("'") else segment.lower()

    # re.sub keeps unmatched characters in place; DOTALL lets '.' cross
    # newlines so a literal such as 'a,\nb' is still recognised as quoted.
    return re.sub(r"'.*?'|[^']+", keep_or_lower, text, flags=re.DOTALL)
def is_list(obj):
    """Tell whether *obj* is a ``list`` (or an instance of a list subclass)."""
    verdict = isinstance(obj, list)
    return verdict


def newline_beatify(s):
    """Return *s* with a line break inserted after every comma.

    Text after the last comma (or a string without commas) is left as-is.
    This is a plain substring replacement, equivalent to the regex
    substitution it replaces, which passed ``count``/``flags`` to ``re.sub``
    positionally (deprecated in recent Python versions).
    """
    return s.replace(",", ",\n")


def is_elem_aggregated(elem):
    """Tell whether *elem* declares an aggregation to apply.

    Returns ``False`` when the ``@aggregationType`` entry is missing, equals
    ``"NONE"`` or equals the value of ``FORMULA_KEY``; ``True`` otherwise.
    """
    if AGG_BEHAV_KEY not in elem:
        return False
    behaviour = elem[AGG_BEHAV_KEY]
    return not (behaviour == "NONE" or behaviour == FORMULA_KEY)


def replace_dict(query, rep_dict):
    """Apply every ``old -> new`` pair of *rep_dict* to ``str(query)``.

    Replacements run one after another in the dictionary's iteration order,
    so a later pair also sees the text produced by earlier ones.
    """
    result = str(query)
    for old in rep_dict:
        result = result.replace(old, rep_dict[old])
    return result


def _rewrite_sap_calls(text, pattern, build):
    """Rewrite every call in *text* that matches *pattern* (case-insensitive).

    Each distinct matched substring is replaced, everywhere in *text*, by
    ``build(args)`` where ``args`` is the content of the first ``(...)``
    group inside the match.
    """
    replacements = {
        call: build(re.findall(r"\((.*?)\)", call)[0])
        for call in re.findall(pattern, text, flags=re.IGNORECASE)
    }
    return replace_dict(text, replacements)


def repl_sap_func(s):
    """Rewrite SAP-style function calls in *s* into their SQL counterparts.

    The rewrites run in a fixed order and each one works on the output of the
    previous one. All regex patterns are raw strings so that ``\\s`` / ``\\(``
    reach the regex engine instead of being (invalid) Python string escapes.

    Args:
        s: SQL text assembled by one of the ``*_qry_extract`` functions.

    Returns:
        The rewritten text. As a side effect of the IF step, every double
        quote character is removed from the text.
    """
    s = s.replace("if( ", "IF(")
    # now(...) -> current_timestamp
    mod_str = _rewrite_sap_calls(
        s, r"now\(.*?\)", lambda _args: "current_timestamp"
    )
    # isnull(x) -> x is null
    mod_str = _rewrite_sap_calls(
        mod_str, r"isnull[\r\n\s]*\(.*?\)", lambda args: args + " is null"
    )
    # IF (...) -> if(...); afterwards ALL double quotes are stripped.
    mod_str = _rewrite_sap_calls(
        mod_str, r"IF[\r\n\s]*\(.*?\)", lambda args: "if(" + args + ")"
    ).replace('"', '')
    # case(...) -> decode(...)
    mod_str = _rewrite_sap_calls(
        mod_str, r"case[\r\n\s]*\(.*?\)", lambda args: "decode(" + args + ")"
    ).replace("case(", "DECODE(")
    # midstr(...) -> substring(...)
    mod_str = _rewrite_sap_calls(
        mod_str, r"midstr[\r\n\s]*\(.*?\)", lambda args: "substring(" + args + ")"
    )
    # leftstr(...) -> left(...)
    mod_str = _rewrite_sap_calls(
        mod_str, r"leftstr[\r\n\s]*\(.*?\)", lambda args: "left(" + args + ")"
    )
    # rightstr(...) -> right(...)
    mod_str = _rewrite_sap_calls(
        mod_str, r"rightstr[\r\n\s]*\(.*?\)", lambda args: "right(" + args + ")"
    )
    # match(...) -> rlike(...), turning the "*" wildcard into the regex ".*"
    mod_str = _rewrite_sap_calls(
        mod_str,
        r"match[\r\n\s]*\(.*?\)",
        lambda args: "rlike(" + args.replace("*", ".*") + ")",
    )
    # date(...) at the very start of the text -> date(..., 'yyyyMMdd').
    # The backslash-escaped quotes are emitted literally here; main() turns
    # "\'" back into "'" when writing the file.
    mod_str = _rewrite_sap_calls(
        mod_str,
        r"^date[\r\n\s]*\(.*?\)",
        lambda args: "date(" + args + r",\'yyyyMMdd\')",
    )
    # daysbetween(...) -> datediff(day, ..., 'yyyyMMdd')
    # NOTE(review): the trailing format argument is kept from the original
    # implementation; confirm it is what the target SQL dialect expects.
    mod_str = _rewrite_sap_calls(
        mod_str,
        r"daysbetween[\r\n\s]*\(.*?\)",
        lambda args: "datediff(day," + args + r",\'yyyyMMdd\')",
    )
    # format(...) -> cast(... as string)
    mod_str = _rewrite_sap_calls(
        mod_str,
        r"format[\r\n\s]*\(.*?\)",
        lambda args: "cast(" + args + " as string)",
    )
    # string(x) -> x::string. The word boundary keeps this step from matching
    # the tail of "substring(" produced by the midstr step above, which would
    # otherwise be rewritten into "sub<args>::string".
    mod_str = _rewrite_sap_calls(
        mod_str, r"\bstring\(.*?\)", lambda args: args + "::string"
    )
    # in(a, b, c) -> a IN (b, c)
    # The pattern has a capture group, so findall returns only the argument
    # text; the dictionary keys (and hence the substrings replaced) are the
    # argument lists, not the whole "in(...)" call. The leftover "in(" text
    # is removed by the clean-up chain in main().
    # NOTE(review): ("ltrim") is a plain string, not a tuple, so that test is
    # a substring check against "ltrim". Left unchanged because main()'s
    # clean-up depends on the exact text produced here.
    matches = re.findall(  
        r'in\(\s*([^)]*?)\s*\)', mod_str,flags=re.IGNORECASE  
    )  
    match_dict = {  
        item: f"{item.split(',')[0] if item.split('(')[0] not in ('left','right') else item.split(',')[0] + ',' + item.split(',')[1]} IN ({', '.join([v.strip().replace(')', '') for v in (item.split(',')[1:] if item.split('(')[0] not in ('left','right') else item.split(',')[2:])])}"   if item.split('(')[0] not in ("ltrim") else item + ') in ('
        for item in matches 
    }
    mod_str = replace_dict(mod_str, match_dict)
    return mod_str


def _value_filter_condition(elem_filter):
    """Return the SQL predicate for one ``elementFilter`` entry.

    Supports ``Column:SingleValueFilter`` (``"col"='value'``) and
    ``Column:ListValueFilter`` (``"col" IN ('a','b')``); returns ``None`` for
    any other filter type.
    """
    value_filter = elem_filter["valueFilter"]
    filter_type = value_filter[XSITYP_KEY]
    column = '"' + elem_filter["@elementName"] + '"'
    if filter_type == "Column:SingleValueFilter":
        return column + "='" + value_filter["@value"] + "'"
    if filter_type == "Column:ListValueFilter":
        operands = value_filter["operands"]
        if not is_list(operands):
            # A single operand is one mapping; wrap it rather than calling
            # list() on it, which would yield the mapping's keys.
            operands = [operands]
        quoted = ["'" + op["@value"] + "'" for op in operands]
        return column + " IN (" + ",".join(quoted) + ")"
    return None


def get_filter_exp(node):
    """Build the ``WHERE`` clause for *node*, or ``""`` when it has no filter.

    A ``filter`` entry takes precedence and is used verbatim. Otherwise the
    ``elementFilter`` entry (one mapping or a list of them) is translated
    into predicates that are combined with ``AND``.

    Args:
        node: mapping describing one calculation view node.

    Returns:
        ``"WHERE ..."`` or an empty string; never ``None``, so callers can
        concatenate the result directly.
    """
    if FILTER_EXP_KEY in node:
        return "WHERE " + node[FILTER_EXP_KEY]
    if ELEM_FILTER_KEY not in node:
        return ""

    elem_filters = node[ELEM_FILTER_KEY]
    if not is_list(elem_filters):
        elem_filters = [elem_filters]

    conditions = []
    for elem_filter in elem_filters:
        condition = _value_filter_condition(elem_filter)
        if condition is not None:
            conditions.append(condition)
    if not conditions:
        return ""
    # Joining (instead of appending "AND" after each predicate) avoids both a
    # dangling trailing AND and a missing AND after list filters.
    return "WHERE " + "\nAND\n".join(conditions)


def dataSources(s):
    """Resolve the source name used in a ``from`` clause.

    Reads ``full_xml_nodes["dataSources"]["DataSource"]`` (a module global
    assigned in ``main``) and decides on the FIRST item produced by iterating
    it; the loop never reaches a second item:

    * the item is a string (which is what iterating a single mapping yields):
      return ``str([<DataSource "@id">])``;
    * the item's ``"@id"`` value is also one of the item's keys: return
      ``str([item])``;
    * otherwise: return ``str([s])`` with every ``#`` removed.

    Returns ``None`` when there is nothing to iterate.

    NOTE(review): the second case tests an id *value* against the item's
    *keys*, which looks unintended — confirm the desired lookup.
    """
    declared = full_xml_nodes["dataSources"]["DataSource"]
    for entry in declared:
        if isinstance(entry, str):
            return str([declared["@id"]])
        if entry["@id"] in entry:
            return str([entry])
        return str([s]).replace("#", "")
    return None
            


def get_obj(s):
    """Return the part of *s* after its last ``/`` (all of *s* if none)."""
    _head, _sep, tail = s.rpartition("/")
    return tail


def get_from_part(node):
    """Build the ``from <source>`` clause for an input *node*.

    The source is resolved by ``dataSources`` from the node's ``@node``
    entry; the ``['`` / ``']`` wrapping of that result is stripped.
    """
    clause = "from " + dataSources(node[VIEWNODE_KEY])
    for wrapper in ("['", "']"):
        clause = clause.replace(wrapper, "")
    return clause


def type_of_join(s):
    """Translate a join-type name into the SQL join keyword (space padded).

    Matching is case-insensitive and substring based:

    * ``leftouter``             -> ``" left join "``
    * ``rightouter``            -> ``" right join "``
    * ``fullouter``             -> ``" full join "``
    * ``inner`` / ``referential`` -> ``" inner join "``

    Raises:
        ValueError: for any other join type. Previously the function fell
            through and returned ``None``, which only surfaced later as an
            opaque ``TypeError`` during string concatenation.
    """
    join_type = s.lower()
    if "leftouter" in join_type:
        return " left join "
    if "rightouter" in join_type:
        return " right join "
    if "fullouter" in join_type:
        return " full join "
    if "inner" in join_type or "referential" in join_type:
        return " inner join "
    raise ValueError(f"Unsupported join type: {s!r}")


def get_cols_from_map(mp, reverse=True, fromnode=None):
    """Turn a ``key -> value`` mapping into a list of select-list entries.

    For every pair the value is compared with the key, ignoring case and
    surrounding whitespace. When *fromnode* is ``"JoinNode"`` only the part
    of the value after its last ``.`` takes part in the comparison.

    * equal:     the entry is the value (``reverse=True``) or the key.
    * different: the entry is ``"value key"`` (``reverse=True``) or
      ``"key value"``.
    """
    ignore_alias_prefix = fromnode == "JoinNode"
    res = []
    for k, v in mp.items():
        comparable = v.split(".")[-1] if ignore_alias_prefix else v
        same_name = comparable.strip().lower() == k.strip().lower()
        lead, trail = (v, k) if reverse else (k, v)
        res.append(lead if same_name else lead + " " + trail)
    return res


def get_calc_cols_from_map(mp, is_join=False):
    """Format calculated columns given as a ``name -> formula`` mapping.

    * ``is_join`` falsy: returns a list. A formula equal to its name
      (ignoring case and surrounding whitespace) is emitted unchanged, any
      other as ``"<formula> AS <name>"``.
    * ``is_join`` truthy: returns a dict keyed by name. The formula is
      stripped first; a formula equal to its name is stored as-is, any other
      as ``"<formula> AS "`` (the name itself is not appended here).
    """
    if is_join:
        aliased = {}
        for name, formula in mp.items():
            formula = formula.strip()
            if formula.lower() == name.strip().lower():
                aliased[name] = formula
            else:
                aliased[name] = formula + " AS "
        return aliased
    return [
        formula
        if formula.strip().lower() == name.strip().lower()
        else formula + " AS " + name
        for name, formula in mp.items()
    ]


def generate_on_part(leftcols, rightcols, leftalias, rightalias):
    """Build the ``ON ...`` clause of a join.

    * *leftcols* is a list: each entry is a mapping with ``@source`` and
      ``@target``. An entry contributes
      ``<leftalias>.<left source> = <rightalias>.<right source>`` (the right
      entry is taken at the same index) when its ``@target`` appears among
      the ``@target`` values of *rightcols*. Conditions are joined with
      ``" AND\\n"``.
    * *leftcols* is a string: both arguments are used as plain column names.
    * anything else: returns an empty string.
    """
    if is_list(leftcols):
        conditions = []
        for idx, left in enumerate(leftcols):
            if left["@target"] not in [right["@target"] for right in rightcols]:
                continue
            left_side = leftalias + "." + str(left["@source"])
            right_side = rightalias + "." + str(rightcols[idx]["@source"])
            conditions.append(left_side + " = " + right_side)
        return "ON " + " AND\n".join(conditions)
    if isinstance(leftcols, str):
        return "ON " + leftalias + "." + leftcols + " = " + rightalias + "." + rightcols
    return ""


def is_calc_col(col):
    """Tell whether *col* contains ``"calculatedViewAttribute"``.

    ``None`` is never a calculated column; for anything else the result is
    the membership test ``"calculatedViewAttribute" in col``.
    """
    return col is not None and "calculatedViewAttribute" in col


def get_calc_columns(elem):
    """Collect ``name -> formula`` for the calculated attributes of *elem*.

    ``elem[CALC_KEY]`` may be a single mapping or a list of mappings; any
    other value yields an empty result. An attribute that declares a
    datatype has its formula passed through ``cast_type``; otherwise the raw
    formula is used.
    """
    calc_attrs = elem[CALC_KEY]
    if isinstance(calc_attrs, dict):
        calc_attrs = [calc_attrs]
    elif not is_list(calc_attrs):
        calc_attrs = []

    mapping = dict()
    for attr in calc_attrs:
        if has_inline_type(attr):
            formula = cast_type(attr)
        else:
            formula = attr[FORMULA_KEY]
        mapping[attr[NAME_KEY]] = formula
    return mapping


def wrap_agg(s, fun):
    """Return ``"<fun>(<s>) AS "``; the caller appends the alias."""
    pieces = [fun, "(", s, ") AS "]
    return "".join(pieces)


def generate_full_join(node, node_name):
    """Build the ``FROM ... <join> ... ON ...`` text for a join node.

    Side effect: stores the ``{object name: alias}`` map for this node in the
    module-level ``join_node_tbl_alias`` under *node_name*.

    Args:
        node: mapping describing the join node; read via ``INPUT_KEY``,
            ``JOINTYP_KEY``, ``JOINATR_KEY`` and, when the input is not a
            list, ``VIEWNODE_KEY`` / ``leftElementName`` / ``rightElementName``.
        node_name: key under which the alias map is recorded.

    Returns:
        The join text, starting with ``"FROM\\n"``.
    """
    all_tbls_alias = dict()
    all_tbls_alias_repl = dict()
    all_tbls_alias_repl1 = dict()
    mapping_left = dict()
    mapping_right = dict()
    full_join = "FROM\n"
    if is_list(node[INPUT_KEY]):
        i = 0
        # Per iteration i advances by two: the left input is input[j] with
        # alias "t<i>" and the right input is input[i - 1] with alias "t<i>".
        # setdefault keeps an alias that was already assigned to an input.
        for j in range(len(node[INPUT_KEY]) - 1):
            i += 1
            all_tbls_alias.setdefault((node[INPUT_KEY][j][VIEWNODE_KEY]), "t" + str(i))
            i += 1
            all_tbls_alias.setdefault(
                (node[INPUT_KEY][i - 1][VIEWNODE_KEY]), "t" + str(i)
            )
            # target -> source for the left input (single mapping or list).
            if isinstance(node[INPUT_KEY][j][MAPPING_KEY], list):
                for m in range(len(node[INPUT_KEY][j][MAPPING_KEY])):
                    mapping_left[node[INPUT_KEY][j][MAPPING_KEY][m]['@target']] =node[INPUT_KEY][j][MAPPING_KEY][m]['@source']
            else:
                mapping_left[node[INPUT_KEY][j][MAPPING_KEY]['@target']] = node[INPUT_KEY][j][MAPPING_KEY]['@source']
            # target -> [source] for the right input. Unlike the left side the
            # source is wrapped in a one-element list, so generate_on_part's
            # str() renders it as "['col']"; main() strips "['" and "']" when
            # writing the file.
            if isinstance(node[INPUT_KEY][i-1][MAPPING_KEY], list):
                for n in range(len(node[INPUT_KEY][i-1][MAPPING_KEY])):
                    mapping_right[node[INPUT_KEY][i-1][MAPPING_KEY][n]['@target']] = [node[INPUT_KEY][i-1][MAPPING_KEY][n]['@source']]
            else:
                mapping_right[node[INPUT_KEY][i-1][MAPPING_KEY]['@target']] = [node[INPUT_KEY][i-1][MAPPING_KEY]['@source']]
            
            if j == 0:
                # First pair: "<left> <alias>\n<join type>\n<right> <alias>\n".
                join_part = (
                    node[INPUT_KEY][j][VIEWNODE_KEY]
                    + " "
                    + all_tbls_alias[node[INPUT_KEY][j][VIEWNODE_KEY]] 
                    + "\n"
                    + type_of_join(node[JOINTYP_KEY])
                    + "\n"
                    + 
                    node[INPUT_KEY][i-1][VIEWNODE_KEY] 
                    + " "
                    + all_tbls_alias[node[INPUT_KEY][i - 1][VIEWNODE_KEY]]
                    + "\n"
                )
            else:
                # NOTE(review): this branch indexes `node` itself with the
                # integer j, whereas everywhere else `node` is indexed with
                # string keys. It is only reached with more than two inputs
                # and looks like it cannot work as written — confirm.
                join_part = (
                    "\n"
                    + type_of_join(node[j][JOINTYP_KEY])
                    + "\n"
                    + node[j][VIEWNODE_KEY]
                    + " "
                    + all_tbls_alias[node[j][VIEWNODE_KEY]]
                    + "\n"
                )
            
            # Resolve each join attribute name to the source column of the
            # left / right input (join attribute may be one mapping or a list).
            mapped_left = [{'@target': x['@name'], '@source': mapping_left[x['@name']]} for x in node[JOINATR_KEY]] if isinstance(node[JOINATR_KEY], list) else [{'@target': node[JOINATR_KEY]['@name'], '@source': mapping_left[node[JOINATR_KEY]['@name']]}]
            mapped_right = [{'@target': x['@name'], '@source': mapping_right[x['@name']]} for x in node[JOINATR_KEY]] if isinstance(node[JOINATR_KEY], list) else [{'@target': node[JOINATR_KEY]['@name'], '@source': mapping_right[node[JOINATR_KEY]['@name']]}]
  
            on_part = generate_on_part(
                mapped_left,
                mapped_right,
                all_tbls_alias[node[INPUT_KEY][j][VIEWNODE_KEY]],
                all_tbls_alias[node[INPUT_KEY][i - 1][VIEWNODE_KEY]],
            )
            join_part = join_part + on_part
            full_join = full_join + join_part
        # Shorten every full input reference to the text after its last "/"
        # and build the {short name: alias} map recorded for this node.
        for k, v in all_tbls_alias.items():
            all_tbls_alias_repl[k] = get_obj(k)
            all_tbls_alias_repl1[get_obj(k)] = v
        full_join = replace_dict(full_join, all_tbls_alias_repl)
        join_node_tbl_alias.update({node_name: all_tbls_alias_repl1})
    else:
        # Input is not a list: both sides of the join use node[VIEWNODE_KEY],
        # aliased t1 and t2.
        leftalias = "t1"
        rightalias = "t2"
        all_tbls_alias.setdefault(node[VIEWNODE_KEY], leftalias)
        # Same key as the line above, so this setdefault changes nothing and
        # only the "t1" alias is recorded.
        all_tbls_alias.setdefault(node[VIEWNODE_KEY], rightalias)
        join_part = (
            node[VIEWNODE_KEY]
            + " "
            + leftalias
            + "\n"
            + type_of_join(node[JOINTYP_KEY])
            + "\n"
            + node[VIEWNODE_KEY]
            + " "
            + rightalias
            + "\n"
        )
        on_part = generate_on_part(
            node["leftElementName"], node["rightElementName"], leftalias, rightalias
        )
        
        join_part = join_part + on_part
        full_join = full_join + join_part
        for k, v in all_tbls_alias.items():
            all_tbls_alias_repl[k] = get_obj(k)
            all_tbls_alias_repl1[get_obj(k)] = v
        full_join = replace_dict(full_join, all_tbls_alias_repl)
    # Recorded for both branches (the list branch has already stored the same
    # map once above).
    join_node_tbl_alias.update({node_name: all_tbls_alias_repl1})
    return full_join


def get_other_colmap(nodes, othercols, node_name) -> dict:
    """Map each non-calculated output column of a join node to its source.

    First builds ``target -> "<alias>.<source>"`` from the mappings of every
    input that has a ``mapping`` entry, using the aliases recorded for
    *node_name* in ``join_node_tbl_alias`` (the first mapping seen for a
    target wins). Then, for every column in *othercols*, looks the column
    name up in that table and wraps the result with ``wrap_agg`` when the
    column is aggregated.

    Returns:
        dict of column name -> source expression.
    """
    join_nodes = [x for x in nodes[INPUT_KEY] if MAPPING_KEY in x]
    node_aliases = join_node_tbl_alias[node_name]

    all_join_nodes = dict()
    for item in join_nodes:
        if VIEWNODE_KEY in item:
            ref_key = VIEWNODE_KEY
        elif ENTITY_KEY in item:
            ref_key = ENTITY_KEY
        else:
            continue
        if is_list(item[MAPPING_KEY]):
            for x in item[MAPPING_KEY]:
                all_join_nodes.setdefault(
                    x[TRGT_KEY],
                    node_aliases[get_obj(item[ref_key])] + "." + x[SRC_KEY],
                )
        else:
            single = item[MAPPING_KEY]
            all_join_nodes.setdefault(
                single[TRGT_KEY],
                node_aliases[get_obj(item[ref_key])] + "." + single[SRC_KEY],
            )

    src_trgt_map = dict()
    for col in othercols:
        aggregated = is_elem_aggregated(col)
        column_name = col[NAME_KEY]
        source = all_join_nodes[column_name]
        if aggregated:
            source = wrap_agg(source, col[AGG_BEHAV_KEY])
        src_trgt_map.setdefault(column_name, source)
    return src_trgt_map


def filter_join_nodes(all_nodes):
    """Return the join views of *all_nodes* that declare a join type.

    A view qualifies when it is a mapping whose ``@xsi:type`` is
    ``"Calculation:JoinView"`` and which has a ``@joinType`` entry.

    The earlier implementation also had a branch for views that are lists;
    it could never run because only mappings pass the first filter, so it
    has been removed. The result is unchanged.

    Args:
        all_nodes: mapping whose ``"calculationView"`` entry is iterated.

    Returns:
        list of qualifying view mappings, in document order.
    """
    return [
        view
        for view in all_nodes["calculationView"]
        if isinstance(view, dict)
        and view.get(XSITYP_KEY, "") == "Calculation:JoinView"
        and JOINTYP_KEY in view
    ]


def projection_qry_extract(projection_nodes):
    """Generate one SELECT per projection node and store it in ``all_queries``.

    For each node the select list is built from the input mappings
    (``source target`` pairs) followed by the node's calculated attributes,
    then combined with the ``from`` and ``WHERE`` parts and post-processed by
    ``repl_sap_func`` and ``newline_beatify``.

    Compared with the earlier version, a node with a single input mapping
    (one mapping instead of a list) and a node whose calculated-attributes
    entry is missing or has no ``calculatedViewAttribute`` are handled
    instead of raising.

    Args:
        projection_nodes: iterable of projection view mappings.
    """
    queries = dict()
    for i, node in enumerate(projection_nodes):
        from_part = get_from_part(node[INPUT_KEY])
        where_part = get_filter_exp(node)
        from_with_filter = from_part + "\n" + where_part

        mappings = node[INPUT_KEY][MAPPING_KEY]
        if not is_list(mappings):
            # Same normalisation the aggregation and rank extractors apply.
            mappings = [mappings]
        col_dict = {x[TRGT_KEY]: x[SRC_KEY] for x in mappings}
        columns = get_cols_from_map(col_dict)

        calc_attrs = node.get(CALCS_KEY)
        if is_calc_col(calc_attrs):
            columns.extend(get_calc_cols_from_map(get_calc_columns(calc_attrs)))

        select_part = SELECT_STR + ", ".join(columns) + "\n"
        full_query = newline_beatify(repl_sap_func(select_part + from_with_filter))
        queries[node[NAME_KEY]] = {"query": full_query, "pos": i}
    all_queries.update(queries)


def aggregation_qry_extract(aggregation_nodes):
    """Generate one aggregating SELECT per node and store it in ``all_queries``.

    For each node: build the ``from``/``WHERE`` parts, map every view
    attribute to its source column (or to its calculated formula), wrap
    aggregated attributes with ``wrap_agg``, add the node-level calculated
    attribute where applicable, and finish with ``GROUP BY ALL``. The text is
    post-processed by ``repl_sap_func`` and ``newline_beatify``.

    Args:
        aggregation_nodes: iterable of aggregation view mappings.
    """
    queries = dict()
    for i, node in enumerate(aggregation_nodes):
        from_part = get_from_part(node[INPUT_KEY])
        where_part = get_filter_exp(node)
        from_with_filter = from_part + "\n" + where_part
        final_cols = dict()
        group_by_part = GROUP_STR
        # target -> source of the input mappings (single mapping or list).
        source_target_mapping = dict()
        if not isinstance(node[INPUT_KEY][MAPPING_KEY], list):
            source_target_mapping.update(
                {
                    node[INPUT_KEY][MAPPING_KEY][TRGT_KEY]: node[INPUT_KEY][
                        MAPPING_KEY
                    ][SRC_KEY]
                }
            )
        else:
            source_target_mapping.update(
                {x[TRGT_KEY]: x[SRC_KEY] for x in node[INPUT_KEY][MAPPING_KEY]}
            )
        # Taken when viewAttribute is a list, or any container of length 1
        # (for a single mapping that means it has exactly one key).
        if (
            isinstance(node[ELEM_KEY]["viewAttribute"], list)
            or len(node[ELEM_KEY]["viewAttribute"]) == 1
        ):
            if isinstance(node[ELEM_KEY]["viewAttribute"], dict):
                # Single attribute with one key: mapped source, else its
                # calculated formula ("+" becomes " || " unless numeric).
                # NOTE(review): no wrap_agg is applied on this path.
                if node[ELEM_KEY]["viewAttribute"][NAME_KEY] in source_target_mapping:
                    final_cols[
                        node[ELEM_KEY]["viewAttribute"][NAME_KEY]
                    ] = source_target_mapping[node[ELEM_KEY]["viewAttribute"][NAME_KEY]]
                else:
                    final_cols[node[ELEM_KEY]["viewAttribute"][NAME_KEY]] = (
                        node[ELEM_KEY]["viewAttribute"][CALC_KEY][FORMULA_KEY]
                        if is_num_type(node[ELEM_KEY]["viewAttribute"])
                        else node[ELEM_KEY]["viewAttribute"][CALC_KEY][
                            FORMULA_KEY
                        ].replace("+", " || ")
                    )                   
            else:
                # List of attributes: resolve each one, then wrap it with its
                # aggregation function when it is aggregated.
                for x in node[ELEM_KEY]["viewAttribute"]:
                    if x[NAME_KEY] in source_target_mapping:
                        final_cols[x[NAME_KEY]] = source_target_mapping[x[NAME_KEY]]
                    else:
                        final_cols[x[NAME_KEY]] = (
                            x[CALC_KEY][FORMULA_KEY]
                            if is_num_type(x)
                            else x[CALC_KEY][FORMULA_KEY].replace("+", " || ")
                        )
                    final_cols[x[NAME_KEY]] = (
                        wrap_agg(final_cols[x[NAME_KEY]], x[AGG_BEHAV_KEY])
                        if is_elem_aggregated(x)
                        else final_cols[x[NAME_KEY]]
                    )
        else:
            # Single attribute mapping with more than one key: mapped source
            # or None, then wrapped when aggregated.
            # NOTE(review): an aggregated but unmapped attribute passes None
            # to wrap_agg, which concatenates strings — confirm this cannot
            # happen for valid input.
            final_cols[node[ELEM_KEY]["viewAttribute"][NAME_KEY]] = (
                source_target_mapping[node[ELEM_KEY]["viewAttribute"][NAME_KEY]]
                if node[ELEM_KEY]["viewAttribute"][NAME_KEY] in source_target_mapping
                else None
            )
            final_cols[node[ELEM_KEY]["viewAttribute"][NAME_KEY]] = (
                wrap_agg(
                    final_cols[node[ELEM_KEY]["viewAttribute"][NAME_KEY]],
                    node[ELEM_KEY]["viewAttribute"][AGG_BEHAV_KEY],
                )
                if is_elem_aggregated(node[ELEM_KEY]["viewAttribute"])
                else final_cols[node[ELEM_KEY]["viewAttribute"][NAME_KEY]]
            )
        
        # Node-level calculated attribute given as ONE mapping with 4 or 5
        # keys and not yet in final_cols: numeric -> "<formula without double
        # quotes>::<datatype> AS ", otherwise formula with "+" -> " || ".
        # NOTE(review): the meaning of the 4/5 key-count test is not visible
        # in this file.
        if node[CALCS_KEY] is not None and CALC_KEY in node[CALCS_KEY] and node[CALCS_KEY][CALC_KEY] is not None and isinstance(node[CALCS_KEY][CALC_KEY], dict) and len(node[CALCS_KEY][CALC_KEY]) in (4,5) and node[CALCS_KEY][CALC_KEY][NAME_KEY] not in final_cols:
            final_cols[node[CALCS_KEY][CALC_KEY][NAME_KEY]] = (
                        node[CALCS_KEY][CALC_KEY][FORMULA_KEY].replace('"', '') + "::" + node[CALCS_KEY][CALC_KEY][DATATYPE_KEY] + ' AS '
                        if is_num_type(node[CALCS_KEY][CALC_KEY])
                        else node[CALCS_KEY][CALC_KEY][FORMULA_KEY]
                        .replace("+", " || ")
                        )
        # Same single-mapping case with any other key count.
        # NOTE(review): the loop below iterates a mapping, which yields its
        # keys (strings), so x[NAME_KEY] presumably raises and the bare
        # `except` fallback is what actually runs. A LIST of calculated
        # attributes is handled by neither branch. The bare `except` also
        # hides unrelated errors — consider narrowing it.
        elif node[CALCS_KEY] is not None and CALC_KEY in node[CALCS_KEY] and node[CALCS_KEY][CALC_KEY] is not None and isinstance(node[CALCS_KEY][CALC_KEY], dict) and node[CALCS_KEY][CALC_KEY][NAME_KEY] not in final_cols:
            try:
                for x in node[CALCS_KEY][CALC_KEY]:
                    
                        final_cols[x[NAME_KEY]] = (
                            x[FORMULA_KEY].replace('"', '') + "::" + x[DATATYPE_KEY] + ' AS ' 
                            if isinstance(x, dict) and is_num_type(x)
                            else
                            x[FORMULA_KEY]
                            .replace("+", " || ")
                            )
            except:
                final_cols[node[CALCS_KEY][CALC_KEY][NAME_KEY]] = (node[CALCS_KEY][CALC_KEY][FORMULA_KEY].replace("+", " || "))

         
        # "<expression> <name>" entries (or just the expression when equal).
        select_cols = get_cols_from_map(final_cols)    
        select_part = SELECT_STR + ", ".join(select_cols)
        
        full_query = newline_beatify(
            repl_sap_func(select_part + "\n" + from_with_filter + "\n" + group_by_part)
        )
        
        queries[node[NAME_KEY]] = {"query": full_query, "pos": int(i)}
    all_queries.update(queries)


def join_qry_extract(join_nodes):
    """Generate one SELECT per join node and store it in ``all_queries``.

    For each node the join text comes from ``generate_full_join`` (which also
    records the table aliases that ``get_other_colmap`` needs, so it must run
    first). The select list combines the mapped columns with the calculated
    ones; ``GROUP BY ALL`` is appended when any attribute is aggregated.
    After ``repl_sap_func``, every back-quoted column name is replaced by its
    source expression.
    """
    queries = dict()
    for i, node in enumerate(join_nodes):
        view_attrs = node[ELEM_KEY]["viewAttribute"]
        agg_elements = {
            x[AGG_BEHAV_KEY] for x in view_attrs if is_elem_aggregated(x)
        }
        where_part = get_filter_exp(node)
        group_by_part = GROUP_STR if any(agg_elements) else ""

        full_join = generate_full_join(node, node_name=node[NAME_KEY])

        other_columns = [x for x in view_attrs if not is_calc_col(x)]
        other_col_map = get_other_colmap(node, other_columns, node[NAME_KEY])

        calc_attrs = node["calculatedViewAttributes"]
        if is_calc_col(calc_attrs):
            calculated_colmap = get_calc_columns(calc_attrs)
        else:
            calculated_colmap = {}
        calculated_colmap = get_calc_cols_from_map(calculated_colmap, True)

        # Calculated columns override mapped ones that share a name.
        source_target_mapping = {**other_col_map, **calculated_colmap}
        select_cols = get_cols_from_map(source_target_mapping, fromnode="JoinNode")
        select_qry = SELECT_STR + ", ".join(select_cols)

        full_query = repl_sap_func(
            "\n".join([select_qry, full_join, where_part, group_by_part])
        )
        backticked = {"`" + k + "`": v for k, v in other_col_map.items()}
        full_query = newline_beatify(
            replace_dict(query=full_query, rep_dict=backticked)
        )
        queries[node[NAME_KEY]] = {"query": full_query, "pos": int(i)}

    all_queries.update(queries)


def rank_qry_extract(rank_nodes):
    """Generate one ranking SELECT per rank node and store it in ``all_queries``.

    The select list holds the mapped input columns plus a
    ``RANK() OVER (PARTITION BY ... ORDER BY ...)`` expression. The rank
    column is the first view attribute that is not an input mapping target,
    or ``r1`` when every attribute is mapped. The ``WHERE`` part compares the
    rank column with the node's rank threshold.

    Fix: the rank column used to stay a *list* whenever an unmapped attribute
    existed (the ``[0]`` was taken only in the "no unmapped attribute"
    branch), which made it unusable as a dictionary key.

    Args:
        rank_nodes: iterable of rank view mappings.
    """
    queries = dict()
    for i, node in enumerate(rank_nodes):
        view_attrs = node[ELEM_KEY]["viewAttribute"]
        if not is_list(view_attrs):
            view_attrs = [view_attrs]
        all_cols = [x[NAME_KEY] for x in view_attrs]
        from_part = get_from_part(node[INPUT_KEY])

        input_mapping = node[INPUT_KEY][MAPPING_KEY]
        if not isinstance(input_mapping, list):
            input_mapping = [input_mapping]
        source_target_mapping = {x[TRGT_KEY]: x[SRC_KEY] for x in input_mapping}

        window = node["windowFunction"]
        partition = window["partitionViewAttributeName"]
        partitionby_part = "PARTITION BY " + (
            partition if isinstance(partition, str) else ",".join(partition)
        )

        order = window["order"]
        if not isinstance(order, list):
            order = [order]
        orderby_cols = [
            x["@byViewAttributeName"] + " " + x["@direction"] for x in order
        ]
        orderby_part = " ORDER BY " + ", ".join(orderby_cols)
        rank_threshold = window["rankThreshold"]["value"]

        rank_part = "RANK() OVER (" + partitionby_part + orderby_part + ")"

        unmapped_cols = [x for x in all_cols if x not in source_target_mapping]
        rank_col = unmapped_cols[0] if unmapped_cols else "r1"

        # NOTE(review): the filter references the rank alias defined in the
        # same select list; confirm the target SQL dialect accepts that.
        where_part = "WHERE " + str(rank_col) + "= " + rank_threshold
        from_with_filter = from_part + "\n" + where_part
        source_target_mapping[rank_col] = rank_part

        select_part = SELECT_STR + ", ".join(get_cols_from_map(source_target_mapping))
        full_query = newline_beatify(
            repl_sap_func(select_part + "\n" + from_with_filter)
        )
        queries[node[NAME_KEY]] = {"query": full_query, "pos": i}
    all_queries.update(queries)


def union_qry_extract(union_nodes):
    """Generate one ``UNION ALL`` query per union node into ``all_queries``.

    Each input of the node becomes a ``SELECT ... from <source>`` branch.
    A ``Calculation:AttributeMapping`` contributes ``source target``; every
    other mapping type contributes ``NULL AS target``.

    Fixes over the earlier version:

    * the inner loop reused the outer loop variable, so the stored ``pos``
      was the index of the last *input* rather than of the node;
    * branches were collected in a dict keyed by their ``from`` part, so two
      inputs reading from the same source collapsed into one branch;
    * a single input / single mapping (one mapping instead of a list) is
      now accepted.

    Args:
        union_nodes: iterable of union view mappings.
    """
    queries = dict()
    for pos, node in enumerate(union_nodes):
        from_objects = node[INPUT_KEY]
        if not is_list(from_objects):
            from_objects = [from_objects]

        # Deliberately shared across the inputs of one node: the key order is
        # fixed by the first input, so every branch lists its columns in the
        # same order.
        source_target_mapping = {}
        branches = []
        for from_object in from_objects:
            from_part = get_from_part(from_object)
            mappings = from_object[MAPPING_KEY]
            if not is_list(mappings):
                mappings = [mappings]
            for x in mappings:
                if x[XSITYP_KEY] == "Calculation:AttributeMapping":
                    source_target_mapping[x[TRGT_KEY]] = x[SRC_KEY]
                else:
                    source_target_mapping[x[TRGT_KEY]] = "NULL AS"
            select_part = SELECT_STR + ", ".join(
                get_cols_from_map(source_target_mapping)
            )
            branches.append(select_part + " " + from_part)

        full_query = newline_beatify(repl_sap_func("\n\nUNION ALL\n\n".join(branches)))
        queries[node[NAME_KEY]] = {"query": full_query, "pos": pos}
    all_queries.update(queries)


def _views_of_type(all_nodes, xsi_type):
    """Return the view mappings in *all_nodes* whose ``@xsi:type`` is *xsi_type*."""
    return [
        view
        for view in all_nodes["calculationView"]
        if isinstance(view, dict) and view.get(XSITYP_KEY, "") == xsi_type
    ]


def main():
    """Convert every file under ``xmls/`` into ``queries/<name>.sql``.

    The ``queries`` directory is deleted and recreated first. For each input
    file the XML is parsed once, the per-node queries are generated by the
    ``*_qry_extract`` functions (collected in ``all_queries``), and one
    ``create or replace temporary view`` statement per node is written,
    ordered by descending ``pos``.

    Side effects: rebinds the module-level ``full_xml_nodes`` (read by
    ``dataSources``) and clears/fills ``all_queries`` for each file. Errors
    raised while processing a file propagate to the caller.
    """
    global full_xml_nodes

    shutil.rmtree("queries", ignore_errors=True)
    Path("queries").mkdir(parents=True, exist_ok=True)
    for subdir, _, files in os.walk("xmls/"):
        for file_name in files:
            # os.path.join inserts the separator that os.walk omits for
            # nested directories ("xmls/sub" + "a.xml").
            XML_FILE = os.path.join(subdir, file_name)
            SQL_FILE = "queries/" + file_name.split(".")[0]
            with open(XML_FILE, "r", encoding="utf-8") as xml_handle:
                my_xml = xml_handle.read()
            all_queries.clear()

            print(f"{SQL_FILE}.sql creation has been started")
            full_xml_nodes = dict(xmltodict.parse(my_xml))["Calculation:scenario"]
            all_nodes = full_xml_nodes["calculationViews"]

            projection_nodes = _views_of_type(all_nodes, "Calculation:ProjectionView")
            aggregation_nodes = _views_of_type(all_nodes, "Calculation:AggregationView")
            join_nodes = filter_join_nodes(all_nodes)
            rank_nodes = _views_of_type(all_nodes, "Calculation:RankView")
            union_nodes = _views_of_type(all_nodes, "Calculation:UnionView")

            projection_qry_extract(projection_nodes)
            aggregation_qry_extract(aggregation_nodes)
            join_qry_extract(join_nodes)
            rank_qry_extract(rank_nodes)
            union_qry_extract(union_nodes)

            all_queries_sorted = OrderedDict(
                sorted(all_queries.items(), key=lambda x: x[1]["pos"], reverse=True)
            )

            with open(f"{SQL_FILE}.sql", "w", newline="", encoding="utf-8") as f:
                for k, v in all_queries_sorted.items():
                    # The replace chain below is order-sensitive clean-up of
                    # artefacts left by the generator functions (escaped
                    # quotes, list reprs, leftover "in(" text, whitespace).
                    f.write(f"create or replace temporary view {k.lower()} as (\n" 
                            + 
                            lower_except_quotes(v["query"]).replace("\\'", "'").replace('   ', ' ').replace('\n\n', '\n').replace(",\nif",', if').replace(",\n'",", '").replace(",\n\nif",",\nif").replace('\n\n\n\n', '\n').replace('\n\n\n', '\n').replace('  ', ' ').replace("', \n '","', '").replace(", \n \nif",", if").replace('\n', ' ').replace('\n ', ' ').replace('\r ', ' ').replace(', \n if', ', if').replace('   ', ' ').replace('   ', ' ').replace('   ', ' ').replace(',  ',',\n').replace('in(','').replace("']", "").replace("['", "").replace("(),","(").replace("in () = ",") =").replace("in () =",") =").replace("in () !=",") !=").replace("() , ","(").replace(" in ()",')').replace("to_int","int").replace("to_int","int")
                            + 
                            "\n);\n"
                    )

            print(f"{SQL_FILE}.sql file has been created")

if __name__ == "__main__":
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments while main() is running.
    start_time = time.perf_counter()
    main()
    print("Time taken (ms)=", round((time.perf_counter() - start_time) * 1000))