In [4]:
import pandas as pd
import formulas
import openpyxl
import numpy as np
from tqdm import tqdm
tqdm.pandas()


def get_cell(dataframe, cell_addr):
    """Look up a single cell row by its sheet-qualified address.

    Args:
        dataframe: frame with at least a ``sheet`` and an ``address`` column.
            The ``sheet`` column is compared against the upper-cased sheet
            name, so it must already hold upper-case names to match.
        cell_addr: address of the form ``'Sheet!A1'``. It is split on ``'!'``
            and must yield exactly two parts, otherwise the unpacking raises
            ``ValueError``.

    Returns:
        The first matching row as a ``dict`` (column name -> value), or the
        sentinel ``-1`` when no row matches.
    """
    sheet_name, address = cell_addr.split('!')

    # The sheet part is case-insensitive; the address part is matched as given.
    is_match = (dataframe['sheet'] == sheet_name.upper()) & (dataframe['address'] == address)
    matches = dataframe.loc[is_match]

    if len(matches) == 0:
        return -1
    return matches.iloc[0].to_dict()


def get_cell_value(dataframe, cell_addr):
    """Return the stored ``result`` for a cell or for every cell of a range.

    Args:
        dataframe: frame understood by ``get_cell``; matching rows must carry
            a ``result`` entry.
        cell_addr: either ``'Sheet!A1'`` or a range ``'Sheet!A1:B2'``.

    Returns:
        * single cell: its ``result`` value, or ``-1`` when the cell is absent;
        * range: a nested list with one inner list per worksheet row, each
          holding the values of that row from the left column to the right
          column. Cells that are absent contribute ``-1``.
    """
    if ':' not in cell_addr:
        single = get_cell(dataframe, cell_addr)
        if single == -1:
            return -1
        return single['result']

    sheet_name, span = cell_addr.split('!')
    first_corner, last_corner = span.split(':')

    # coordinate_to_tuple returns (row, column), in that order.
    top_row, left_col = openpyxl.utils.cell.coordinate_to_tuple(first_corner)
    bottom_row, right_col = openpyxl.utils.cell.coordinate_to_tuple(last_corner)

    def _result_at(row_idx, col_idx):
        # Rebuild the 'Sheet!<letters><row>' address and fetch its result.
        letter = openpyxl.utils.cell.get_column_letter(col_idx)
        found = get_cell(dataframe, f'{sheet_name}!{letter}{row_idx}')
        return found if found == -1 else found['result']

    return [
        [_result_at(row_idx, col_idx) for col_idx in range(left_col, right_col + 1)]
        for row_idx in range(top_row, bottom_row + 1)
    ]


def _first_element_is_xl_error(result):
    """Tell whether the leading element of ``result`` is a formulas ``XlError``.

    ``result`` may be a bare value, a (nested) list/tuple, or a numpy array of
    any dimensionality, including the 0-d arrays that hold a single value.
    Empty containers are reported as "no error".
    """
    leading = result
    while isinstance(leading, (list, tuple, np.ndarray)):
        if isinstance(leading, np.ndarray) and leading.ndim == 0:
            # A 0-d array cannot be indexed with [0]; unwrap its only element.
            leading = leading.item()
            break
        if len(leading) == 0:
            return False
        leading = leading[0]
    return isinstance(leading, formulas.tokens.operand.XlError)


def calculate(dataframe, formula):
    """Evaluate one cell's formula against the values stored in ``dataframe``.

    Args:
        dataframe: frame passed through to ``get_cell_value`` to resolve every
            address the formula references.
        formula: the cell content. Only strings starting with ``'='`` are
            treated as formulas.

    Returns:
        * ``formula`` itself, unchanged, when it is not a ``'='`` string;
        * otherwise whatever the compiled formula returns, or the sentinel
          ``-1`` when the leading element of that result is an ``XlError``.

    Raises:
        Any exception raised while parsing/compiling the formula is re-raised
        after the offending formula has been printed.
    """
    if not (isinstance(formula, str) and formula.startswith('=')):
        return formula

    try:
        func = formulas.Parser().ast(formula)[1].compile()
    except Exception:
        print(f'The formular is not passable: {formula}')
        raise

    # Inputs are passed positionally in the order the compiled function lists
    # them; each one is wrapped in a numpy array (0-d for a single cell).
    func_inputs = [
        np.array(get_cell_value(dataframe, addr))
        for addr in func.inputs
    ]

    result = func(*func_inputs)

    # The compiled function hands back array objects rather than plain lists,
    # so the error check must look inside arrays as well.
    if _first_element_is_xl_error(result):
        return -1
    return result


# Load the exported cells. The columns used below are `sheet`, `address`,
# `value` (the raw cell content) and `formula_addr` (the sheet-qualified formula).
formular_df = pd.read_csv('output/formular.csv')

# get_cell upper-cases the sheet part of every address before comparing, so
# the stored sheet names are upper-cased to match.
formular_df['sheet'] = formular_df['sheet'].apply(lambda name: name.upper())

# Seed `result`: literal cells keep their value, formula cells start at the
# -1 sentinel until they are evaluated.
formular_df['result'] = formular_df['value'].apply(
    lambda raw: raw if not (isinstance(raw, str) and raw.startswith('=')) else -1
)

# Evaluate every cell once. calculate reads its inputs from the `result`
# column as it stood before this statement (the assignment only lands after
# progress_apply has finished), so a formula that references another formula
# cell sees that cell's previous result during this pass.
# To debug a single row, apply calculate to a one-row slice instead, e.g.
# formular_df[268:269]['formula_addr'].
formular_df['result'] = formular_df['formula_addr'].progress_apply(
    lambda cell_formula: calculate(formular_df, cell_formula)
)

  0%|          | 0/3144 [00:00<?, ?it/s]

100%|██████████| 3144/3144 [00:20<00:00, 150.81it/s]


In [11]:
# Another evaluation pass over all cells. calculate resolves references through
# the `result` column, which is only replaced once a whole pass has finished,
# so re-running lets formulas that reference other formula cells pick up the
# values produced by the previous pass.
formular_df['result'] = formular_df['formula_addr'].progress_apply(
    lambda cell_formula: calculate(formular_df, cell_formula)
)

100%|██████████| 3144/3144 [00:21<00:00, 146.78it/s]


In [10]:
# Spot-check one evaluated row by position. In the recorded output this is
# cell AX5 of FOR_INTEGRATION_V02, an IF formula whose result is "Yes".
formular_df.iloc[1157]

Unnamed: 0                                                   1157
sheet                                         FOR_INTEGRATION_V02
address                                                       AX5
value                                   =IF(K5/J5<0.2,"Yes","No")
formula_addr    =IF(For_integration_V02!K5/For_integration_V02...
result                                                        Yes
Name: 1157, dtype: object

In [4]:
# Take the last input address of the compiled formula.
# NOTE(review): `func` is created in the cell that compiles the BU5 formula,
# which appears further down in this notebook — that cell must be run first.
addr = list(func.inputs)[-1]
addr

'FOR_INTEGRATION_V02!Q5'

In [36]:
# Show the stored row for that input address.
# NOTE(review): the recorded output has no `result` key, so it was presumably
# captured before the `result` column was added — confirm by re-running the
# notebook top to bottom.
get_cell(formular_df, addr)

{'Unnamed: 0': 1148,
 'sheet': 'FOR_INTEGRATION_V02',
 'address': 'Q5',
 'value': 'مستحق للدعم',
 'formula_addr': 'مستحق للدعم'}

In [5]:


# Column BU of For_integration_V02: BU4 is read for its `value` (used as the
# title) and BU5 for its sheet-qualified formula.
target_title_addr = 'For_integration_V02!BU4'
target_formular_addr = 'For_integration_V02!BU5'

# get_cell returns -1 when an address is not found, in which case the
# subscripts below raise TypeError.
title = get_cell(formular_df, target_title_addr)['value']
formula = get_cell(formular_df, target_formular_addr)['formula_addr']
print(title, formula)

# Compile the formula into a callable; `func` is reused by other cells.
func = formulas.Parser().ast(formula)[1].compile()

Aff_Max_DP Interest =INDEX(Assumption!C11:I36,MATCH(For_integration_V02!BT5,Assumption!B11:B36,0),MATCH(For_integration_V02!Q5,Assumption!C10:I10,0))


In [10]:
# List the addresses the compiled formula expects, in order; calculate passes
# its positional arguments in this same order.
func.inputs

OrderedDict([('ASSUMPTION!B11:B36', <Ranges>(ASSUMPTION!B11:B36)),
             ('ASSUMPTION!C10:I10', <Ranges>(ASSUMPTION!C10:I10)),
             ('ASSUMPTION!C11:I36', <Ranges>(ASSUMPTION!C11:I36)),
             ('FOR_INTEGRATION_V02!BT5', <Ranges>(FOR_INTEGRATION_V02!BT5)),
             ('FOR_INTEGRATION_V02!Q5', <Ranges>(FOR_INTEGRATION_V02!Q5))])

In [19]:
# numpy is already imported in the first cell; this repeat import is redundant.
import numpy as np

# Call the compiled INDEX/MATCH formula by hand with small stand-in inputs.
# Presumably the five arguments line up with func.inputs in the order shown
# above (B11:B36, C10:I10, C11:I36, BT5, Q5) — TODO confirm.
func(np.array([4,5,6]),np.array([1,2,3]),np.array([[1,2,3],[4,5,6],[7,8,9]]),np.array(4),1)

Array([[1]], dtype=object)

In [13]:
# Experiment: building a Ranges object directly from a list of ints. In the
# recorded output this raises TypeError ('int' object is not subscriptable).
formulas.Ranges(ranges=[1,2,3])

TypeError: 'int' object is not subscriptable