In [57]:
import nbimporter
import pandas as pd
from collections import OrderedDict
import read_report_cobol as cobol_reader # type: ignore
import read_report_java as java_reader # type: ignore

def get_employee_types():
    """Return the employee categories covered by the comparison.

    The order matters: callers refer to the entries by position
    (index 0 is 'clt', index 1 is 'autarquico'). A fresh list is
    returned on every call.
    """
    return ['clt', 'autarquico']

def get_common_table_types():
    """Return the table kinds compared for every employee type.

    The order matters: callers refer to the entries by position
    (index 0 is 'apropriado', index 1 is 'realizado'). A fresh list is
    returned on every call.
    """
    return ['apropriado', 'realizado']

def subtract_dataframes(df1: pd.DataFrame, df2: pd.DataFrame, items=None) -> pd.DataFrame:
    """Return ``df1 - df2``, optionally prefixed with a column of row labels.

    Parameters
    ----------
    df1, df2 : pd.DataFrame
        Numeric frames to subtract. pandas aligns them on index and column
        labels, so cells that exist in only one frame come out as NaN.
    items : pd.Series, list-like or None
        Row labels to place as the first column of the result. A pandas
        object is concatenated as-is (aligned on its own index). Any other
        list-like is wrapped in a Series sharing the result's index, so it
        must have one entry per result row. ``None`` skips the label column.

    Returns
    -------
    pd.DataFrame
        The element-wise difference, with ``items`` as leading column when given.
    """
    difference = df1 - df2
    if items is None:
        return difference
    if not isinstance(items, (pd.Series, pd.DataFrame)):
        # pd.concat rejects plain Python sequences, so wrap them in a Series
        # that is positionally matched to the rows of the difference.
        items = pd.Series(list(items), index=difference.index)
    return pd.concat([items, difference], axis=1)

def correct_autarquico(cobol:pd.DataFrame) -> pd.DataFrame:
    """Rebuild the COBOL 'autarquico' table into a four-row layout.

    Works on a copy of the first three rows of ``cobol``; the input is not
    modified. Column 0 is treated as the row label and the remaining
    columns as values.

    Steps:
      * a 'total' row is computed as the numeric sum of rows 0 and 2
        (values that cannot be parsed become NaN);
      * in row 1, the value columns at positions 1-3 are zeroed and the
        column at position 5 receives the value of the column at position 4;
      * the rows are returned in the order: row 0, row 2, 'total', row 1,
        with a fresh 0..3 index.

    NOTE(review): the label-based ``.loc[3]`` / ``reindex`` calls assume the
    incoming frame has the default integer index 0, 1, 2 and exactly six
    columns - confirm against the COBOL reader.
    """
    fixed = cobol.iloc[:3, :].copy()

    # The total only combines the first and third rows; row 1 is left out.
    first_row = pd.to_numeric(fixed.iloc[0, 1:], errors='coerce')
    third_row = pd.to_numeric(fixed.iloc[2, 1:], errors='coerce')
    totals = first_row + third_row

    # Adjust row 1: clear positions 1..3 and mirror position 4 into position 5.
    fixed.iloc[1, 1:4] = 0.00
    fixed.iloc[1, 5] = fixed.iloc[1, 4]

    # Append the total under label 3, then move row 1 to the end.
    fixed.loc[3] = ['total', *totals.tolist()]
    return fixed.reindex([0, 2, 3, 1]).reset_index(drop=True)

def compare_clt_apropriado(cobol:pd.DataFrame, java:pd.DataFrame) -> pd.DataFrame:
    """Return the Java-minus-COBOL difference table for matching layouts.

    Column 0 of each frame is treated as the row label; every other column
    is converted to numbers (unparseable cells become NaN) before the
    subtraction. The labels shown in the result are taken from ``java``.
    """
    def numeric_values(report: pd.DataFrame) -> pd.DataFrame:
        # Everything after the label column, coerced to numeric.
        return report.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

    labels = java.iloc[:, 0]
    return subtract_dataframes(numeric_values(java), numeric_values(cobol), labels)

def compare_clt_realizado(cobol:pd.DataFrame, java:pd.DataFrame) -> pd.DataFrame:
    """Return the Java-minus-COBOL difference for the CLT 'realizado' table.

    The first two rows of ``java`` are dropped and the remainder re-indexed
    from zero so that it lines up positionally with ``cobol``.
    NOTE(review): presumably the Java report carries two leading rows that
    the COBOL report lacks - confirm against the readers.

    Column 0 is the row label; all other columns are coerced to numeric
    (unparseable cells become NaN). The result's labels come from ``cobol``.
    """
    labels = cobol.iloc[:, 0]
    java_values = (
        java.iloc[2:, 1:]
        .apply(pd.to_numeric, errors='coerce')
        .reset_index(drop=True)
    )
    cobol_values = cobol.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')
    return subtract_dataframes(java_values, cobol_values, labels)

def compare_autarquico(cobol:pd.DataFrame, java:pd.DataFrame) -> tuple:
    """Compare the 'autarquico' tables after normalizing the COBOL layout.

    The COBOL frame is first rebuilt with ``correct_autarquico`` and then
    compared with ``java`` using the same routine as the CLT 'apropriado'
    table.

    Returns
    -------
    tuple
        ``(corrected_cobol, difference)`` - the normalized COBOL frame and
        the Java-minus-COBOL difference frame. The corrected frame is
        returned as well so callers can show what was actually compared.
    """
    corrected_cobol = correct_autarquico(cobol)
    return corrected_cobol, compare_clt_apropriado(corrected_cobol, java)

def compare(emp_type:str, tb_type:str, c_report:pd.DataFrame, j_report:pd.DataFrame):
    """Pick and run the comparison matching an employee type and table type.

    ``c_report`` and ``j_report`` are indexed as ``report[emp_type][tb_type]``
    to obtain the COBOL and Java tables.

    Returns a ``(cobol, java, diff)`` tuple. For the second employee type the
    returned ``cobol`` is the corrected frame produced by
    ``compare_autarquico``. ``diff`` is ``None`` when the combination of
    ``emp_type`` and ``tb_type`` is not one of the handled cases.
    """
    cobol = c_report[emp_type][tb_type]
    java = j_report[emp_type][tb_type]
    employee_types = get_employee_types()
    table_types = get_common_table_types()

    if emp_type == employee_types[0]:
        if tb_type == table_types[0]:
            return cobol, java, compare_clt_apropriado(cobol, java)
        if tb_type == table_types[1]:
            return cobol, java, compare_clt_realizado(cobol, java)

    if emp_type == employee_types[1]:
        # The autarquico comparison also hands back the corrected COBOL table.
        cobol, difference = compare_autarquico(cobol, java)
        return cobol, java, difference

    return cobol, java, None

def is_valid_format(cobol_reports, java_reports):
    """Tell whether both report mappings expose the same keys in the same order."""
    cobol_keys = [*cobol_reports.keys()]
    java_keys = [*java_reports.keys()]
    return cobol_keys == java_keys

def add_to_results(results:dict, key:int, emp_type:str, tb_type:str, cobol:pd.DataFrame, java:pd.DataFrame, diff:pd.DataFrame) -> dict:
    """Store one comparison under ``results[key][emp_type][tb_type]``.

    ``results`` is a nested dict that is updated in place; missing
    intermediate levels are created on demand. An existing entry for the
    same ``key`` / ``emp_type`` / ``tb_type`` is overwritten.

    Parameters
    ----------
    results : dict
        Accumulator shaped ``{key: {emp_type: {tb_type: {...}}}}``.
    key : int
        Report identifier used as the top-level key.
    emp_type, tb_type : str
        Second- and third-level keys.
    cobol, java, diff
        Objects stored under the 'cobol', 'java' and 'diferenca' entries.

    Returns
    -------
    dict
        The same ``results`` object, to allow ``results = add_to_results(...)``.
    """
    by_table = results.setdefault(key, {}).setdefault(emp_type, {})
    by_table[tb_type] = {
        'cobol' : cobol,
        'java' : java,
        'diferenca' : diff
    }
    return results

def read_files(cobol_file_path:str, java_file_path:str):
    """Load both reports and return them as key-sorted ordered mappings.

    The COBOL file is read with ``cobol_reader.get_cobol_data_from`` and the
    Java file with ``java_reader.get_java_data_from``. Each result is then
    rebuilt as an ``OrderedDict`` sorted by key, so the two mappings can be
    walked side by side.

    Returns a ``(cobol_reports, java_reports)`` tuple.
    """
    raw_cobol = cobol_reader.get_cobol_data_from(cobol_file_path)
    raw_java = java_reader.get_java_data_from(java_file_path)
    ordered_cobol, ordered_java = (
        OrderedDict(sorted(raw.items())) for raw in (raw_cobol, raw_java)
    )
    return ordered_cobol, ordered_java

def get_comparative_report(cobol_file_path:str, java_file_path:str):
    """Build the full COBOL-vs-Java comparison for two report files.

    Both files are read with ``read_files``. When the two reports do not
    expose the same keys in the same order, an empty dict is returned.
    Otherwise every report is compared for each employee type and each
    common table type, and the outcome is stored as
    ``results[key][emp_type][tb_type]`` with the entries 'cobol', 'java'
    and 'diferenca'.
    """
    results = {}
    cobol_reports, java_reports = read_files(cobol_file_path, java_file_path)
    if not is_valid_format(cobol_reports, java_reports):
        return results

    # Every (employee type, table type) pair is compared for every report.
    combinations = [
        (emp_type, tb_type)
        for emp_type in get_employee_types()
        for tb_type in get_common_table_types()
    ]
    # Keys were checked to be identical and ordered, so reports pair up by position.
    for (report_key, c_report), j_report in zip(cobol_reports.items(), java_reports.values()):
        for emp_type, tb_type in combinations:
            cobol, java, diff = compare(emp_type, tb_type, c_report, j_report)
            results = add_to_results(results, report_key, emp_type, tb_type, cobol, java, diff)
    return results


In [59]:
# Input reports: the COBOL-generated PDF and the Java-generated PDF.
cobol_file_path = "RHPP05LA.PDF"
java_file_path  = "RHPP05LA - GERAL.pdf"

results = get_comparative_report(cobol_file_path, java_file_path)

# Which comparison to inspect: report code, employee type and table type.
code = 40434
tipo_funcionario = 'clt'
tipo_tabela = 'apropriado'

display(code, tipo_funcionario, tipo_tabela)

# Show the COBOL table, the Java table and their difference, each under its label.
selected = results[code][tipo_funcionario][tipo_tabela]
for label in ('cobol', 'java', 'diferenca'):
    display(label, selected[label])

40434

'clt'

'apropriado'

'cobol'

Unnamed: 0,Unnamed: 1,ferias,13_ferias,total_ferias,13_salario,total
0,apropriacao,360940.9,120313.76,481254.66,360940.9,842195.56
1,inss,61317.13,20439.04,81756.17,61317.13,143073.3
2,ac_trab,1531.42,510.1,2041.52,1531.42,3572.94
3,fgts,24526.11,8175.33,32701.44,24526.11,57227.55
4,spprevcom,0.0,0.0,0.0,1668.03,1668.03
5,total_encargos,87374.66,29124.47,116499.13,89042.69,205541.82
6,total,448315.56,149438.23,597753.79,449983.59,1047737.38


'java'

Unnamed: 0,Unnamed: 1,ferias,13_ferias,total_ferias,13_salario,total
0,apropriacao,364641.26,121545.68,486186.94,364641.26,850828.2
1,inss,72926.76,24308.0,97234.76,62057.5,159292.26
2,ac_trab,1821.5,605.96,2427.46,1549.83,3977.29
3,fgts,29169.32,9721.89,38891.21,24822.2,63713.41
4,spprevcom,0.0,0.0,0.0,1959.04,1959.04
5,total_encargos,103917.58,34635.85,138553.43,90388.57,228942.0
6,total,468558.84,156181.53,624740.37,455029.83,1079770.2


'diferenca'

Unnamed: 0,Unnamed: 1,ferias,13_ferias,total_ferias,13_salario,total
0,apropriacao,3700.36,1231.92,4932.28,3700.36,8632.64
1,inss,11609.63,3868.96,15478.59,740.37,16218.96
2,ac_trab,290.08,95.86,385.94,18.41,404.35
3,fgts,4643.21,1546.56,6189.77,296.09,6485.86
4,spprevcom,0.0,0.0,0.0,291.01,291.01
5,total_encargos,16542.92,5511.38,22054.3,1345.88,23400.18
6,total,20243.28,6743.3,26986.58,5046.24,32032.82
