In [1]:
import http.client
import re
import json

## Get a simplified list of input variables

In [None]:
# Request the page whose <B> tags are listed a few cells below. The connection
# stays open because `response` is only read in a later cell.
host = "www3.finances.gouv.fr"
page_path = "/calcul_impot/2015/simplifie/calc_s_data.htm"
conn = http.client.HTTPConnection(host)
conn.request("GET", page_path)
response = conn.getresponse()

In [None]:
# Show the HTTP status code and reason phrase of the response obtained above.
print(response.status, response.reason)

In [None]:
# Read the whole response body and decode it as UTF-8 text in one step, so
# `data` is always a str.
data = response.read().decode(encoding='utf-8')

In [None]:
# List the text found between <B> and </B> tags in the downloaded page.
# NOTE(review): `.*` is greedy, so several <B>...</B> pairs on the same line
# would come back as a single match - confirm against the page's markup.
re.findall('<B>(.*)</B>', data)

In [2]:
# Aliases of the inputs kept in the simplified model; they are mapped to
# variable names through alias2name further down. The list is used as-is and
# contains repeated entries (e.g. 'F', 'G', 'N', '6PS', '6PT', '6PU').
# NOTE(review): presumably transcribed from the <B>...</B> matches of the
# previous cell - confirm.
alias_simple = ['M', 'O', 'B', 'C', 'D', 'V', 'T', 'L',
 'N', 'P', 'F', 'W', 'S', 'G', 'F', 'G',
 'H', 'I', 'R', 'J', 'N', '1AJ', '1BJ', '1CJ',
 '1DJ', '1AP', '1BP', '1CP', '1DP', '1AK', '1BK', '1CK',
 '1DK', '1AI', '1BI', '1CI', '1DI', '1AX', '1AV', '1BX',
 '1BV', '1CX', '1CV', '1DX', '1DV', '1BL', '1CB', '1DQ',
 '1AS', '1BS', '1CS', '1DS', '1AT', '1BT', '1AZ', '1BZ',
 '1CZ', '1DZ', '1AO', '1BO', '1CO', '1DO', '1AW', '1BW',
 '1CW', '1DW', '2DH', '2EE', '2DC', '2FU', '2CH', '2TS',
 '2GO', '2TR', '2FA', '2CG', '2BH', '2CA', '2AB', '2CK',
 '2BG', '2LA', '2LB', '2AA', '2AL', '2AM', '2AN', '2AQ',
 '2AR', '2DM', '3VG', '3VH', '3SG', '3SH', '4BE', '4BA',
 '4BB', '4BC', '4BD', '4BF', '0XX', '6DE', '6GI', '6GJ',
 '6EL', '6EM', '6GP', '6GU', '6DD', '6RS', '6SS', '6RT',
 '6ST', '6RU', '6SU', '6PS', '6PS', '6PT', '6PT', '6PU',
 '6PU', '6PS', '6PS', '6PT', '6PT', '6PU', '6PU', '6QR',
 '6QW', '7UD', '7UF', '7UH', '7XS', '7XT', '7XU', '7XW',
 '7XY', '7VA', '7VC', '7AC', '7AE', '7AG', '7DB', '7DF',
 '7DD', '7DL', '7DQ', '7DG', '7VZ', '7VV', '7VU', '7VT',
 '7VX', '7CD', '7CE', '7GA', '7GE', '7GB', '7GF', '7GC',
 '7GG', '7EA', '7EB', '7EC', '7ED', '7EF', '7EG', '7GZ',
 '7UK', '7VO', '7TD', '7WN', '7WO', '7WM', '7WP', '7WE',
 '7WG', '7SD', '7SA', '7SE', '7SB', '7SF', '7SC', '7WC',
 '7WB', '7SG', '7RG', '7VG', '7VH', '7SH', '7RH', '7SI',
 '7RI', '7WT', '7WU', '7SJ', '7RJ', '7SK', '7RK', '7SL',
 '7RL', '7SN', '7RN', '7SP', '7RP', '7SR', '7RR', '7SS',
 '7RS', '7SQ', '7RQ', '7ST', '7RT', '7SV', '7TV', '7SW',
 '7TW', '7RV', '7RW', '7RZ', '7WJ', '7WL', '8BY', '8CY',
 '8UT', '8TF', '8TI', '8TK']

In [None]:
# Print every alias of this hand-picked list that alias_simple does not contain.
wanted_aliases = (
    '0AC', '0AM', '0AD', '0AV', '0AO', '0CF',
    '1AJ', '1AP', '1AS', '1BJ', '1BP', '1BS',
    '2DC', '2TR', '2CK', '6DE', '7UF',
)
for missing in (a for a in wanted_aliases if a not in alias_simple):
    print(missing)


In [3]:
# Add six aliases that do not appear in the literal list above. Only absent
# ones are appended, so re-running this cell no longer grows alias_simple with
# duplicate entries; the result on a fresh kernel is the same as before.
for extra_alias in ['0AC', '0AM', '0AD', '0AV', '0AO', '0CF']:
    if extra_alias not in alias_simple:
        alias_simple.append(extra_alias)

In [4]:
# Number of aliases collected so far (repeated entries are counted).
len(alias_simple)

234

## Precompute graph for other variables

In [5]:
from function_set_std import functions_mapping 

In [6]:
def _load_json(path):
    """Return the parsed content of the JSON file at `path`."""
    with open(path, 'r') as f:
        return json.load(f)


# Precomputed artefacts read by the rest of the notebook.
computing_order = _load_json('../json/computing_order.json')
formulas_light = _load_json('../json/formulas_light.json')
constants_light = _load_json('../json/constants_light.json')
inputs_light = _load_json('../json/inputs_light.json')
unknowns_light = _load_json('../json/unknowns_light.json')
input_variables = _load_json('../json/input_variables.json')


In [7]:
# Lookup table from an input variable's alias to its name; if an alias occurs
# several times in input_variables, the last entry wins.
alias2name = dict((entry['alias'], entry['name']) for entry in input_variables)

In [8]:
# Translate each alias into its variable name, in order; aliases with no entry
# in alias2name are reported and skipped.
inputs_simple = []
for alias in alias_simple:
    if alias not in alias2name:
        print('Alias %s not found.'%alias)
        continue
    inputs_simple.append(alias2name[alias])

Alias M not found.
Alias O not found.
Alias B not found.
Alias C not found.
Alias D not found.
Alias V not found.
Alias T not found.
Alias L not found.
Alias N not found.
Alias P not found.
Alias F not found.
Alias W not found.
Alias S not found.
Alias G not found.
Alias F not found.
Alias G not found.
Alias H not found.
Alias I not found.
Alias R not found.
Alias J not found.
Alias N not found.


In [9]:
def compute_formula(node):
    """Return `node` rewritten for the simplified model.

    Relies on the notebook globals `formulas_simple`, `useless_simple`,
    `inputs_light`, `inputs_simple` and `unknowns_light`.

    - float nodes are returned unchanged;
    - call nodes are rebuilt with each argument rewritten recursively;
    - symbol nodes are resolved in this order: kept as-is when the name is in
      `formulas_simple`; replaced by the node stored in `useless_simple`;
      kept as-is when it is an input listed in `inputs_simple`; replaced by a
      0.0 float node for any other input or for an unknown.

    Raises:
        Exception: a symbol belongs to none of the categories above.
        ValueError: the node has an unrecognised 'nodetype'.
    """
    kind = node['nodetype']

    if kind == 'float':
        return node

    if kind == 'call':
        return {
            'nodetype': 'call',
            'name': node['name'],
            'args': [compute_formula(child) for child in node['args']],
        }

    if kind != 'symbol':
        raise ValueError('Unknown type : %s'%kind)

    symbol = node['name']

    if symbol in formulas_simple:
        return node

    if symbol in useless_simple:
        # The stored node itself is returned, not a copy.
        return useless_simple[symbol]

    zero = {'nodetype': 'float', 'value': 0.}

    if symbol in inputs_light:
        # Inputs outside the simplified set are frozen to zero.
        return node if symbol in inputs_simple else zero

    if symbol in unknowns_light:
        return zero

    raise Exception('Unknown variable category for %s.'%symbol)


In [10]:
# Constants have known values: seed the "useless" table with them as float nodes.
useless_simple = dict(
    (name, {'nodetype': 'float', 'value': value})
    for name, value in constants_light.items()
)
formulas_simple = {}
computing_order_simple = []

# Rewrite every formula in computing order. Results that are still calls are
# kept; anything else is recorded in useless_simple so later formulas can
# substitute it directly.
for variable in computing_order:
    result = compute_formula(formulas_light[variable])
    if result['nodetype'] != 'call':
        print('Useless variable : %s.'%variable)
        useless_simple[variable] = result
        continue
    formulas_simple[variable] = result
    computing_order_simple.append(variable)


Useless variable : PRV.
Useless variable : TTSB4.
Useless variable : TTSB1.
Useless variable : TTSB2.
Useless variable : TTSB3.
Useless variable : TTS4.
Useless variable : PRC.
Useless variable : TTS1.
Useless variable : TTS2.
Useless variable : TTS3.
Useless variable : LIMDPAE.
Useless variable : TSPEBASABP.
Useless variable : TSPEBASABC.
Useless variable : TSPEBASABV.
Useless variable : 1RNC.
Useless variable : 2RNC.
Useless variable : 1RIA.
Useless variable : 2RIA.
Useless variable : TSB1.
Useless variable : TSB4.
Useless variable : TSB2.
Useless variable : TSB3.
Useless variable : TS1.
Useless variable : TS3.
Useless variable : TS4.
Useless variable : TS2.
Useless variable : REVRVO.
Useless variable : RRFTEO.
Useless variable : SPEBASABV.
Useless variable : SPEBASABC.
Useless variable : SPEBASABP.
Useless variable : REB.
Useless variable : PDEDMINC.
Useless variable : PERPSALDC.
Useless variable : PDEDMINV.
Useless variable : PERPSALDV.
Useless variable : XDFC.
Useless variable : X

## Some simplifications

In [11]:
def replace(a, b):
    """Overwrite dict `a` in place so that it holds exactly the items of `b`.

    `a` keeps its identity, so every reference to it sees the new content.
    `b` is read only after `a` has been emptied; passing the same dict for
    both therefore leaves it empty.
    """
    a.clear()
    a.update(b)

In [12]:
def simplify_formula(node):
    """Apply one round of simplifications to the formula tree `node`, in place.

    Relies on the notebook globals `useless_simple`, `functions_mapping` and
    the helper `replace`.

    Rewrites applied:
    - a symbol present in `useless_simple` is replaced by the stored node;
    - '+' / '*' with no argument become 0.0 / 1.0, and with a single argument
      become that argument;
    - 0.0 terms are dropped from '+', 1.0 factors are dropped from '*', and a
      '*' that still has a 0.0 factor becomes 0.0;
    - a call whose arguments are all floats is evaluated with
      `functions_mapping[name]` and replaced by the resulting float.

    A single call does not necessarily reach a fixed point (e.g. dropping
    zeros may leave a one-argument '+'); call again until False is returned.

    Returns:
        True if the tree was modified, False otherwise.

    Raises:
        ValueError: the node has an unrecognised 'nodetype'.
    """
    modify = False
    nodetype = node['nodetype']

    if nodetype == 'symbol':
        name = node['name']
        if name in useless_simple:
            replace(node, useless_simple[name])
            return True
        return False

    if nodetype == 'float':
        return False

    if nodetype == 'call':
        name = node['name']

        if name == '+':
            if node['args'] == []:
                replace(node, {'nodetype': 'float', 'value': 0.})
                return True

            if len(node['args']) == 1:
                # Same in-place overwrite as the '*' branch, via the helper.
                replace(node, node['args'][0])
                return True

            nargs = len(node['args'])
            node['args'] = [arg for arg in node['args'] if arg != {'nodetype': 'float', 'value': 0.}]
            if nargs != len(node['args']):
                modify = True

        if name == '*':
            if node['args'] == []:
                replace(node, {'nodetype': 'float', 'value': 1.})
                return True

            if len(node['args']) == 1:
                replace(node, node['args'][0])
                return True

            nargs = len(node['args'])
            node['args'] = [arg for arg in node['args'] if arg != {'nodetype': 'float', 'value': 1.}]
            if nargs != len(node['args']):
                modify = True

            for arg in node['args']:
                if arg == {'nodetype': 'float', 'value': 0.}:
                    replace(node, {'nodetype': 'float', 'value': 0.})
                    return True

        for arg in node['args']:
            # The recursive call must come first: `modify or simplify_formula(arg)`
            # short-circuits and silently skips every argument after the first
            # change.
            modify = simplify_formula(arg) or modify

        args = node['args']
        # Constant folding: only when there is at least one argument and all
        # of them are floats.
        if args and all(arg['nodetype'] == 'float' for arg in args):
            values = [arg['value'] for arg in args]
            function = functions_mapping[name]
            value = function(values)
            assert(type(value).__name__ == 'float')
            replace(node, {'nodetype': 'float', 'value': value})
            return True

        return modify

    raise ValueError('Unknown type : %s'%nodetype)


In [13]:
# Run simplification passes until a complete pass changes nothing.
# Formulas that stop being calls are moved to useless_simple so that later
# formulas (and later passes) can substitute them.
# NOTE: the order of the "Useless variable" messages may differ from output
# captured with the previous version of this cell.
modify = True
while modify:
    modify = False
    new_computing_order = []
    new_formulas = {}
    for variable in computing_order_simple:
        formula = formulas_simple[variable]
        # Call simplify_formula first: `modify or simplify_formula(formula)`
        # short-circuits, so after the first change in a pass no later formula
        # was simplified and each pass changed at most one formula.
        modify = simplify_formula(formula) or modify
        if formula['nodetype'] == 'call':
            new_computing_order.append(variable)
            new_formulas[variable] = formula
        else:
            print('Useless variable : %s.'%variable)
            useless_simple[variable] = formula
    computing_order_simple = new_computing_order
    formulas_simple = new_formulas

Useless variable : BOOL_0AZ.
Useless variable : EAC.
Useless variable : PAC.
Useless variable : NIN.
Useless variable : NBQAR2.
Useless variable : NBQAR3.
Useless variable : NBQAR4.
Useless variable : BOOL_0BT.
Useless variable : NBQART.
Useless variable : NBQAR1.
Useless variable : NBQAR.
Useless variable : AGC.
Useless variable : AGV.
Useless variable : NSA.
Useless variable : NCC.
Useless variable : NPS.
Useless variable : NSP.
Useless variable : VARIPTEFP.
Useless variable : BIHTAV.
Useless variable : T2TSNV.
Useless variable : TEXTSV.
Useless variable : TTSBV.
Useless variable : TPRB3.
Useless variable : T2PRB3.
Useless variable : TEXPR3.
Useless variable : TPLR3.
Useless variable : TIND_APB3.
Useless variable : T2PRBV.
Useless variable : TEXPRV.
Useless variable : T2PRBC.
Useless variable : TEXPRC.
Useless variable : TPRB4.
Useless variable : T2PRB4.
Useless variable : TEXPR4.
Useless variable : TPLR4.
Useless variable : TAPB4.
Useless variable : T2PRB1.
Useless variable : TEXPR1

## Graph study

In [14]:
# Number of formulas left after the simplification passes.
len(formulas_simple)

2684

In [15]:
# Show the formula at index 100 of the computing order as indented JSON.
# The variable name is printed directly in front of the JSON text, with no
# separator.
var = computing_order_simple[100]
print(var + json.dumps(formulas_simple[var], indent=4))

TTSN1TT{
    "nodetype": "call",
    "name": "-",
    "args": [
        {
            "nodetype": "symbol",
            "name": "TABTS1TT"
        }
    ]
}


In [16]:
# Follow the dependency chain: TABTS1TT is the symbol used by TTSN1TT above.
formulas_simple['TABTS1TT']

{'args': [{'nodetype': 'float', 'value': 0.0},
  {'args': [{'name': 'TREP10V', 'nodetype': 'symbol'},
    {'args': [{'name': 'TABTS1AJ', 'nodetype': 'symbol'}],
     'name': '-',
     'nodetype': 'call'},
    {'args': [{'name': 'TABTS1AC', 'nodetype': 'symbol'}],
     'name': '-',
     'nodetype': 'call'},
    {'args': [{'name': 'TABTS1AP', 'nodetype': 'symbol'}],
     'name': '-',
     'nodetype': 'call'},
    {'args': [{'name': 'TABTS3VJ', 'nodetype': 'symbol'}],
     'name': '-',
     'nodetype': 'call'}],
   'name': '+',
   'nodetype': 'call'}],
 'name': 'max',
 'nodetype': 'call'}

In [17]:
# TREP10V is the first term of the sum inside TABTS1TT.
formulas_simple['TREP10V']

{'args': [{'args': [{'name': 'TIND_10V', 'nodetype': 'symbol'},
    {'name': 'TD10MV', 'nodetype': 'symbol'}],
   'name': '*',
   'nodetype': 'call'},
  {'args': [{'args': [{'nodetype': 'float', 'value': 1.0},
      {'args': [{'name': 'TIND_10V', 'nodetype': 'symbol'}],
       'name': '-',
       'nodetype': 'call'}],
     'name': '+',
     'nodetype': 'call'},
    {'name': 'TFPTV', 'nodetype': 'symbol'}],
   'name': '*',
   'nodetype': 'call'}],
 'name': '+',
 'nodetype': 'call'}

In [18]:
# TIND_10V appears in both products of TREP10V.
formulas_simple['TIND_10V']

{'args': [{'args': [{'name': 'T10MINSV', 'nodetype': 'symbol'},
    {'args': [{'name': 'TFRDV', 'nodetype': 'symbol'}],
     'name': '-',
     'nodetype': 'call'}],
   'name': '+',
   'nodetype': 'call'}],
 'name': 'positif_ou_nul',
 'nodetype': 'call'}

In [19]:
# T10MINSV is the first term of the sum tested by TIND_10V.
formulas_simple['T10MINSV']

{'args': [{'args': [{'name': 'TTSBNV', 'nodetype': 'symbol'},
    {'name': 'TDEDMINV', 'nodetype': 'symbol'}],
   'name': 'min',
   'nodetype': 'call'},
  {'name': 'TDFNV', 'nodetype': 'symbol'}],
 'name': 'max',
 'nodetype': 'call'}

In [20]:
# TTSBNV is the first argument of the min() inside T10MINSV.
formulas_simple['TTSBNV']

{'args': [{'name': 'TSHALLOV', 'nodetype': 'symbol'},
  {'name': 'ALLOV', 'nodetype': 'symbol'}],
 'name': '+',
 'nodetype': 'call'}

In [21]:
def get_children(node):
    """Return the set of symbol names referenced anywhere in the tree `node`.

    Float nodes contribute nothing, a symbol node contributes its own name,
    and a call node contributes the union of what its arguments contribute.

    Raises:
        ValueError: the node has an unrecognised 'nodetype'.
    """
    kind = node['nodetype']

    if kind == 'float':
        return set()

    if kind == 'symbol':
        return {node['name']}

    if kind == 'call':
        # union() with no operand yields an empty set, which covers calls
        # without arguments.
        return set().union(*[get_children(arg) for arg in node['args']])

    raise ValueError('Unknown type : %s'%kind)

In [22]:
# For every remaining formula, the set of symbols it references.
children_dict_simple = {
    variable: get_children(tree)
    for variable, tree in formulas_simple.items()
}

In [23]:
# Collect every formula and input that the target variables depend on.
# IDRS3, REVKIRE are already computed
to_inspect = ['NBPT', 'BCSG', 'BRDS', 'IBM23', 'TXMOYIMP', 'NAPTIR', 'IINET', 'RRRBG', 'RNI', 'IAVIM']
dependencies_formulas = []
dependencies_inputs = []

# The lists keep the discovery order (dependencies_formulas is iterated again
# when formulas_simple is pruned); these sets mirror them so each membership
# test is O(1) instead of a scan of a growing list.
pending = set(to_inspect)        # same content as to_inspect (it never holds duplicates)
done_formulas = set()            # same content as dependencies_formulas
seen_inputs = set()              # same content as dependencies_inputs
known_inputs = set(inputs_simple)

while to_inspect:
    node = to_inspect.pop()
    pending.discard(node)

    # Reached only when a formula references itself: it is queued once more
    # while being processed and must not be recorded twice.
    if node in done_formulas:
        continue

    for child in children_dict_simple[node]:
        if child in formulas_simple:
            if (child not in done_formulas) and (child not in pending):
                to_inspect.append(child)
                pending.add(child)
        elif child in known_inputs:
            if child not in seen_inputs:
                dependencies_inputs.append(child)
                seen_inputs.add(child)
        else:
            raise Exception('Unknown variable category : %s for parent %s.'%(child, node))

    dependencies_formulas.append(node)
    done_formulas.add(node)


In [24]:
# Number of formulas reachable from the target variables.
len(dependencies_formulas)

1658

## Ignore unused formulas

In [25]:
# Keep only the formulas reachable from the targets: the computing order keeps
# its relative order, the formula table follows the discovery order.
new_computing_order = [
    name for name in computing_order_simple if name in dependencies_formulas
]
computing_order_simple = new_computing_order
formulas_simple = dict(
    (name, formulas_simple[name]) for name in dependencies_formulas
)

In [26]:
# Persist the simplified model: one JSON file per artefact.
outputs = (
    ('../json/formulas_simple.json', formulas_simple),
    ('../json/computing_order_simple.json', computing_order_simple),
    ('../json/inputs_simple.json', inputs_simple),
)
for path, payload in outputs:
    with open(path, 'w') as f:
        json.dump(payload, f)

## Test it

In [27]:
import compute_simple
import function_set_np

In [28]:
# Single test case: only alias 1AJ is given a value.
# compute_simple.prepare is a project helper not shown in this notebook.
# NOTE(review): presumably it maps aliases to the model's input names - confirm.
alias_values = {'1AJ': 30000.}
input_values = compute_simple.prepare(alias_values)

In [29]:
# Run the simplified model on the prepared inputs; the captured output below
# is an array of ten identical values.
compute_simple.compute(input_values)

array([ 2461.,  2461.,  2461.,  2461.,  2461.,  2461.,  2461.,  2461.,
        2461.,  2461.])