In [1]:
import numpy as np


# main

In [2]:
import pandas as pd
from pathlib import Path
import json
from generate_dpe_annexes_scripts import td001_processing
from generate_dpe_annexes_scripts.td001_processing import postprocessing_td001
from generate_dpe_annexes_scripts.utils import round_float_cols, unique_ordered
from config import paths
from multiprocessing import Pool
from generate_dpe_annexes_scripts.td007_processing import merge_td007_tr_tv, postprocessing_td007, generate_pb_table, \
    generate_ph_table, generate_murs_table, agg_td007_murs_to_td001, agg_td007_ph_to_td001, agg_td007_pb_to_td001

from generate_dpe_annexes_scripts.td008_processing import merge_td008_tr_tv, postprocessing_td008
from generate_dpe_annexes_scripts.td001_merge import merge_td001_dpe_id_envelope
from generate_dpe_annexes_scripts.td007_processing import agg_td007_to_td001_essential, agg_surface_envelope
from generate_dpe_annexes_scripts.td008_processing import agg_td008_to_td001_essential, agg_td008_to_td001
from generate_dpe_annexes_scripts.td010_processing import merge_td010_tr_tv, postprocessing_td010, agg_td010_td001
from generate_dpe_annexes_scripts.td011_td012_processing import merge_td012_tr_tv, postprocessing_td012, merge_td011_tr_tv, \
    agg_systeme_chauffage_essential
from generate_dpe_annexes_scripts.td013_td014_processing import merge_td013_tr_tv, postprocessing_td014, merge_td014_tr_tv, \
    agg_systeme_ecs_essential
from generate_dpe_annexes_scripts.td001_merge import merge_td001_dpe_id_system
from generate_dpe_annexes_scripts.doc_annexe import td001_annexe_enveloppe_agg_desc, td001_sys_ch_agg_desc, td001_sys_ecs_agg_desc, \
    td007_annexe_desc, td008_annexe_desc, td012_annexe_desc, td014_annexe_desc, enums_cstb, \
    td001_annexe_generale_desc


def run_enveloppe_processing(td001, td006, td007, td008, td010):
    """Run the envelope pipeline and return the annex tables derived from it.

    td007, td008 and td010 are linked to td001 through
    `merge_td001_dpe_id_envelope`, merged with their tr/tv tables,
    post-processed, split per component type (pb / ph / murs) and aggregated
    to td001 level.

    Returns a 5-tuple:
        td001_enveloppe_agg -- column-wise concat of the td007, td008 and
            surface "essential" aggregates; its index is named 'td001_dpe_id'
        td008_p -- processed td008 limited to the columns that were not in
            the raw table (minus two helper columns) plus 'td008_baie_id'
        td007_p -- same for td007, plus 'td007_paroi_opaque_id'
        env_compo_dict -- component-level tables keyed by annex name
        env_compo_agg_dict -- td001-level aggregates keyed by annex name
    """

    def _annex_view(table, raw_cols, id_col, dropped=()):
        # Keep the columns that are neither raw nor explicitly dropped, then
        # the row identifier; unique_ordered removes a duplicated identifier.
        excluded = raw_cols + list(dropped)
        selected = [name for name in table.columns if name not in excluded]
        selected.append(id_col)
        return table[unique_ordered(selected)]

    # Snapshot the raw schemas before any column is added.
    raw_cols_td008 = list(td008.columns)
    raw_cols_td007 = list(td007.columns)
    raw_cols_td010 = list(td010.columns)

    td001, td006, td007, td008, td010 = merge_td001_dpe_id_envelope(
        td001=td001, td006=td006, td007=td007, td008=td008, td010=td010)

    # Table post-processing (td007 post-processing also receives td008).
    td008 = postprocessing_td008(merge_td008_tr_tv(td008))
    td007 = postprocessing_td007(merge_td007_tr_tv(td007), td008)
    td010 = postprocessing_td010(merge_td010_tr_tv(td010))

    # One table per component type.
    td007_pb = generate_pb_table(td007)
    td007_ph = generate_ph_table(td007)
    td007_murs = generate_murs_table(td007)

    # Cross-theme summary tables, concatenated side by side.
    essentials = [
        agg_td007_to_td001_essential(td007),
        agg_td008_to_td001_essential(td008),
        agg_surface_envelope(td007, td008),
    ]
    td001_enveloppe_agg = pd.concat(essentials, axis=1)
    td001_enveloppe_agg.index.name = 'td001_dpe_id'

    td008_p = _annex_view(td008, raw_cols_td008, 'td008_baie_id',
                          dropped=['fen_lib_from_tv009', 'fen_lib_from_tv021'])
    td007_p = _annex_view(td007, raw_cols_td007, 'td007_paroi_opaque_id',
                          dropped=["qualif_surf", 'surface_paroi_opaque_calc',
                                   'surface_paroi_totale_calc_v1',
                                   'surface_paroi_totale_calc_v2'])
    td010_p = _annex_view(td010, raw_cols_td010, 'td010_pont_thermique_id')

    env_compo_dict = {
        'td007_paroi_opaque': td007_p,
        'td007_ph': td007_ph,
        'td007_pb': td007_pb,
        'td007_murs': td007_murs,
        'td008_baie': td008_p,
        'td010_pont_thermique': td010_p,
    }

    # Aggregated tables per component type (evaluated in this order).
    env_compo_agg_dict = {
        'td007_murs_agg': agg_td007_murs_to_td001(td007_murs),
        'td007_ph_agg': agg_td007_ph_to_td001(td007_ph),
        'td007_pb_agg': agg_td007_pb_to_td001(td007_pb),
        'td008_agg': agg_td008_to_td001(td008),
        'td010_agg': agg_td010_td001(td010),
    }

    return td001_enveloppe_agg, td008_p, td007_p, env_compo_dict, env_compo_agg_dict


def run_system_processing(td001, td006, td011, td012, td013, td014):
    """Run the heating (td011/td012) and ECS (td013/td014) system pipeline.

    The four system tables are linked to td001 through
    `merge_td001_dpe_id_system`, merged with their tr/tv tables and
    post-processed (td012 on its own, td014 together with td013).

    Returns a 6-tuple
    (td011_p, td012_p, td001_sys_ch_agg, td013_p, td014_p, td001_sys_ecs_agg)
    where each `*_p` table holds the columns that were not in the raw table
    (minus a few helper columns for td012/td014) plus the table identifier,
    and the two `*_agg` values come from the "essential" aggregation helpers.
    """

    def _annex_view(table, raw_cols, id_col, dropped=()):
        # Keep the columns that are neither raw nor explicitly dropped, then
        # the row identifier; unique_ordered removes a duplicated identifier.
        excluded = raw_cols + list(dropped)
        selected = [name for name in table.columns if name not in excluded]
        selected.append(id_col)
        return table[unique_ordered(selected)]

    # Snapshot the raw schemas before any column is added.
    raw_cols_td011 = list(td011.columns)
    raw_cols_td012 = list(td012.columns)
    raw_cols_td013 = list(td013.columns)
    raw_cols_td014 = list(td014.columns)

    td001, td006, td011, td012, td013, td014 = merge_td001_dpe_id_system(
        td001, td006, td011, td012, td013, td014)

    td011 = merge_td011_tr_tv(td011)
    td012 = merge_td012_tr_tv(td012)
    td013 = merge_td013_tr_tv(td013)
    td014 = merge_td014_tr_tv(td014)

    # Heating side.
    td012 = postprocessing_td012(td012)
    td011_p = _annex_view(td011, raw_cols_td011, 'td011_installation_chauffage_id')
    td012_p = _annex_view(td012, raw_cols_td012, 'td012_generateur_chauffage_id',
                          dropped=['besoin_chauffage_infer', 'gen_ch_concat_txt_desc'])
    td001_sys_ch_agg = agg_systeme_chauffage_essential(td001, td011, td012)

    # ECS side; td013_p is deliberately taken after postprocessing_td014 has
    # been given td013, exactly as the statements were ordered originally.
    td014 = postprocessing_td014(td013, td014)
    td013_p = _annex_view(td013, raw_cols_td013, 'td013_installation_ecs_id')
    td014_p = _annex_view(td014, raw_cols_td014, 'td014_generateur_ecs_id',
                          dropped=['score_gen_ecs_lib_infer', 'gen_ecs_concat_txt_desc'])
    td001_sys_ecs_agg = agg_systeme_ecs_essential(td001, td013, td014)

    return td011_p, td012_p, td001_sys_ch_agg, td013_p, td014_p, td001_sys_ecs_agg


def build_doc(annexe_dir):
    """Write the two JSON documentation files of the annex tables.

    'doc_table_annexes_cstb.json' maps each annex table name to its
    description object and 'enum_table_annexes_cstb.json' holds `enums_cstb`.
    Both are written into `annexe_dir` (a `pathlib.Path`-like object that
    supports the `/` operator), UTF-8 encoded, with a 4-space indent.
    """
    doc_annexe = {
        'td001_annexe_generale': td001_annexe_generale_desc,
        'td001_annexe_enveloppe_agg': td001_annexe_enveloppe_agg_desc,
        'td001_sys_ch_agg': td001_sys_ch_agg_desc,
        'td001_sys_ecs_agg': td001_sys_ecs_agg_desc,
        'td007_annexe': td007_annexe_desc,
        'td008_annexe': td008_annexe_desc,
        'td012_annexe': td012_annexe_desc,
        'td014_annexe': td014_annexe_desc,
    }

    doc_path = annexe_dir / 'doc_table_annexes_cstb.json'
    with open(doc_path, 'w', encoding='utf-8') as doc_file:
        json.dump(doc_annexe, doc_file, indent=4)

    enum_path = annexe_dir / 'enum_table_annexes_cstb.json'
    with open(enum_path, 'w', encoding='utf-8') as enum_file:
        json.dump(enums_cstb, enum_file, indent=4)


# Input root: iterated further down to get one directory per department.
data_dir = paths['DPE_DEPT_PATH']
# Output root for the annex tables, created up front (parents included).
annexe_dir = Path(paths['DPE_DEPT_ANNEXE_PATH'])
annexe_dir.mkdir(parents=True, exist_ok=True)


def run_postprocessing_by_depts(dept_dir):
    """Process one department directory and write all of its annex CSV files.

    Reads the raw td001/td006/td007/td008/td010 tables and then the
    td011..td014 tables from `dept_dir` (every column as `str`), runs the
    envelope and system pipelines and writes each resulting table, passed
    through `round_float_cols`, under the module-level
    `annexe_dir / dept_dir.name`. 'td001_annexe_generale.csv' is written last.

    Each table is written exactly once: `env_compo_dict` already contains
    `td007_p` (key 'td007_paroi_opaque') and `td008_p` (key 'td008_baie'),
    whose file names are 'td007_paroi_opaque_annexe.csv' and
    'td008_baie_annexe.csv', so no separate write is done for them.

    Parameters
    ----------
    dept_dir : pathlib.Path
        Directory holding the raw 'td0xx_*.csv' files of one department.

    Returns
    -------
    None
    """
    print(dept_dir)
    annexe_dept_dir = annexe_dir / dept_dir.name
    annexe_dept_dir.mkdir(exist_ok=True, parents=True)

    def _load(file_name):
        # Raw tables are read with every column typed as text.
        return pd.read_csv(dept_dir / file_name, dtype=str)

    def _save(table, file_name):
        # Round the float columns, then write into the department's folder.
        round_float_cols(table).to_csv(annexe_dept_dir / file_name)

    # LOAD TABLES
    td007 = _load('td007_paroi_opaque.csv')
    td006 = _load('td006_batiment.csv')
    td001 = _load('td001_dpe.csv')
    td008 = _load('td008_baie.csv')
    td008 = td008.drop('td008_baie_id', axis=1)
    td010 = _load('td010_pont_thermique.csv')

    # ENVELOPE PROCESSING
    (td001_enveloppe_agg, td008_p, td007_p,
     env_compo_dict, env_compo_agg_dict) = run_enveloppe_processing(td001, td006, td007, td008, td010)

    _save(td001_enveloppe_agg, 'td001_enveloppe_agg_annexe.csv')
    table = None
    for name, table in env_compo_dict.items():
        _save(table, f'{name}_annexe.csv')
    for name, table in env_compo_agg_dict.items():
        _save(table, f'td001_{name}_annexe.csv')

    # FREE MEMORY before loading the system tables.
    table = None
    del td001_enveloppe_agg, td008_p, td007_p, env_compo_dict, env_compo_agg_dict
    del td007, td008, td010

    # SYSTEM PROCESSING
    td011 = _load('td011_installation_chauffage.csv')
    td012 = _load('td012_generateur_chauffage.csv')
    td013 = _load('td013_installation_ecs.csv')
    td014 = _load('td014_generateur_ecs.csv')

    (td011_p, td012_p, td001_sys_ch_agg,
     td013_p, td014_p, td001_sys_ecs_agg) = run_system_processing(td001, td006, td011, td012, td013, td014)

    _save(td001_sys_ch_agg, 'td001_sys_ch_agg_annexe.csv')
    _save(td001_sys_ecs_agg, 'td001_sys_ecs_agg_annexe.csv')
    _save(td011_p, 'td011_installation_chauffage_annexe.csv')
    _save(td012_p, 'td012_generateur_chauffage_annexe.csv')
    _save(td013_p, 'td013_installation_ecs_annexe.csv')
    _save(td014_p, 'td014_generateur_ecs_annexe.csv')

    # FREE MEMORY
    del td011_p, td012_p, td001_sys_ch_agg, td013_p, td014_p, td001_sys_ecs_agg
    del td011, td012, td013, td014

    # td001 general annex; not passed through round_float_cols.
    postprocessing_td001(td001)[['nom_methode_dpe_norm', 'id']].rename(columns={'id': 'td001_dpe_id'}).to_csv(
        annexe_dept_dir / 'td001_annexe_generale.csv')



# Write the documentation JSON files once, before any table processing.
build_doc(annexe_dir)

# Order the department directories so that those which do not yet have their
# final 'td001_annexe_generale.csv' come first.
list_dir = list(Path(data_dir).iterdir())
firsts = [a_dir for a_dir in list_dir if not (annexe_dir / a_dir.name / 'td001_annexe_generale.csv').is_file()]
lasts = [a_dir for a_dir in list_dir if (annexe_dir / a_dir.name / 'td001_annexe_generale.csv').is_file()]
print(len(firsts), len(lasts))
list_dir = firsts + lasts

# Inspection run: only the first directory is loaded and sent through the
# envelope pipeline, then the loop stops. The loaded tables and the envelope
# results stay in the kernel namespace for the exploration cells below.
# Nothing is written to disk here; the statements that used to follow the
# `break` were unreachable and duplicated run_postprocessing_by_depts, which
# is the function to call per directory for a full run.
for dept_dir in list_dir:
    print(dept_dir)
    annexe_dept_dir = annexe_dir / dept_dir.name
    annexe_dept_dir.mkdir(exist_ok=True, parents=True)
    # LOAD TABLES
    td007 = pd.read_csv(dept_dir / 'td007_paroi_opaque.csv', dtype=str)
    td006 = pd.read_csv(dept_dir / 'td006_batiment.csv', dtype=str)
    td001 = pd.read_csv(dept_dir / 'td001_dpe.csv', dtype=str)
    td008 = pd.read_csv(dept_dir / 'td008_baie.csv', dtype=str)
    td008 = td008.drop('td008_baie_id', axis=1)
    td010 = pd.read_csv(dept_dir / 'td010_pont_thermique.csv', dtype=str)
    # ENVELOPE PROCESSING
    td001_enveloppe_agg, td008_p, td007_p, env_compo_dict, env_compo_agg_dict = run_enveloppe_processing(
        td001, td006, td007, td008, td010)
    break

0 96
D:\data\dpe_full\depts\1
tv025_type_batiment not found
tv025_type_emetteur not found
tv025_equipement_intermittence not found
tv026_classe_inertie_plancher_bas not found
tv026_classe_inertie_plancher_haut not found
tv026_classe_inertie_paroi_verticale not found
tv026_classe_inertie_classe_inertie not found
tv029_simu_type_distribution not found


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_col] = table[pond]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_value_col_temp] = table[pond_col] * table[value_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [112]:
td001.shape

(97964, 69)

In [99]:
td007_murs_agg_annexe

NameError: name 'td007_murs_agg_annexe' is not defined

In [108]:
env_compo_agg_dict.keys()

dict_keys(['td007_murs_agg', 'td007_ph_agg', 'td007_pb_agg', 'td008_agg', 'td010_agg'])

In [4]:
env_compo_agg_dict['td010_agg'].type_isolation_mur

td001_dpe_id
178        ITI+ITE
427            ITR
428            ITI
524            ITI
1017           ITI
            ...   
9430358        ITI
9430417        ITI
9430549        ITI
9430721        ITI
9431176        ITI
Name: type_isolation_mur, Length: 43184, dtype: object

In [13]:
from generate_dpe_annexes_scripts.trtvtables import DPETrTvTables


In [None]:
tv003.trtv_table_dicttv003

In [18]:
tv003 = DPETrTvTables().trtv_table_dict['tv003_umur']

In [34]:
# Build one "minimum year" column for tv003: start from the construction
# period minimum, converted to str.
tv003['annee_isolation_min_all']=tv003.tv003_annee_construction_min.astype(str)
# Rows without a construction period minimum...
null = tv003.tv003_annee_construction_min.isnull()
# ...take the isolation period minimum instead (also as str).
# NOTE(review): because of astype(str), a value missing in both source columns
# presumably ends up as the text 'nan' rather than a real NaN - confirm.
tv003.loc[null,'annee_isolation_min_all']=tv003.loc[null,'tv003_annee_isolation_min'].astype(str)



In [35]:
# Build one "maximum year" column for tv003: start from the construction
# period maximum, converted to str.
tv003['annee_isolation_max_all']=tv003.tv003_annee_construction_max.astype(str)
# Rows without a construction period maximum...
null = tv003.tv003_annee_construction_max.isnull()
# ...take the isolation period maximum instead (also as str).
# NOTE(review): because of astype(str), a value missing in both source columns
# presumably ends up as the text 'nan' rather than a real NaN - confirm.
tv003.loc[null,'annee_isolation_max_all']=tv003.loc[null,'tv003_annee_isolation_max'].astype(str)



In [36]:
import numpy as np

In [37]:
tv003[['annee_isolation_min_all','annee_isolation_max_all']].dropna()

Unnamed: 0,annee_isolation_min_all,annee_isolation_max_all
0,1948,1974
1,1975,1977
2,1978,1982
3,1983,1988
4,1989,2000
...,...,...
59,1978,1982
60,1983,1988
61,1989,2000
62,2001,2005


In [41]:
# Convert both combined year columns to float so they can be aggregated
# numerically.
# NOTE(review): fillna(np.nan) replaces missing values with NaN, i.e. it looks
# like a no-op; the conversion itself is done by astype(float) - confirm.
tv003[['annee_isolation_min_all','annee_isolation_max_all']]=tv003[['annee_isolation_min_all','annee_isolation_max_all']].fillna(np.nan).astype(float)

In [46]:
tv003.groupby('tv003_umur')[['annee_isolation_min_all','annee_isolation_max_all']].min()

Unnamed: 0_level_0,annee_isolation_min_all,annee_isolation_max_all
tv003_umur,Unnamed: 1_level_1,Unnamed: 2_level_1
0.36,2006.0,2900.0
0.4,2001.0,2005.0
0.42,2001.0,2900.0
0.45,1989.0,2000.0
0.47,1989.0,2000.0
0.5,1989.0,2000.0
0.53,1989.0,2000.0
0.56,1989.0,2000.0
0.7,1983.0,1988.0
0.74,1983.0,1988.0


In [45]:
tv003[['tv003_umur','annee_isolation_min_all','annee_isolation_max_all']]

Unnamed: 0,tv003_umur,annee_isolation_min_all,annee_isolation_max_all
0,2.50,1948.0,1974.0
1,1.00,1975.0,1977.0
2,0.80,1978.0,1982.0
3,0.70,1983.0,1988.0
4,0.45,1989.0,2000.0
...,...,...,...
59,0.89,1978.0,1982.0
60,0.78,1983.0,1988.0
61,0.50,1989.0,2000.0
62,0.47,2001.0,2005.0


In [20]:
tv003.sort_values('tv003_umur')

Unnamed: 0,tv003_code,tv003_mur_isole,tv003_annee_construction,tv003_annee_construction_min,tv003_annee_construction_max,tv003_annee_isolation,tv003_annee_isolation_min,tv003_annee_isolation_max,tv003_effet_joule,tv003_umur,tv017_code,tv017_t_ext_moyen,tv017_peta_cw,tv017_dh14,tv017_prs1,tv003_umur_id
27,TV003_028,,à partir de 2006,2006,2900,,,,0,0.36,H2,8.08,12.00,33300,3.40,28
6,TV003_007,,à partir de 2006,2006,2900,,,,1,0.36,H1,6.58,10.50,42030,3.60,7
57,TV003_058,1,,,,à partir de 2006,2006,2900,,0.36,H2,8.08,12.00,33300,3.40,58
20,TV003_021,,à partir de 2006,2006,2900,,,,1,0.36,H2,8.08,12.00,33300,3.40,21
13,TV003_014,,à partir de 2006,2006,2900,,,,0,0.36,H1,6.58,10.50,42030,3.60,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14,TV003_015,,1948 à 1974,1948,1974,,,,1,2.50,H2,8.08,12.00,33300,3.40,15
21,TV003_022,,1948 à 1974,1948,1974,,,,0,2.50,H2,8.08,12.00,33300,3.40,22
28,TV003_029,,1948 à 1974,1948,1974,,,,1,2.50,H3,9.65,14.50,22200,2.90,29
7,TV003_008,,1948 à 1974,1948,1974,,,,0,2.50,H1,6.58,10.50,42030,3.60,8


In [12]:
env_compo_dict['td007_murs'].tv003_umur

0          NaN
1          NaN
2          NaN
3          NaN
6          NaN
          ... 
237478    0.36
237479    0.36
237480    0.36
237481    0.36
237482    0.36
Name: tv003_umur, Length: 138084, dtype: category
Categories (20, object): [0.36, 0.40, 0.42, 0.45, ..., 1.05, 1.11, 2.00, 2.50]

In [11]:
env_compo_dict['td007_murs'][['coefficient_transmission_thermique_paroi','meth_calc_U','isolation']]

Unnamed: 0,coefficient_transmission_thermique_paroi,meth_calc_U,isolation
0,0.35,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI
1,0.35,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI
2,0.35,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI
3,0.35,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI
6,0.43,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI
...,...,...,...
237478,0.36,PAR DEFAUT PERIODE : ISOLE,ISOLE DEFAUT POST 1988
237479,0.36,PAR DEFAUT PERIODE : ISOLE,ISOLE DEFAUT POST 1988
237480,0.36,PAR DEFAUT PERIODE : ISOLE,ISOLE DEFAUT POST 1988
237481,0.36,PAR DEFAUT PERIODE : ISOLE,ISOLE DEFAUT POST 1988


In [5]:
td007

Unnamed: 0,id,td006_batiment_id,tr014_type_parois_opaque_id,reference,deperdition_thermique,tv001_coefficient_reduction_deperditions_id,tv002_local_non_chauffe_id,coefficient_transmission_thermique_paroi,coefficient_transmission_thermique_paroi_non_isolee,tv003_umur_id,tv004_umur0_id,tv005_upb_id,tv006_upb0_id,tv007_uph_id,tv008_uph0_id,resistance_thermique_isolation,epaisseur_isolation,surface_paroi
0,688,135,1,Mur 1,12.33,1,,0.35,0.00,,78,,,,,0.00,10.00,35.22
1,689,135,1,Mur 2,11.61,1,,0.35,0.00,,78,,,,,0.00,10.00,33.17
2,690,135,1,Mur 3,13.76,1,,0.35,0.00,,78,,,,,0.00,10.00,39.31
3,691,135,1,Mur 4,10.87,1,,0.35,0.00,,78,,,,,0.00,10.00,31.06
4,692,135,4,Plafond,5.06,1,,0.17,0.00,,,,,,4,0.00,20.00,29.52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237480,24114982,5419675,1,MUR n°1,8.00,1,,0.36,,52,78,,,,,0.00,0.00,22.98
237481,24114983,5419675,1,MUR n°1,5.00,1,,0.36,,52,78,,,,,0.00,0.00,12.75
237482,24114984,5419675,1,MUR n°2,5.00,38,,0.36,,52,78,,,,,0.00,0.00,15.86
237483,24114985,5419675,4,PLAFOND n°1,10.00,40,5,0.20,,,,,,100,10,0.00,0.00,53.35


In [111]:
env_compo_agg_dict['td007_murs_agg'].isolation_murs_top.unique()

array(['ISOLE SAISI', 'ISOLATION INCONNUE (DEFAUT)', 'NON ISOLE',
       'ISOLE DEFAUT POST 1988', 'ISOLE DEFAUT PRE 1988', nan,
       'STRUCTURE ISOLANTE (ITR)'], dtype=object)

In [102]:
env_compo_agg_dict['td007_murs_agg'].meth_calc_U_murs_top.unique()

array(['EPAISSEUR ISOLATION SAISIE',
       'PAR DEFAUT PERIODE : ISOLATION INCONNUE', 'MUR NON ISOLE U=2',
       'PAR DEFAUT PERIODE : ISOLE', 'RESISTANCE ISOLATION SAISIE', nan,
       'STRUCTURE ISOLANTE (ITR) U<1', 'U SAISI DIRECTEMENT : ISOLE',
       'INCONNUE'], dtype=object)

In [97]:
env_compo_agg_dict['td007_murs_agg'].type_LNC_murs_top.unique()

Unnamed: 0_level_0,type_adjacence_top,type_adjacence_array,type_LNC_murs_array,type_LNC_murs_top,surface_murs_bat_adj,surface_murs_exterieur,surface_murs_lnc,surface_murs_nondef,surface_murs_paroi_enterree,meth_calc_U_murs_top,...,epaisseur_structure_murs_lnc_top,meth_calc_U_murs_bat_adj_top,U_murs_bat_adj_top,epaisseur_isolation_murs_bat_adj_top,resistance_thermique_isolation_murs_bat_adj_top,isolation_murs_bat_adj_top,annee_isole_uniforme_min_murs_bat_adj_top,annee_isole_uniforme_max_murs_bat_adj_top,materiaux_structure_murs_bat_adj_top,epaisseur_structure_murs_bat_adj_top
td001_dpe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
178,EXTERIEUR,[EXTERIEUR],[],,,138.76,,,,EPAISSEUR ISOLATION SAISIE,...,,,,,,,,,,
427,EXTERIEUR,"[EXTERIEUR, LNC]",[Garage],Garage,,87.31,9.06,,,EPAISSEUR ISOLATION SAISIE,...,30,,,,,,,,,
428,EXTERIEUR,"[EXTERIEUR, LNC]",[Garage],Garage,,105.34,15.69,,,EPAISSEUR ISOLATION SAISIE,...,28,,,,,,,,,
524,EXTERIEUR,"[EXTERIEUR, LNC]",[Garage],Garage,,89.10,12.03,,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,...,20 et -,,,,,,,,,
1017,EXTERIEUR,[EXTERIEUR],[],,,128.02,,,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9430358,EXTERIEUR,"[EXTERIEUR, LNC]",[Garage],Garage,,67.88,19.35,,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,...,Sans objet,,,,,,,,,
9430417,EXTERIEUR,"[EXTERIEUR, LNC]",[Garage],Garage,,84.35,18.61,,,PAR DEFAUT PERIODE : ISOLE,...,20 et -,,,,,,,,,
9430549,EXTERIEUR,[EXTERIEUR],[],,,94.70,,,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,...,,,,,,,,,,
9430721,EXTERIEUR,"[EXTERIEUR, LNC]",[Circulation avec ouverture directe sur l'exté...,Circulation avec ouverture directe sur l'extér...,,79.13,29.67,,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,...,20 et -,,,,,,,,,


In [96]:
env_compo_agg_dict.keys()

dict_keys(['td007_murs_agg', 'td007_ph_agg', 'td007_pb_agg', 'td008_agg', 'td010_agg'])

In [62]:
td001=td001.rename(columns={'id':'td001_dpe_id'})

In [63]:
td0016=td001.merge(td006,on='td001_dpe_id',how='left')

In [64]:
td0016.classe_consommation_energie

0        C
1        E
2        E
3        C
4        D
        ..
97959    D
97960    D
97961    A
97962    D
97963    N
Name: classe_consommation_energie, Length: 97964, dtype: object

In [65]:
td0016 = td0016.dropna(subset=['besoin_chauffage'])

In [66]:
cols = ['id','td001_dpe_id','surface_habitable','consommation_energie','classe_consommation_energie']
cols += ['besoin_chauffage', 'deperdition_enveloppe',
       'deperdition_renouvellement_air', 'tv014_permeabilite_id',
       'tv015_valeur_conventionnelle_renouvellement_air_id',
       'tv026_classe_inertie_id', 'altitude', 'nombre_niveau', 'hsp_moyenne',
       'nombre_appartements', 'cclim', 'comclim']



In [67]:
td006.groupby('td001_dpe_id').besoin_chauffage.count().sort_values()

td001_dpe_id
1000019    1
6526033    1
6526069    1
6526402    1
6527108    1
          ..
3768476    1
3768482    1
3768759    1
3765305    1
999972     1
Name: besoin_chauffage, Length: 56643, dtype: int64

In [68]:
td0016[cols]

Unnamed: 0,id,td001_dpe_id,surface_habitable,consommation_energie,classe_consommation_energie,besoin_chauffage,deperdition_enveloppe,deperdition_renouvellement_air,tv014_permeabilite_id,tv015_valeur_conventionnelle_renouvellement_air_id,tv026_classe_inertie_id,altitude,nombre_niveau,hsp_moyenne,nombre_appartements,cclim,comclim
0,135,178,190.00,132.00,C,232.46,308.39,93.20,2,5,3,520,2.00,2.50,1,0.00,0.00
1,282,390,106.87,286.00,E,0.00,0.00,0.00,,,,0,0.00,0.00,0,0.00,0.00
2,307,427,102.38,319.91,E,196.63,268.68,67.40,3,4,5,240,1.00,2.58,0,0.00,1.00
3,308,428,114.80,148.09,C,167.45,226.14,75.74,3,4,5,240,1.00,2.50,0,0.00,1.00
4,391,524,120.62,210.00,D,121.92,187.94,55.87,2,7,3,215,1.00,2.50,1,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97957,5419210,9430417,99.00,399.00,F,240.50,291.63,63.55,2,4,3,224,1.00,2.50,1,0.00,0.00
97958,5419296,9430549,200.00,246.00,E,245.08,346.36,160.34,2,9,3,476,2.00,2.50,1,0.00,0.00
97959,5419402,9430721,82.61,162.00,D,46.27,89.93,34.73,2,7,1,225,1.00,2.50,1,0.00,0.00
97960,5419444,9430783,77.02,182.00,D,0.00,0.00,0.00,,,,0,0.00,0.00,0,0.00,0.00


In [69]:
td007=td007.rename(columns={'id':'td007_paroi_opaque_id'})

td007=td007.merge(td007_p,on='td007_paroi_opaque_id')

In [70]:
td007.loc[td007.td006_batiment_id=='135'].deperdition_thermique.astype(float).sum()

64.85

In [71]:
td008=td008.rename(columns={'id':'td008_baie_id'})

td008=td008.merge(td008_p,on='td008_baie_id')

In [83]:
sel=td008.loc[td008.td001_dpe_id=='178']
(sel.deperdition.astype(float)*sel.nb_baie_calc).sum()

56.629999999999995

In [85]:
from generate_dpe_annexes_scripts.trtvtables import DPETrTvTables

td010_types = {'longueur': 'float'}


def merge_td010_tr_tv(td010):
    """Return a copy of `td010` merged with all tr and tv tables.

    The input frame is copied first. After both merges, when a column label
    appears more than once only its first occurrence is kept.
    """
    trtv = DPETrTvTables()
    merged = trtv.merge_all_tr_tables(td010.copy())
    merged = trtv.merge_all_tv_tables(merged)

    # duplicated() flags every repeat of a label after its first occurrence.
    first_occurrence = ~merged.columns.duplicated()
    return merged.loc[:, first_occurrence]


In [86]:
td010 = merge_td010_tr_tv(td010)

In [94]:
sel = td010.loc[td010.td006_batiment_id=='135']
(sel.longueur.astype(float)*sel.tv013_k.astype(float)).sum()

28.621199999999998

In [None]:
td008.loc[td008.td001_dpe_id=='178'].deperdition.astype(float).sum()

In [2]:
from td007_processing import merge_td007_tr_tv, postprocessing_td007, generate_pb_table, \
    generate_ph_table, generate_murs_table, agg_td007_murs_to_td001, agg_td007_ph_to_td001, agg_td007_pb_to_td001

from td008_processing import merge_td008_tr_tv, postprocessing_td008
from td001_merge import merge_td001_dpe_id_envelope
from td007_processing import agg_td007_to_td001_essential, agg_surface_envelope
from td008_processing import agg_td008_to_td001_essential, agg_td008_to_td001
from td010_processing import merge_td010_tr_tv, postprocessing_td010, agg_td010_td001
# Scratch cell: the same statements as the body of run_enveloppe_processing,
# run at top level so that every intermediate table stays available in the
# kernel. It expects td001, td006, td007, td008, td010, pd and unique_ordered
# to be defined already (none of them is created in this cell).

# Remember the raw column names before any column is added.
td008_raw_cols = td008.columns.tolist()
td007_raw_cols = td007.columns.tolist()
td010_raw_cols = td010.columns.tolist()

td001, td006, td007, td008, td010 = merge_td001_dpe_id_envelope(td001=td001, td006=td006, td007=td007, td008=td008,
                                                                td010=td010)
# TABLE POST-PROCESSING
td008 = merge_td008_tr_tv(td008)
td008 = postprocessing_td008(td008)

td007 = merge_td007_tr_tv(td007)
td007 = postprocessing_td007(td007, td008)

td010 = merge_td010_tr_tv(td010)
td010 = postprocessing_td010(td010)

# TABLES PER COMPONENT TYPE
td007_pb = generate_pb_table(td007)
td007_ph = generate_ph_table(td007)
td007_murs = generate_murs_table(td007)

# SUMMARY TABLES ACROSS ALL THEMES

td007_agg_essential = agg_td007_to_td001_essential(td007)
td008_agg_essential = agg_td008_to_td001_essential(td008)
surfaces_agg_essential = agg_surface_envelope(td007, td008)

td001_enveloppe_agg = pd.concat([td007_agg_essential, td008_agg_essential, surfaces_agg_essential], axis=1)

td001_enveloppe_agg.index.name = 'td001_dpe_id'
# td008 annex: columns that were not in the raw table (minus two helper
# columns), plus the row identifier.
cols = [el for el in td008.columns if el not in td008_raw_cols + ['fen_lib_from_tv009',
                                                                  'fen_lib_from_tv021']]
cols.append('td008_baie_id')
cols = unique_ordered(cols)
td008_p = td008[cols]
# td007 annex: same selection, with four helper columns excluded.
cols = [el for el in td007.columns if
        el not in td007_raw_cols + ["qualif_surf", 'surface_paroi_opaque_calc', 'surface_paroi_totale_calc_v1',
                                    'surface_paroi_totale_calc_v2']]
cols.append('td007_paroi_opaque_id')
cols = unique_ordered(cols)
td007_p = td007[cols]


# td010 annex: every non-raw column plus the row identifier.
cols = [el for el in td010.columns if
        el not in td010_raw_cols]
cols.append('td010_pont_thermique_id')
cols = unique_ordered(cols)
td010_p = td010[cols]

# AGGREGATED TABLES PER COMPONENT TYPE
td007_murs_agg = agg_td007_murs_to_td001(td007_murs)
td007_ph_agg = agg_td007_ph_to_td001(td007_ph)
td007_pb_agg = agg_td007_pb_to_td001(td007_pb)
td008_agg = agg_td008_to_td001(td008)
td010_agg = agg_td010_td001(td010)

# Component-level tables, keyed by annex name.
env_compo_dict = dict(td007_paroi_opaque=td007_p,
                      td007_ph=td007_ph,
                      td007_pb=td007_pb,
                      td007_murs=td007_murs,
                      td008_baie=td008_p,
                      td010_pont_thermique=td010_p)

# td001-level aggregated tables, keyed by annex name.
env_compo_agg_dict = dict(td007_murs_agg=td007_murs_agg,
                          td007_ph_agg=td007_ph_agg,
                          td007_pb_agg=td007_pb_agg, td008_agg=td008_agg, td010_agg=td010_agg)


tv025_type_batiment not found
tv025_type_emetteur not found
tv025_equipement_intermittence not found
tv026_classe_inertie_plancher_bas not found
tv026_classe_inertie_plancher_haut not found
tv026_classe_inertie_paroi_verticale not found
tv026_classe_inertie_classe_inertie not found
tv029_simu_type_distribution not found


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_col] = table[pond]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_value_col_temp] = table[pond_col] * table[value_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


['td007_paroi_opaque_id', 'td006_batiment_id', 'tr014_type_parois_opaque_id', 'reference', 'deperdition_thermique', 'tv001_coefficient_reduction_deperditions_id', 'tv002_local_non_chauffe_id', 'U', 'coefficient_transmission_thermique_paroi_non_isolee', 'tv003_umur_id', 'tv004_umur0_id', 'tv005_upb_id', 'tv006_upb0_id', 'tv007_uph_id', 'tv008_uph0_id', 'resistance_thermique_isolation', 'epaisseur_isolation', 'surface_paroi', 'td001_dpe_id', 'tr014_code', 'tr014_sous_type', 'tv001_code', 'tv001_aiu_aue', 'tv001_aiu_aue_min', 'tv001_aiu_aue_max', 'tv001_uv_ue', 'tv001_aue_isole', 'tv001_aiu_isole', 'tv001_valeur', 'tv002_code', 'type_local_non_chauffe', 'tv002_uvue', 'tv002_type_batiment', 'tv003_code', 'tv003_mur_isole', 'tv003_annee_construction', 'tv003_annee_construction_min', 'tv003_annee_construction_max', 'tv003_annee_isolation', 'tv003_annee_isolation_min', 'tv003_annee_isolation_max', 'tv003_effet_joule', 'tv003_umur', 'tv017_code', 'tv017_t_ext_moyen', 'tv017_peta_cw', 'tv017_dh

In [3]:
from utils import agg_pond_top_freq

In [4]:
# Aggregate td010 'tv013_isolation_mur' per td001_dpe_id, weighted by 'longueur'
# (presumably the weighted most frequent value, per the helper's name — confirm in utils).
type_isol_mur = agg_pond_top_freq(td010, 'tv013_isolation_mur', 'longueur', 'td001_dpe_id')
type_isol_mur = type_isol_mur.to_frame('type_isolation_mur')

In [5]:
# Display the per-DPE wall insulation type frame built in the previous cell.
type_isol_mur

Unnamed: 0_level_0,type_isolation_mur
td001_dpe_id,Unnamed: 1_level_1
178,ITI+ITE
427,ITR
428,ITI
524,ITI
1017,ITI
...,...
9430358,ITI
9430417,ITI
9430549,ITI
9430721,ITI


In [9]:
# Inspect the tr014 wall-type code column of the processed td007 table.
td007_p.tr014_code

0         TR014_001
1         TR014_001
2         TR014_001
3         TR014_001
4         TR014_004
            ...    
237480    TR014_001
237481    TR014_001
237482    TR014_001
237483    TR014_004
237484    TR014_003
Name: tr014_code, Length: 237485, dtype: category
Categories (5, object): [TR014_001, TR014_002, TR014_003, TR014_004, TR014_005]

In [5]:
# Load the raw thermal-bridge table; dtype=str keeps every column as text,
# so numeric columns must be cast before any arithmetic.
td010 = pd.read_csv(dept_dir / 'td010_pont_thermique.csv', dtype=str)


In [3]:
# Give the raw primary key the name used as merge key with td007_p.
td007 = td007.rename(columns ={'id':'td007_paroi_opaque_id'})

In [4]:
# Left-join the processed columns onto the raw table.
# Not idempotent: running this cell twice merges td007_p a second time.
td007 = td007.merge(td007_p, how='left', on='td007_paroi_opaque_id')

# traitements murs

In [74]:
from utils import agg_pond_top_freq,agg_pond_avg

In [6]:
# Build the wall table: rows of td007 whose tr014_type_parois_opaque_id is '1' or '2',
# taken as an independent copy so the labelling below never writes into td007.
td007_murs = td007.loc[td007.tr014_type_parois_opaque_id.isin(['2', '1'])].copy()

float_cols = ['coefficient_transmission_thermique_paroi_non_isolee', 'coefficient_transmission_thermique_paroi',
              'epaisseur_isolation', 'resistance_thermique_isolation']
td007_murs[float_cols] = td007_murs[float_cols].astype(float)

# ## uniform tv003 label

td007_murs['tv003_periode_isolation_uniforme'] = td007_murs.tv003_annee_construction.astype('string')

td007_murs['tv003_label_isolation_uniforme'] = td007_murs.tv003_annee_construction.astype('string')

# Where no construction period is available, fall back on the insulation period.
null = td007_murs['tv003_label_isolation_uniforme'].isnull()

td007_murs.loc[null, 'tv003_label_isolation_uniforme'] = td007_murs.loc[null, 'tv003_annee_isolation'].astype(
    'string')

inconnu = td007_murs.tv003_mur_isole.isnull() & (~td007_murs.tv003_annee_construction.isnull())
non_isole = td007_murs.tv003_mur_isole == '0'
isole = td007_murs.tv003_mur_isole == '1'
is_annee_construction = ~td007_murs.tv003_annee_construction.isnull()
is_annee_isolation = ~td007_murs.tv003_annee_isolation.isnull()
isole_constr = isole & is_annee_construction
isole_isol = isole & (~is_annee_construction)

# Prefix the period with the insulation status; each mask is applied once.
td007_murs.loc[inconnu, 'tv003_label_isolation_uniforme'] = (
    'isol. inconnue periode constr : ' + td007_murs.loc[inconnu, 'tv003_label_isolation_uniforme'])
td007_murs.loc[non_isole, 'tv003_label_isolation_uniforme'] = 'non isolé'
td007_murs.loc[isole_constr, 'tv003_label_isolation_uniforme'] = (
    'isolé periode constr : ' + td007_murs.loc[isole_constr, 'tv003_label_isolation_uniforme'])
td007_murs.loc[isole_isol, 'tv003_label_isolation_uniforme'] = (
    'isolé periode isolation :' + td007_murs.loc[isole_isol, 'tv003_label_isolation_uniforme'])

# uniform insulation year: construction bounds, overridden by insulation bounds when present.

td007_murs['annee_isole_uniforme_min'] = td007_murs.tv003_annee_construction_min.astype('string')
td007_murs['annee_isole_uniforme_max'] = td007_murs.tv003_annee_construction_max.astype('string')
td007_murs.loc[is_annee_isolation, 'annee_isole_uniforme_min'] = td007_murs.loc[
    is_annee_isolation, 'tv003_annee_isolation_min'].astype('string')
td007_murs.loc[is_annee_isolation, 'annee_isole_uniforme_max'] = td007_murs.loc[
    is_annee_isolation, 'tv003_annee_isolation_max'].astype('string')

# ## U computation method label

td007_murs['meth_calc_U'] = 'INCONNUE'

# boolean masks (U values are compared after rounding to 2 decimals)
U = td007_murs.coefficient_transmission_thermique_paroi.round(2)
U_non_isolee = td007_murs.coefficient_transmission_thermique_paroi_non_isolee.round(2)
bool_U_egal_0 = U == 0.00
bool_U_U0 = U == U_non_isolee
# The parentheses matter: `|` binds tighter than `>=`, so without them the expression is
# parsed as `U >= (2 | non_isole)` instead of "U >= 2 OR declared non-insulated".
bool_U_2 = (U >= 2) | non_isole
bool_U_U0 = bool_U_U0 & (~bool_U_2)
bool_U_U0_auto_isol = bool_U_U0 & (U_non_isolee < 1)
bool_U_brut = (U <= 1) & (~bool_U_U0)
bool_U_brut_non_isole = (U > 1) & (~bool_U_U0)
bool_U_par_e = td007_murs.epaisseur_isolation > 0
bool_U_par_r = td007_murs.resistance_thermique_isolation > 0


# replace non-positive (and missing) thickness / resistance by NaN: a 0 carries no information.

td007_murs.loc[~bool_U_par_e, 'epaisseur_isolation'] = np.nan
td007_murs.loc[~bool_U_par_r, 'resistance_thermique_isolation'] = np.nan


# label imputation — order matters: later assignments override earlier ones.

td007_murs.loc[bool_U_brut, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : ISOLE'
td007_murs.loc[bool_U_brut_non_isole, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : NON ISOLE'
td007_murs.loc[bool_U_par_e, 'meth_calc_U'] = 'EPAISSEUR ISOLATION SAISIE'
td007_murs.loc[bool_U_par_r, 'meth_calc_U'] = 'RESISTANCE ISOLATION SAISIE'
td007_murs.loc[bool_U_2, 'meth_calc_U'] = 'MUR NON ISOLE U=2'
td007_murs.loc[bool_U_U0, 'meth_calc_U'] = 'MUR NON ISOLE U<2'
td007_murs.loc[bool_U_U0_auto_isol, 'meth_calc_U'] = 'STRUCTURE ISOLANTE (ITR) U<1'
td007_murs.loc[inconnu, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLATION INCONNUE'
td007_murs.loc[isole, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLE'
td007_murs.loc[bool_U_egal_0, 'meth_calc_U'] = 'ERREUR : U=0'

# ## insulation label (derived from the text of meth_calc_U)

td007_murs['isolation'] = 'NON ISOLE'
is_isole = ~td007_murs.meth_calc_U.str.contains('NON ISOLE|INCONNUE')
td007_murs.loc[is_isole, 'isolation'] = 'ISOLE SAISI'
is_isole_defaut = is_isole & (td007_murs.meth_calc_U.str.contains('DEFAUT'))
td007_murs.loc[is_isole_defaut, 'isolation'] = 'ISOLE DEFAUT PRE 1988'

# `inconnu` is redefined here from the method label computed above.
inconnu = td007_murs.meth_calc_U.str.contains('INCONNUE')
# Years are stored as strings; the comparison is lexicographic.
post_88 = td007_murs['annee_isole_uniforme_min'] >= "1988"

td007_murs.loc[inconnu, 'isolation'] = 'ISOLATION INCONNUE (DEFAUT)'

td007_murs.loc[(inconnu | is_isole_defaut) & post_88, 'isolation'] = 'ISOLE DEFAUT POST 1988'

is_isole_struc = is_isole & (td007_murs.meth_calc_U.str.contains('STRUCTURE'))
td007_murs.loc[is_isole_struc, 'isolation'] = 'STRUCTURE ISOLANTE (ITR)'

is_err = td007_murs.meth_calc_U.str.contains('ERREUR')

td007_murs.loc[is_err, 'isolation'] = 'NONDEF'

# ## adjacency label — later assignments override earlier ones.

td007_murs['type_adjacence'] = 'NONDEF'

ext = td007_murs.tv001_code == 'TV001_001'

td007_murs.loc[ext, 'type_adjacence'] = 'EXTERIEUR'

is_dep = td007_murs.b_infer.round(1) >= 0.9

td007_murs.loc[is_dep, 'type_adjacence'] = 'EXTERIEUR'

enterre = td007_murs.tv001_code == 'TV001_002'

td007_murs.loc[enterre, 'type_adjacence'] = 'PAROI_ENTERREE'

not_null = ~td007_murs.tv002_local_non_chauffe.isnull()

td007_murs.loc[not_null, 'type_adjacence'] = 'LNC'

# String comparison on the code: every code sorting after 'TV001_004' is tagged LNC.
is_lnc = td007_murs.tv001_code.astype('string') > 'TV001_004'

td007_murs.loc[is_lnc, 'type_adjacence'] = 'LNC'

is_adj = td007_murs.tv001_code == 'TV001_004'

td007_murs.loc[is_adj, 'type_adjacence'] = 'BAT_ADJ'

# TODO :tv001_262 ???


In [7]:
# Friendlier column names for the aggregation step.
murs_renames = {
    'tv004_epaisseur': 'epaisseur_structure',
    'tv002_local_non_chauffe': 'type_local_non_chauffe',
    'coefficient_transmission_thermique_paroi': 'U',
}
td007_murs = td007_murs.rename(columns=murs_renames)

In [8]:
# Frequency of each tv001 code among wall rows.
td007_murs.tv001_code.value_counts()

TV001_001    79714
TV001_004     4429
TV001_100     3207
TV001_262     2106
TV001_038     1827
             ...  
TV001_164        0
TV001_180        0
TV001_181        0
TV001_185        0
TV001_129        0
Name: tv001_code, Length: 262, dtype: int64

# aggregation td001 murs

In [9]:
# Aggregate the wall table to one row per td001_dpe_id.
# agg_pond_top_freq is called with 'surface_paroi_opaque_infer' as the weighting column.
murs_vars = ['meth_calc_U', 'U', 'epaisseur_isolation', 'resistance_thermique_isolation', 'isolation',
             'annee_isole_uniforme_min', 'annee_isole_uniforme_max', 'materiaux_structure', 'epaisseur_structure']

type_adjacence_top = agg_pond_top_freq(
    td007_murs, 'type_adjacence', 'surface_paroi_opaque_infer', 'td001_dpe_id'
).to_frame('type_adjacence_top')

# Sorted list of the distinct adjacency types met in each DPE.
type_adjacence_arr_agg = td007_murs.groupby('td001_dpe_id').type_adjacence.agg(
    lambda x: np.sort(x.dropna().unique()).tolist())
type_adjacence_arr_agg.name = 'type_adjacence_array'

type_local_non_chauffe_arr_agg = td007_murs.groupby('td001_dpe_id').type_local_non_chauffe.agg(
    lambda x: np.sort(x.dropna().unique()).tolist()
).to_frame('type_LNC_murs_array')
type_local_non_chauffe_agg_top = agg_pond_top_freq(
    td007_murs, 'type_local_non_chauffe', 'surface_paroi_opaque_infer', 'td001_dpe_id'
).to_frame('type_LNC_murs_top')

# Summed surface per adjacency type, one column per type.
pivot = td007_murs.pivot_table(index='td001_dpe_id', columns='type_adjacence',
                               values='surface_paroi_opaque_infer', aggfunc='sum')
pivot.columns = ['surface_murs_' + col.lower() for col in pivot]

concat = [type_adjacence_top, type_adjacence_arr_agg,
          type_local_non_chauffe_arr_agg, type_local_non_chauffe_agg_top, pivot]

# Aggregates over all walls of the DPE.
for var in murs_vars:
    var_agg = agg_pond_top_freq(
        td007_murs, var, 'surface_paroi_opaque_infer', 'td001_dpe_id'
    ).to_frame(var + '_murs_top')
    concat.append(var_agg)

# Same aggregates restricted to one adjacency type at a time.
for type_adjacence in ['EXTERIEUR', 'LNC', 'BAT_ADJ']:
    sel = td007_murs.loc[td007_murs.type_adjacence == type_adjacence]
    suffix = '_murs_' + type_adjacence.lower() + '_top'
    for var in murs_vars:
        var_agg = agg_pond_top_freq(
            sel, var, 'surface_paroi_opaque_infer', 'td001_dpe_id'
        ).to_frame(var + suffix)
        concat.append(var_agg)

td007_murs_agg = pd.concat(concat, axis=1)
td007_murs_agg.index.name = 'td001_dpe_id'

In [11]:
# Persist the wall aggregates to the current working directory.
td007_murs_agg.to_csv('murs.csv')

# traitements planchers

In [12]:
# Build the floor table: rows of td007 whose tr014_type_parois_opaque_id is '3',
# taken as an independent copy so the labelling below never writes into td007.
td007_pb = td007.loc[td007.tr014_type_parois_opaque_id == '3'].copy()

float_cols = ['coefficient_transmission_thermique_paroi_non_isolee', 'coefficient_transmission_thermique_paroi',
              'epaisseur_isolation', 'resistance_thermique_isolation']
td007_pb[float_cols] = td007_pb[float_cols].astype(float)

# ## uniform tv005 label

td007_pb['tv005_periode_isolation_uniforme'] = td007_pb.tv005_annee_construction.astype('string')

td007_pb['tv005_label_isolation_uniforme'] = td007_pb.tv005_annee_construction.astype('string')

# Where no construction period is available, fall back on the insulation period.
null = td007_pb['tv005_label_isolation_uniforme'].isnull()

td007_pb.loc[null, 'tv005_label_isolation_uniforme'] = td007_pb.loc[null, 'tv005_annee_isolation'].astype(
    'string')

inconnu = td007_pb.tv005_pb_isole == "Inconnu"
non_isole = td007_pb.tv005_pb_isole == 'Non'
# NOTE(review): the sibling categories are spelled out ('Inconnu', 'Non', 'Terre Plein') while
# this one is matched against '1' — confirm the value actually stored in tv005_pb_isole.
isole = td007_pb.tv005_pb_isole == '1'
tp = td007_pb.tv005_pb_isole == 'Terre Plein'

is_annee_construction = ~td007_pb.tv005_annee_construction.isnull()
is_annee_isolation = ~td007_pb.tv005_annee_isolation.isnull()
isole_constr = isole & is_annee_construction
isole_isol = isole & (~is_annee_construction)

# Prefix the period with the insulation status. Each prefix is applied exactly once:
# repeating an assignment would prepend the prefix a second time.
td007_pb.loc[inconnu, 'tv005_label_isolation_uniforme'] = (
    'isol. inconnue periode constr : ' + td007_pb.loc[inconnu, 'tv005_label_isolation_uniforme'])
td007_pb.loc[non_isole, 'tv005_label_isolation_uniforme'] = 'non isolé'
td007_pb.loc[isole_constr, 'tv005_label_isolation_uniforme'] = (
    'isolé periode constr : ' + td007_pb.loc[isole_constr, 'tv005_label_isolation_uniforme'])
td007_pb.loc[isole_isol, 'tv005_label_isolation_uniforme'] = (
    'isolé periode isolation :' + td007_pb.loc[isole_isol, 'tv005_label_isolation_uniforme'])
td007_pb.loc[tp, 'tv005_label_isolation_uniforme'] = (
    'Terre Plein periode constr : ' + td007_pb.loc[tp, 'tv005_label_isolation_uniforme'])

# uniform insulation year: construction bounds, overridden by insulation bounds when present.

td007_pb['annee_isole_uniforme_min'] = td007_pb.tv005_annee_construction_min.astype('string')
td007_pb['annee_isole_uniforme_max'] = td007_pb.tv005_annee_construction_max.astype('string')
td007_pb.loc[is_annee_isolation, 'annee_isole_uniforme_min'] = td007_pb.loc[
    is_annee_isolation, 'tv005_annee_isolation_min'].astype('string')
td007_pb.loc[is_annee_isolation, 'annee_isole_uniforme_max'] = td007_pb.loc[
    is_annee_isolation, 'tv005_annee_isolation_max'].astype('string')

# ## U computation method label

td007_pb['meth_calc_U'] = 'INCONNUE'

# boolean masks (U values are compared after rounding to 2 decimals)
U = td007_pb.coefficient_transmission_thermique_paroi.round(2)
U_non_isolee = td007_pb.coefficient_transmission_thermique_paroi_non_isolee.round(2)
bool_U_egal_0 = U == 0.00
bool_U_U0 = U == U_non_isolee
# The parentheses matter: `|` binds tighter than `>=`, so without them the expression is
# parsed as `U >= (2 | non_isole)` instead of "U >= 2 OR declared non-insulated".
bool_U_2 = (U >= 2) | non_isole
bool_U_U0 = bool_U_U0 & (~bool_U_2)
bool_U_U0_auto_isol = bool_U_U0 & (U_non_isolee < 1)
bool_U_brut = (U <= 1) & (~bool_U_U0)
bool_U_brut_non_isole = (U > 1) & (~bool_U_U0)
bool_U_par_e = td007_pb.epaisseur_isolation > 0
bool_U_par_r = td007_pb.resistance_thermique_isolation > 0


# replace non-positive (and missing) thickness / resistance by NaN: a 0 carries no information.

td007_pb.loc[~bool_U_par_e, 'epaisseur_isolation'] = np.nan
td007_pb.loc[~bool_U_par_r, 'resistance_thermique_isolation'] = np.nan


# label imputation — order matters: later assignments override earlier ones.

td007_pb.loc[bool_U_brut, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : ISOLE'
td007_pb.loc[bool_U_brut_non_isole, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : NON ISOLE'
td007_pb.loc[bool_U_par_e, 'meth_calc_U'] = 'EPAISSEUR ISOLATION SAISIE'
td007_pb.loc[bool_U_par_r, 'meth_calc_U'] = 'RESISTANCE ISOLATION SAISIE'
td007_pb.loc[bool_U_2, 'meth_calc_U'] = 'PLANCHER NON ISOLE U=2'
td007_pb.loc[bool_U_U0, 'meth_calc_U'] = 'PLANCHER NON ISOLE U<2'
td007_pb.loc[bool_U_U0_auto_isol, 'meth_calc_U'] = 'STRUCTURE ISOLANTE U<1'
td007_pb.loc[inconnu, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLATION INCONNUE'
td007_pb.loc[isole, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLE'
td007_pb.loc[tp, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : TERRE PLEIN'
td007_pb.loc[bool_U_egal_0, 'meth_calc_U'] = 'ERREUR : U=0'


# ## insulation label (derived from the text of meth_calc_U)

td007_pb['isolation'] = 'NON ISOLE'
is_isole = ~td007_pb.meth_calc_U.str.contains('NON ISOLE|INCONNUE|TERRE')
td007_pb.loc[is_isole, 'isolation'] = 'ISOLE SAISI'
is_isole_defaut = is_isole & (td007_pb.meth_calc_U.str.contains('DEFAUT'))
td007_pb.loc[is_isole_defaut, 'isolation'] = 'ISOLE DEFAUT PRE 1982'

# `inconnu` is redefined here from the method label computed above.
inconnu = td007_pb.meth_calc_U.str.contains('INCONNUE')
# Years are stored as strings; the comparisons are lexicographic.
post_82 = td007_pb['annee_isole_uniforme_min'] >= "1982"
post_2001 = td007_pb['annee_isole_uniforme_min'] >= "2001"

td007_pb.loc[inconnu, 'isolation'] = 'ISOLATION INCONNUE (DEFAUT)'

td007_pb.loc[(inconnu | is_isole_defaut) & post_82, 'isolation'] = 'ISOLE DEFAUT POST 1982'


td007_pb.loc[tp, 'isolation'] = 'TERRE PLEIN DEFAUT PRE 2001'
td007_pb.loc[tp & post_2001, 'isolation'] = 'TERRE PLEIN DEFAUT POST 2001'


is_isole_struc = is_isole & (td007_pb.meth_calc_U.str.contains('STRUCTURE'))

td007_pb.loc[is_isole_struc, 'isolation'] = 'STRUCTURE ISOLANTE'

is_err = td007_pb.meth_calc_U.str.contains('ERREUR')

td007_pb.loc[is_err, 'isolation'] = 'NONDEF'


# ## adjacency label — later assignments override earlier ones.

td007_pb['type_adjacence'] = 'NONDEF'

ext = td007_pb.tv001_code == 'TV001_001'

td007_pb.loc[ext, 'type_adjacence'] = 'EXTERIEUR'

is_dep = td007_pb.b_infer.round(1) >= 0.9

td007_pb.loc[is_dep, 'type_adjacence'] = 'EXTERIEUR'

enterre = td007_pb.tv001_code == 'TV001_002'

td007_pb.loc[enterre, 'type_adjacence'] = 'PAROI_ENTERREE'

not_null = ~td007_pb.tv002_local_non_chauffe.isnull()

td007_pb.loc[not_null, 'type_adjacence'] = 'LNC'

# String comparison on the code: every code sorting after 'TV001_004' is tagged LNC,
# then the specific codes below take precedence.
is_lnc = td007_pb.tv001_code.astype('string') > 'TV001_004'

td007_pb.loc[is_lnc, 'type_adjacence'] = 'LNC'

is_adj = td007_pb.tv001_code == 'TV001_004'

td007_pb.loc[is_adj, 'type_adjacence'] = 'BAT_ADJ'

is_tp = td007_pb.tv001_code == 'TV001_261'

td007_pb.loc[is_tp, 'type_adjacence'] = 'TERRE_PLEIN'

is_vs = td007_pb.tv001_code == 'TV001_003'

td007_pb.loc[is_vs, 'type_adjacence'] = 'VIDE_SANITAIRE'

In [13]:
# Friendlier column names for the aggregation step.
pb_renames = {
    'tv002_local_non_chauffe': 'type_local_non_chauffe',
    'coefficient_transmission_thermique_paroi': 'U',
}
td007_pb = td007_pb.rename(columns=pb_renames)

  


# agg planchers

In [15]:
# Aggregate the floor table to one row per td001_dpe_id.
# agg_pond_top_freq is called with 'surface_paroi_opaque_infer' as the weighting column.

# Idempotent rename: harmless if a previous cell already applied it.
td007_pb = td007_pb.rename(columns={
                                       'tv002_local_non_chauffe':'type_local_non_chauffe',
                                       'coefficient_transmission_thermique_paroi':'U'})

# The per-adjacency loop below reads `type_adjacence_simple`; create it when it is missing so the
# cell runs on a fresh kernel. An existing column is left untouched.
# NOTE(review): assumes 'TP_VS' groups TERRE_PLEIN and VIDE_SANITAIRE and that every other
# adjacency value is kept as is — confirm against the intended definition.
if 'type_adjacence_simple' not in td007_pb.columns:
    td007_pb['type_adjacence_simple'] = td007_pb.type_adjacence.replace(
        {'TERRE_PLEIN': 'TP_VS', 'VIDE_SANITAIRE': 'TP_VS'})

pb_vars = ['meth_calc_U', 'U', 'epaisseur_isolation', 'resistance_thermique_isolation', 'isolation',
           'annee_isole_uniforme_min', 'annee_isole_uniforme_max', 'materiaux_structure']

concat = list()
type_adjacence_top = agg_pond_top_freq(td007_pb, 'type_adjacence', 'surface_paroi_opaque_infer',
                           'td001_dpe_id').to_frame('type_adjacence_top')

# Sorted list of the distinct adjacency types met in each DPE.
type_adjacence_arr_agg = td007_pb.groupby('td001_dpe_id').type_adjacence.agg(lambda x:np.sort(x.dropna().unique()).tolist())

type_adjacence_arr_agg.name = 'type_adjacence_array'

concat.append(type_adjacence_top)
concat.append(type_adjacence_arr_agg)

type_local_non_chauffe_arr_agg = td007_pb.groupby('td001_dpe_id').type_local_non_chauffe.agg(lambda x:np.sort(x.dropna().unique()).tolist())
type_local_non_chauffe_arr_agg = type_local_non_chauffe_arr_agg.to_frame('type_LNC_planchers_array')
# Bound to the name that is appended below, so the floor result is used rather than a
# same-named variable left in the kernel by another cell.
type_local_non_chauffe_agg_top = agg_pond_top_freq(td007_pb, 'type_local_non_chauffe', 'surface_paroi_opaque_infer',
                           'td001_dpe_id').to_frame('type_LNC_planchers_top')

# Summed surface per adjacency type, one column per type.
pivot=td007_pb.pivot_table(index='td001_dpe_id',columns='type_adjacence',values='surface_paroi_opaque_infer',aggfunc='sum')
pivot.columns = [f'surface_planchers_{col.lower()}' for col in pivot]

concat.extend([type_local_non_chauffe_arr_agg,type_local_non_chauffe_agg_top,pivot])

# Aggregates over all floors of the DPE.
for var in pb_vars:
    var_agg = agg_pond_top_freq(td007_pb, var, 'surface_paroi_opaque_infer',
                           'td001_dpe_id').to_frame(f'{var}_plancher_top')
    concat.append(var_agg)

# Same aggregates restricted to one simplified adjacency type at a time.
for type_adjacence_simple in ['EXTERIEUR','TP_VS','LNC','BAT_ADJ']:
    sel = td007_pb.loc[td007_pb.type_adjacence_simple ==type_adjacence_simple]
    for var in pb_vars:
        var_agg = agg_pond_top_freq(sel, var, 'surface_paroi_opaque_infer',
                               'td001_dpe_id').to_frame(f'{var}_plancher_{type_adjacence_simple.lower()}_top')
        concat.append(var_agg)

td007_pb_agg = pd.concat(concat,axis=1)

td007_pb_agg.index.name = 'td001_dpe_id'

In [17]:
# Persist the floor aggregates to the current working directory.
td007_pb_agg.to_csv('planchers.csv')

In [18]:
# Read the file back as a round-trip check; list-valued columns come back as plain strings.
test = pd.read_csv('planchers.csv')

# traitement des plafonds

In [19]:
# Build the ceiling table: rows of td007 whose tr014_type_parois_opaque_id is '4',
# taken as an independent copy so the labelling below never writes into td007.
td007_ph = td007.loc[td007.tr014_type_parois_opaque_id == '4'].copy()

float_cols = ['coefficient_transmission_thermique_paroi_non_isolee', 'coefficient_transmission_thermique_paroi',
              'epaisseur_isolation', 'resistance_thermique_isolation']
td007_ph[float_cols] = td007_ph[float_cols].astype(float)

# ## uniform tv007 label

td007_ph['tv007_periode_isolation_uniforme'] = td007_ph.tv007_annee_construction.astype('string')

td007_ph['tv007_label_isolation_uniforme'] = td007_ph.tv007_annee_construction.astype('string')

# Where no construction period is available, fall back on the insulation period.
null = td007_ph['tv007_label_isolation_uniforme'].isnull()

td007_ph.loc[null, 'tv007_label_isolation_uniforme'] = td007_ph.loc[null, 'tv007_annee_isolation'].astype(
    'string')

inconnu = td007_ph.tv007_ph_isole == "0"
non_isole = td007_ph.tv007_ph_isole == '2'
isole = td007_ph.tv007_ph_isole == '1'

is_annee_construction = ~td007_ph.tv007_annee_construction.isnull()
is_annee_isolation = ~td007_ph.tv007_annee_isolation.isnull()
isole_constr = isole & is_annee_construction
isole_isol = isole & (~is_annee_construction)

# Prefix the period with the insulation status. Each prefix is applied exactly once:
# repeating an assignment would prepend the prefix a second time.
td007_ph.loc[inconnu, 'tv007_label_isolation_uniforme'] = (
    'isol. inconnue periode constr : ' + td007_ph.loc[inconnu, 'tv007_label_isolation_uniforme'])
td007_ph.loc[non_isole, 'tv007_label_isolation_uniforme'] = 'non isolé'
td007_ph.loc[isole_constr, 'tv007_label_isolation_uniforme'] = (
    'isolé periode constr : ' + td007_ph.loc[isole_constr, 'tv007_label_isolation_uniforme'])
td007_ph.loc[isole_isol, 'tv007_label_isolation_uniforme'] = (
    'isolé periode isolation :' + td007_ph.loc[isole_isol, 'tv007_label_isolation_uniforme'])

# uniform insulation year: construction bounds, overridden by insulation bounds when present.

td007_ph['annee_isole_uniforme_min'] = td007_ph.tv007_annee_construction_min.astype('string')
td007_ph['annee_isole_uniforme_max'] = td007_ph.tv007_annee_construction_max.astype('string')
td007_ph.loc[is_annee_isolation, 'annee_isole_uniforme_min'] = td007_ph.loc[
    is_annee_isolation, 'tv007_annee_isolation_min'].astype('string')
td007_ph.loc[is_annee_isolation, 'annee_isole_uniforme_max'] = td007_ph.loc[
    is_annee_isolation, 'tv007_annee_isolation_max'].astype('string')

# ## U computation method label

td007_ph['meth_calc_U'] = 'INCONNUE'

# boolean masks (U values are compared after rounding to 2 decimals)
U = td007_ph.coefficient_transmission_thermique_paroi.round(2)
U_non_isolee = td007_ph.coefficient_transmission_thermique_paroi_non_isolee.round(2)
bool_U_egal_0 = U == 0.00
bool_U_U0 = U == U_non_isolee
# The parentheses matter: `|` binds tighter than `>=`, so without them the expression is
# parsed as `U >= (2 | non_isole)` instead of "U >= 2 OR declared non-insulated".
bool_U_2 = (U >= 2) | non_isole
bool_U_U0 = bool_U_U0 & (~bool_U_2)
bool_U_U0_auto_isol = bool_U_U0 & (U_non_isolee < 1)
bool_U_brut = (U <= 1) & (~bool_U_U0)
bool_U_brut_non_isole = (U > 1) & (~bool_U_U0)
bool_U_par_e = td007_ph.epaisseur_isolation > 0
bool_U_par_r = td007_ph.resistance_thermique_isolation > 0


# replace non-positive (and missing) thickness / resistance by NaN: a 0 carries no information.

td007_ph.loc[~bool_U_par_e, 'epaisseur_isolation'] = np.nan
td007_ph.loc[~bool_U_par_r, 'resistance_thermique_isolation'] = np.nan


# label imputation — order matters: later assignments override earlier ones.

td007_ph.loc[bool_U_brut, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : ISOLE'
td007_ph.loc[bool_U_brut_non_isole, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : NON ISOLE'
td007_ph.loc[bool_U_par_e, 'meth_calc_U'] = 'EPAISSEUR ISOLATION SAISIE'
td007_ph.loc[bool_U_par_r, 'meth_calc_U'] = 'RESISTANCE ISOLATION SAISIE'
# These rows are ceilings: label them PLAFOND rather than PLANCHER.
# The 'NON ISOLE' substring matched further down is preserved.
td007_ph.loc[bool_U_2, 'meth_calc_U'] = 'PLAFOND NON ISOLE U=2'
td007_ph.loc[bool_U_U0, 'meth_calc_U'] = 'PLAFOND NON ISOLE U<2'
td007_ph.loc[bool_U_U0_auto_isol, 'meth_calc_U'] = 'STRUCTURE ISOLANTE U<1'
td007_ph.loc[inconnu, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLATION INCONNUE'
td007_ph.loc[isole, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLE'
td007_ph.loc[bool_U_egal_0, 'meth_calc_U'] = 'ERREUR : U=0'


# ## insulation label (derived from the text of meth_calc_U)

td007_ph['isolation'] = 'NON ISOLE'
is_isole = ~td007_ph.meth_calc_U.str.contains('NON ISOLE|INCONNUE|TERRE')
td007_ph.loc[is_isole, 'isolation'] = 'ISOLE SAISI'
is_isole_defaut = is_isole & (td007_ph.meth_calc_U.str.contains('DEFAUT'))
td007_ph.loc[is_isole_defaut, 'isolation'] = 'ISOLE DEFAUT PRE 1974'

# `inconnu` is redefined here from the method label computed above.
inconnu = td007_ph.meth_calc_U.str.contains('INCONNUE')
# Years are stored as strings; the comparisons are lexicographic.
post_74 = td007_ph['annee_isole_uniforme_min'] >= "1974"
# Not read anywhere else in this cell; kept so the notebook namespace is unchanged.
post_2001 = td007_ph['annee_isole_uniforme_min'] >= "2001"

td007_ph.loc[inconnu, 'isolation'] = 'ISOLATION INCONNUE (DEFAUT)'

td007_ph.loc[(inconnu | is_isole_defaut) & post_74, 'isolation'] = 'ISOLE DEFAUT POST 1974'


is_isole_struc = is_isole & (td007_ph.meth_calc_U.str.contains('STRUCTURE'))

td007_ph.loc[is_isole_struc, 'isolation'] = 'STRUCTURE ISOLANTE'

is_err = td007_ph.meth_calc_U.str.contains('ERREUR')

td007_ph.loc[is_err, 'isolation'] = 'NONDEF'


# ## adjacency label — later assignments override earlier ones.

td007_ph['type_adjacence'] = 'NONDEF'

ext = td007_ph.tv001_code == 'TV001_001'

td007_ph.loc[ext, 'type_adjacence'] = 'EXTERIEUR'

is_dep = td007_ph.b_infer.round(1) >= 0.9

td007_ph.loc[is_dep, 'type_adjacence'] = 'EXTERIEUR'

enterre = td007_ph.tv001_code == 'TV001_002'

td007_ph.loc[enterre, 'type_adjacence'] = 'PAROI_ENTERREE'

not_null = ~td007_ph.tv002_local_non_chauffe.isnull()

td007_ph.loc[not_null, 'type_adjacence'] = 'LNC'

# String comparison on the code: every code sorting after 'TV001_004' is tagged LNC.
is_lnc = td007_ph.tv001_code.astype('string') > 'TV001_004'

td007_ph.loc[is_lnc, 'type_adjacence'] = 'LNC'

is_adj = td007_ph.tv001_code == 'TV001_004'

td007_ph.loc[is_adj, 'type_adjacence'] = 'BAT_ADJ'


# aggregation plafond

In [21]:
# Aggregate the ceiling table to one row per td001_dpe_id.
# agg_pond_top_freq is called with 'surface_paroi_opaque_infer' as the weighting column.
ph_renames = {
    'tv002_local_non_chauffe': 'type_local_non_chauffe',
    'coefficient_transmission_thermique_paroi': 'U',
}
td007_ph = td007_ph.rename(columns=ph_renames)

ph_vars = ['meth_calc_U', 'U', 'epaisseur_isolation', 'resistance_thermique_isolation', 'isolation',
           'annee_isole_uniforme_min', 'annee_isole_uniforme_max', 'materiaux_structure']

type_adjacence_top = agg_pond_top_freq(
    td007_ph, 'type_adjacence', 'surface_paroi_opaque_infer', 'td001_dpe_id'
).to_frame('type_adjacence_top')

# Sorted list of the distinct adjacency types met in each DPE.
type_adjacence_arr_agg = td007_ph.groupby('td001_dpe_id').type_adjacence.agg(
    lambda x: np.sort(x.dropna().unique()).tolist())
type_adjacence_arr_agg.name = 'type_adjacence_array'

type_local_non_chauffe_arr_agg = td007_ph.groupby('td001_dpe_id').type_local_non_chauffe.agg(
    lambda x: np.sort(x.dropna().unique()).tolist()
).to_frame('type_LNC_plafonds_array')
type_local_non_chauffe_agg_top = agg_pond_top_freq(
    td007_ph, 'type_local_non_chauffe', 'surface_paroi_opaque_infer', 'td001_dpe_id'
).to_frame('type_LNC_plafonds_top')

# Summed surface per adjacency type, one column per type.
pivot = td007_ph.pivot_table(index='td001_dpe_id', columns='type_adjacence',
                             values='surface_paroi_opaque_infer', aggfunc='sum')
pivot.columns = ['surface_plafonds_' + col.lower() for col in pivot]

concat = [type_adjacence_top, type_adjacence_arr_agg,
          type_local_non_chauffe_arr_agg, type_local_non_chauffe_agg_top, pivot]

# Aggregates over all ceilings of the DPE.
for var in ph_vars:
    var_agg = agg_pond_top_freq(
        td007_ph, var, 'surface_paroi_opaque_infer', 'td001_dpe_id'
    ).to_frame(var + '_plafonds_top')
    concat.append(var_agg)

# Same aggregates restricted to one adjacency type at a time.
for type_adjacence in ['EXTERIEUR', 'LNC', 'BAT_ADJ']:
    sel = td007_ph.loc[td007_ph.type_adjacence == type_adjacence]
    suffix = '_plafonds_' + type_adjacence.lower() + '_top'
    for var in ph_vars:
        var_agg = agg_pond_top_freq(
            sel, var, 'surface_paroi_opaque_infer', 'td001_dpe_id'
        ).to_frame(var + suffix)
        concat.append(var_agg)

td007_ph_agg = pd.concat(concat, axis=1)
td007_ph_agg.index.name = 'td001_dpe_id'
td007_ph_agg

Unnamed: 0_level_0,type_adjacence_top,type_adjacence_array,type_LNC_plafonds_array,type_LNC_plafonds_top,surface_plafonds_bat_adj,surface_plafonds_exterieur,surface_plafonds_lnc,surface_plafonds_nondef,meth_calc_U_plafonds_top,U_plafonds_top,...,annee_isole_uniforme_max_plafonds_lnc_top,materiaux_structure_plafonds_lnc_top,meth_calc_U_plafonds_bat_adj_top,U_plafonds_bat_adj_top,epaisseur_isolation_plafonds_bat_adj_top,resistance_thermique_isolation_plafonds_bat_adj_top,isolation_plafonds_bat_adj_top,annee_isole_uniforme_min_plafonds_bat_adj_top,annee_isole_uniforme_max_plafonds_bat_adj_top,materiaux_structure_plafonds_bat_adj_top
td001_dpe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
178,LNC,"[EXTERIEUR, LNC]",[Comble faiblement ventilé],Comble faiblement ventilé,,29.52,77.00,,EPAISSEUR ISOLATION SAISIE,0.17,...,,Plafond entre solives bois avec ou sans rempli...,,,,,,,,
427,LNC,[LNC],[Comble fortement ventilé],Comble fortement ventilé,,,102.38,,EPAISSEUR ISOLATION SAISIE,0.14,...,,Plafond entre solives bois avec ou sans rempli...,,,,,,,,
428,LNC,[LNC],[Comble faiblement ventilé],Comble faiblement ventilé,,,114.80,,EPAISSEUR ISOLATION SAISIE,0.15,...,,Plafond en plaque de plâtre,,,,,,,,
524,EXTERIEUR,[EXTERIEUR],[],,,120.62,,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,0.13,...,,,,,,,,,,
1017,LNC,"[EXTERIEUR, LNC]",[Comble faiblement ventilé],Comble faiblement ventilé,,47.65,120.00,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,0.15,...,1974,Plafond entre solives bois avec ou sans rempli...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9430358,EXTERIEUR,[EXTERIEUR],[],,,101.17,,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,0.18,...,,,,,,,,,,
9430417,LNC,[LNC],[Comble faiblement ventilé],Comble faiblement ventilé,,,54.10,,PAR DEFAUT PERIODE : ISOLE,0.50,...,1977,Plafond en plaque de plâtre,,,,,,,,
9430549,LNC,[LNC],[Comble faiblement ventilé],Comble faiblement ventilé,,,70.00,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,0.25,...,2000,Plafond entre solives bois avec ou sans rempli...,,,,,,,,
9430721,LNC,[LNC],[],,,,82.61,,PAR DEFAUT PERIODE : ISOLATION INCONNUE,0.53,...,1974,Dalle béton,,,,,,,,


In [22]:
# Persist the ceiling aggregates to the current working directory.
td007_ph_agg.to_csv('plafonds.csv')

# traitement vitrages

In [70]:
# Work on a copy; note that `td008` itself is rebound by the merge in the next cell.
td008_=td008.copy()

In [71]:
# Name the raw primary key like the merge key, then left-join the processed columns.
# Rebinds `td008`, so this cell is not idempotent once `td008_` is re-copied from it.
td008_ = td008_.rename(columns={'id': 'td008_baie_id'})
td008 = td008_.merge(td008_p, how='left', on='td008_baie_id')

In [75]:
from td008_processing import td008_types

In [78]:
# Cast only the columns of `td008_types` that are actually present in `td008`.
td008 = td008.astype({k: v for k, v in td008_types.items() if k in td008})
# Keep the first occurrence of each column label and drop later duplicates.
td008 = td008.loc[:, ~td008.columns.duplicated()]

## materiaux

In [200]:
# Frame material: start from the tv021 material label, with 'NONDEF' where it is missing.
td008['materiaux']=td008.tv021_materiaux.astype('string').fillna('NONDEF')

mat_tv010=td008.tv010_type_materiaux

# NOTE(review): the column `td008['baie_mat_tv010']` receives the raw tv010 label; the relabelling
# below is applied to the local Series `baie_mat_tv010` and the column is not reassigned in this cell.
td008['baie_mat_tv010']=mat_tv010
baie_mat_tv010=td008.baie_mat_tv010.astype('string')

# Boolean masks from case-sensitive substring matches on the tv010 material label
# (missing labels count as no match).
bois_ou_PVC = mat_tv010.str.contains('bois ou PVC').fillna(False)
bois = mat_tv010.str.contains('bois').fillna(False)

metal = mat_tv010.str.contains('métal').fillna(False)
# NOTE(review): `brique` and `PVC` are computed but not used in this cell — confirm whether
# matching rows were meant to be relabelled as well.
brique = mat_tv010.str.contains('Brique').fillna(False)
polycarb = mat_tv010.str.contains('Polycarbonate').fillna(False)
autres = mat_tv010.str.contains('Autres').fillna(False)

PVC = mat_tv010.str.contains('PVC').fillna(False)


# Order matters: 'Bois' is written first, then overwritten by 'Bois ou PVC' for labels that
# contain 'bois ou PVC' (those also match the broader 'bois' mask).
baie_mat_tv010.loc[bois]='Bois'
baie_mat_tv010.loc[bois_ou_PVC]='Bois ou PVC'
baie_mat_tv010.loc[metal]='Métal'
baie_mat_tv010.loc[polycarb]='Polycarbonate'
baie_mat_tv010.loc[autres]='Autres'
# Labels matching none of the masks keep their original text; missing labels become 'NONDEF'.
baie_mat_tv010=baie_mat_tv010.fillna('NONDEF')

mat = td008.materiaux
nondef = mat == 'NONDEF'

# Fall back to the tv010-derived label only where the tv021 material is undefined.
td008.loc[nondef,'materiaux']=baie_mat_tv010.loc[nondef]

## cat baie

In [80]:
# Glazing-type inference from the tv009, tv010 and tv021 lookup labels and from `reference`.

# Free-text label assembled from the tv009 glazing description (+ tv010 material).
td008['fen_lib_from_tv009'] = td008['tv009_type_vitrage'].astype('string') + ' ' + td008[
    'tv009_remplissage'].astype('string').fillna('') + ' '
# Gap thickness is rendered as "<n> mm " and left out when it is missing or equal to 0.
td008['fen_lib_from_tv009'] += td008['tv009_epaisseur_lame'].fillna('0').astype(int).astype(str).replace('0', '').apply(
    lambda x: x + ' mm ' if x != '' else x) + td008['tv010_type_materiaux'].astype('string').fillna('') + ' ' + \
                               td008[
                                   'tv009_traitement_vitrage'].astype('string').fillna('')
td008['fen_lib_from_tv009'] = td008['fen_lib_from_tv009'].fillna('NONDEF')

# Free-text label assembled from the tv021 description.
td008['fen_lib_from_tv021'] = td008['tv021_type_baie'].astype('string') + ' ' + td008[
    'tv021_type_vitrage'].astype('string').fillna('') + ' '
td008['fen_lib_from_tv021'] += td008['tv021_materiaux'].astype('string').fillna('')
td008['fen_lib_from_tv021'] = td008['fen_lib_from_tv021'].fillna('NONDEF')

# A row is flagged when either label mentions the keyword (case-insensitive).
double_vitrage = td008.fen_lib_from_tv009.str.lower().str.contains(
    'double') | td008.fen_lib_from_tv021.str.lower().str.contains('double')

triple_vitrage = td008.fen_lib_from_tv009.str.lower().str.contains(
    'triple') | td008.fen_lib_from_tv021.str.lower().str.contains('triple')

simple_vitrage = td008.fen_lib_from_tv009.str.lower().str.contains(
    'simple') | td008.fen_lib_from_tv021.str.lower().str.contains('simple')

porte = td008['tv010_type_materiaux'].astype(str).fillna('').str.lower().str.contains(
    'portes ')  # the trailing space matters: without it "portes-fenetres" would match too
porte = porte | td008['tv010_type_baie'].astype(str).fillna('').str.lower().str.contains('porte ')
porte = porte | td008['reference'].fillna('').str.lower().str.contains('porte ')
porte = porte | td008['reference'].fillna('').str.lower().str.contains('portes ')
# A reference that also mentions "fen" is not treated as a door.
porte = porte & (~td008['reference'].fillna('').str.lower().str.contains('fen'))


td008['type_vitrage_simple_infer'] = 'NONDEF'

td008.loc[double_vitrage, 'type_vitrage_simple_infer'] = 'double vitrage'
td008.loc[triple_vitrage, 'type_vitrage_simple_infer'] = 'triple vitrage'
td008.loc[simple_vitrage, 'type_vitrage_simple_infer'] = 'simple vitrage'

# Rows matching two different glazing types are contradictory.
td008.loc[simple_vitrage & double_vitrage, 'type_vitrage_simple_infer'] = "INCOHERENT"
td008.loc[simple_vitrage & triple_vitrage, 'type_vitrage_simple_infer'] = "INCOHERENT"
td008.loc[triple_vitrage & double_vitrage, 'type_vitrage_simple_infer'] = "INCOHERENT"
# Doors take precedence over any glazing type found above.
td008.loc[porte, 'type_vitrage_simple_infer'] = "porte"

# Glass bricks / polycarbonate are singled out and override everything set so far.

brique = td008['tv010_type_materiaux'].astype(str).fillna('').str.lower().str.contains('brique')

brique = brique | td008['tv010_type_materiaux'].astype(str).fillna('').str.lower().str.contains('polycarb')

brique = brique | td008.reference.str.lower().str.contains('brique')

brique = brique | td008.reference.str.lower().str.contains('polycarb')

td008.loc[brique, 'type_vitrage_simple_infer'] = "brique de verre ou polycarbonate"

td008.type_vitrage_simple_infer = td008.type_vitrage_simple_infer.astype('category')

# Advanced processing based on the U values (disabled).
# s_type_from_value = intervals_to_category(td008.coefficient_transmission_thermique_baie,infer_type_by_value)

# infer_type_by_value = {'simple vitrage':[3.7,7],
#                       'double vitrage':[2,3.69],
#                       'triple vitrage':[1,2],
#                       'INCOHERENT':[0,0.99]}

# inc=td008.type_vitrage_simple_infer=='INCOHERENT'
# nondef=td008.type_vitrage_simple_infer=='NONDEF'
# inc_or_nondef=inc|nondef

# td008.loc[inc_or_nondef,'type_vitrage_simple_infer'] = s_type_from_value[inc_or_nondef]

# Quantities (EXPERIMENTAL): number of openings = deperdition / (surface * U), rounded.
td008['nb_baie_calc'] = (
        td008.deperdition / (td008.surface * td008.coefficient_transmission_thermique_baie)).round(0)
# The ratio is meaningless when any of its terms is 0, and a count of 0 is treated as unknown.
null = (td008.surface == 0) | (td008.coefficient_transmission_thermique_baie == 0) | (td008.deperdition == 0)
td008.loc[null, 'nb_baie_calc'] = np.nan
zeros = td008.nb_baie_calc == 0
td008.loc[zeros, 'nb_baie_calc'] = np.nan

td008['surfacexnb_baie_calc'] = td008.surface * td008.nb_baie_calc

# OPENING CATEGORY
# First pass: derive the category from the glazing type inferred above.
# (An earlier duplicate of this pass, whose results were immediately overwritten, was removed.)
baie = td008.type_vitrage_simple_infer.str.contains('vitrage')
porte = td008.type_vitrage_simple_infer.str.contains('porte')
brique = td008.type_vitrage_simple_infer.str.contains('brique')

td008['cat_baie_simple_infer'] = 'NONDEF'
td008.loc[baie, 'cat_baie_simple_infer'] = 'baie_vitree'
td008.loc[porte, 'cat_baie_simple_infer'] = 'porte'

nondef = td008.cat_baie_simple_infer == "NONDEF"
# Second pass, for rows still undefined: look for hints in the free-text reference.
ref = td008.reference.str.lower()
baie = ref.str.contains('fen')
baie = baie | ref.str.contains('baie')
baie = baie | ref.str.startswith('f')
baie = baie | ref.str.startswith('pf')
baie = baie | ref.str.startswith('sv')
baie = baie | ref.str.contains('velux')
# A row linked to a tv009 glazing coefficient is glazed.
# The original also OR-ed in the `.isnull()` mask of the same column, which made `baie` True for
# every row, so no door could ever be recognised from the reference; that line was dropped.
baie = baie | (~td008.tv009_coefficient_transmission_thermique_vitrage_id.isnull())
baie = baie | td008['tv010_type_baie'].str.lower().str.contains('fen')
baie = baie | ref.str.contains('vitr')
porte = ref.str.contains('porte') & (~baie)
td008.loc[nondef & baie, 'cat_baie_simple_infer'] = 'baie_vitree'
td008.loc[nondef & porte, 'cat_baie_simple_infer'] = 'porte'
# Glass-brick / polycarbonate rows override whatever the reference suggested.
td008.loc[brique, 'cat_baie_simple_infer'] = 'paroi_brique_ou_poly'
td008.cat_baie_simple_infer = td008.cat_baie_simple_infer.astype('category')

In [102]:
# French windows ("portes-fenêtres"): detected from the tv021 / tv010 labels or from a reference
# starting with "pf". The first letter is left out of the patterns so both capitalisations match.
pf = td008.tv021_type_baie.str.contains('ortes-fenêtres').fillna(False)
pf = pf | td008.tv010_type_menuiserie.str.contains('ortes-fenêtres').fillna(False)
# Lower-cased for consistency with the "pf" prefix test used for `cat_baie_simple_infer`.
pf = pf | td008.reference.str.lower().str.startswith('pf').fillna(False)

# Windows: any label mentioning "fenêtre" (this also matches "portes-fenêtres", fixed up below).
f = td008.tv021_type_baie.str.contains('enêtre').fillna(False)
f = f | td008.tv010_type_menuiserie.str.contains('enêtre').fillna(False)

# Start from the simple category as an independent string Series.
# The original read `td008.cat_baie`, a column this notebook never creates.
cat_baie = td008.cat_baie_simple_infer.astype('string')

# Order matters: `pf` overrides `f`, then leftover generic glazed openings default to windows,
# and doors from the simple inference win over everything.
cat_baie.loc[f] = 'fenetre'
cat_baie.loc[pf] = 'porte_fenetre'
is_baie = cat_baie == 'baie_vitree'
cat_baie.loc[is_baie] = 'fenetre'
p_simple = td008.cat_baie_simple_infer == 'porte'
cat_baie.loc[p_simple] = 'porte'
td008['cat_baie_infer'] = cat_baie

## méthode de saisie de U

In [103]:
# Rows without a tv009 / tv010 / tv021 lookup code.
not_vitrage = td008.tv009_code.isnull()
not_baie = td008.tv010_code.isnull()
not_fs = td008.tv021_code.isnull()

# No tv010 code -> 'Uw saisi', otherwise 'Uw defaut'.
td008['meth_calc_U'] = np.where(not_baie, 'Uw saisi', 'Uw defaut')

# Same rule for Fs, driven by the tv021 code.
td008['meth_calc_Fs'] = np.where(not_fs, 'Fs saisi', 'Fs defaut')



## masques et occultations

In [106]:
# Friendlier aliases for the tv022 / tv011 lookup columns.
td008['avancee_masque']= td008.tv022_avance
td008['type_occultation']=td008.tv011_fermetures
td008['type_masque']=td008.tv022_type_masque
# Ordered categorical so overhang depths can be compared and sorted; labels outside the listed
# categories become NaN.
td008['avancee_masque']=pd.Categorical(td008['avancee_masque'],categories=['< 1 m','1 <= … < 2', '2 <= … < 3', '3 <='],ordered=True)
# True where the mask type mentions 'balcon'; False is replaced by NaN.
td008['presence_balcon']=td008.tv022_type_masque.str.contains('balcon').replace(False,np.nan)

## agg

In [246]:
from utils import agg_pond_top_freq,agg_pond_avg

# Per-DPE aggregation of the td008 openings; every piece is indexed by `td001_dpe_id` and the
# pieces are concatenated column-wise at the end.
concat = list()

# Total (surface x inferred count) per DPE, one column per inferred opening category.
surfs = td008.pivot_table(index='td001_dpe_id', columns='cat_baie_infer',
                          values='surfacexnb_baie_calc', aggfunc='sum')
surfs.columns = [f'surface_{col}' for col in surfs]
concat.append(surfs)

td008 = td008.rename(columns={'coefficient_transmission_thermique_baie': 'Ubaie',
                              'type_vitrage_simple_infer': 'type_vitrage',
                              'tv010_uw': 'Uw',
                              'tv010_ug': 'Ug'})

# Uw / Ug are aggregated as labels, with 'NONDEF' standing in for missing values.
td008.Uw = td008.Uw.astype('string').fillna('NONDEF')
td008.Ug = td008.Ug.astype('string').fillna('NONDEF')

# Everything that is not a door vs. doors only.
td008_vit = td008.loc[td008.cat_baie_simple_infer != 'porte']
td008_opaque = td008.loc[td008.cat_baie_simple_infer == 'porte']

cols_vitrage = ['Ubaie', 'Uw', 'Ug', 'type_occultation', 'materiaux', 'type_vitrage',
                'meth_calc_U', 'meth_calc_Fs']
cols_porte = ['Ubaie', 'materiaux', 'meth_calc_U', 'meth_calc_Fs']

# (subset, column-name suffix, columns to aggregate), in the order the output columns are emitted.
subsets = [(td008_vit, 'baie_vitree', cols_vitrage),
           (td008_opaque, 'porte', cols_porte)]
for type_baie in ['fenetre', 'porte_fenetre']:
    subsets.append((td008_vit.loc[td008_vit.cat_baie_infer == type_baie], type_baie, cols_vitrage))

for sel, suffix, cols in subsets:
    for col in cols:
        var_agg = agg_pond_top_freq(sel, col, 'surfacexnb_baie_calc',
                                    'td001_dpe_id').to_frame(f'{col}_{suffix}_top')
        concat.append(var_agg)

# Largest overhang category per DPE. Sorting descending puts NaN last, so the first element is
# the maximum whenever the group has a non-null value.
# The original condition was inverted: it returned the maximum only for groups containing nulls
# and NaN for fully populated groups.
avancee_masque_max = td008.groupby('td001_dpe_id').avancee_masque.apply(
    lambda x: x.sort_values(ascending=False).iloc[0] if x.notnull().any() else np.nan)
concat.append(avancee_masque_max.to_frame('avancee_masque_max'))

# True when at least one opening of the DPE is flagged with a balcony.
concat.append((td008.groupby('td001_dpe_id').presence_balcon.sum() > 0).to_frame('presence_balcon'))

td008_baie_agg = pd.concat(concat, axis=1)
td008_baie_agg.index.name = 'td001_dpe_id'


In [263]:
# Sanity check: distribution of the dominant glazing type across DPEs.
td008_baie_agg.type_vitrage_baie_vitree_top.value_counts()

double vitrage                      40497
simple vitrage                       1564
triple vitrage                        223
INCOHERENT                            100
NONDEF                                 88
brique de verre ou polycarbonate       10
Name: type_vitrage_baie_vitree_top, dtype: int64

# PT

In [41]:
# Give each table's `id` column a table-specific name.
# NOTE(review): `td008` already had `id` renamed to `td008_baie_id` in the glazing section; if this
# cell runs on that frame the td008 line changes nothing — confirm which `td008` is expected here.
td006 = td006.rename(columns={"id": "td006_batiment_id"})
td007 = td007.rename(columns={"id": "td007_paroi_opaque_id"})
td008 = td008.rename(columns={"id": "td008_baie_id"})
td001 = td001.rename(columns={"id": "td001_dpe_id"})

In [42]:
def merge_td010_tr_tv(td010):
    """Return a copy of `td010` enriched with the tr and tv reference tables.

    The input frame is copied first, then passed through
    `DPETrTvTables.merge_all_tr_tables` and `DPETrTvTables.merge_all_tv_tables`.
    When the result contains several columns with the same label, only the
    first one is kept.
    """
    from trtvtables import DPETrTvTables

    tr_tv_tables = DPETrTvTables()
    merged = tr_tv_tables.merge_all_tv_tables(
        tr_tv_tables.merge_all_tr_tables(td010.copy())
    )

    first_occurrence = ~merged.columns.duplicated()
    return merged.loc[:, first_occurrence]

In [43]:
# Enrich td010 with the tr/tv reference tables.
# NOTE(review): the output recorded for this cell is `KeyError: 'tv013_code'`, so the cells below
# may have been run against a `td010` coming from a different execution — confirm.
td010 = merge_td010_tr_tv(td010)


KeyError: 'tv013_code'

In [None]:
# Inspect the columns available in td010.
td010.columns

In [44]:
# Attach `td001_dpe_id` to each td010 row through the td006 key `td006_batiment_id`
# (left join: td010 rows without a match in td006 get a missing `td001_dpe_id`).
td010 = td010.merge(td006[['td006_batiment_id', 'td001_dpe_id']], on='td006_batiment_id', how='left')


In [54]:
# Column dtypes applied to td010: `longueur` is cast to float.
td010_types = {'longueur':'float',}

In [55]:
# Apply the dtypes declared in `td010_types`.
td010 = td010.astype(td010_types)

In [66]:


# Total `longueur` per DPE, one column per `type_liaison` value.
long=td010.pivot_table(index='td001_dpe_id',columns = 'type_liaison',values='longueur',aggfunc='sum')
long.columns = [f'longueur_{col}' for col in long]

# Per-DPE aggregate of `tv013_isolation_mur`, with `longueur` passed as the weighting column.
type_isol_mur = agg_pond_top_freq(td010, 'tv013_isolation_mur', 'longueur',
                       'td001_dpe_id').to_frame('type_isolation_mur')
# Same aggregate, restricted to the 'pb_mur' rows.
td010_pb = td010.loc[td010.type_liaison=='pb_mur']
type_isol_plancher = agg_pond_top_freq(td010_pb, 'tv013_plancher_bas', 'longueur',
                       'td001_dpe_id').to_frame('type_isolation_plancher')

# Same aggregate, restricted to the 'ph_mur' rows.
# NOTE(review): this reads `tv013_plancher_bas`, the same column as the 'pb_mur' aggregate above,
# although the result is named `type_isolation_plafond` — looks like a copy-paste slip; confirm
# which tv013 column is intended.
td010_ph = td010.loc[td010.type_liaison=='ph_mur']
type_isol_plafond = agg_pond_top_freq(td010_ph, 'tv013_plancher_bas', 'longueur',
                       'td001_dpe_id').to_frame('type_isolation_plafond')

# Column-wise concatenation of all per-DPE aggregates.
td010_pt_agg = pd.concat([type_isol_mur,type_isol_plancher,type_isol_plafond,long],axis=1)
td010_pt_agg.index.name = 'td001_dpe_id'


In [90]:
# Display the aggregated per-DPE table built in the previous cell.
td010_pt_agg

Unnamed: 0_level_0,type_isolation_mur,type_isolation_plancher,type_isolation_plafond,longueur_menui_mur,longueur_menui_ph,longueur_pb_mur,longueur_ph_mur,longueur_pi_mur,longueur_refend_mur
td001_dpe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
178,ITI+ITE,ITE,,,,41.48,,41.48,
427,ITR,ITI,,,,50.00,,,
428,ITI,ITI,,,,80.00,,,60.0
524,ITI,ITI,,,,45.93,,,
1017,ITI,Non isolé,,,,41.80,,41.80,10.0
...,...,...,...,...,...,...,...,...,...
9430358,ITI,Non isolé,,51.42,,45.12,,,
9430417,ITI,ITE,,43.04,,47.15,,,
9430549,ITI,,,66.19,,,,,
9430721,ITI,,,35.10,,,,,
