In [1]:
import numpy as np


In [2]:
import pandas as pd
from pathlib import Path
import json
from td001_processing import postprocessing_td001
from utils import round_float_cols,unique_ordered
from config import paths
import numpy as np
def run_enveloppe_processing(td001, td006, td007, td008):
    from td007_processing import merge_td007_tr_tv, postprocessing_td007
    from td008_processing import merge_td008_tr_tv, postprocessing_td008
    from td001_merge import merge_td001_dpe_id_envelope
    from td007_processing import agg_td007_to_td001_essential, agg_surface_envelope
    from td008_processing import agg_td008_to_td001_essential

    td008_raw_cols = td008.columns.tolist()
    td007_raw_cols = td007.columns.tolist()

    td001, td006, td007, td008 = merge_td001_dpe_id_envelope(td001=td001, td006=td006, td007=td007, td008=td008)

    td008 = merge_td008_tr_tv(td008)
    td008 = postprocessing_td008(td008)

    td007 = merge_td007_tr_tv(td007)
    td007 = postprocessing_td007(td007, td008)

    agg_td007 = agg_td007_to_td001_essential(td007)
    agg_td008 = agg_td008_to_td001_essential(td008)
    agg_surfaces = agg_surface_envelope(td007, td008)

    td001_enveloppe_agg = pd.concat([agg_td007, agg_td008, agg_surfaces], axis=1)

    td001_enveloppe_agg.index.name = 'td001_dpe_id'
    cols = [el for el in td008.columns if el not in td008_raw_cols + ['fen_lib_from_tv009',
                                                                      'fen_lib_from_tv021']]
    cols.append('td008_baie_id')
    cols = unique_ordered(cols)
    td008_p = td008[cols]
    cols = [el for el in td007.columns if
            el not in td007_raw_cols + ["qualif_surf", 'surface_paroi_opaque_calc', 'surface_paroi_totale_calc_v1',
                                        'surface_paroi_totale_calc_v2']]
    cols.append('td007_paroi_opaque_id')
    cols = unique_ordered(cols)
    td007_p = td007[cols]
    return td001_enveloppe_agg, td008_p, td007_p


def run_system_processing(td001, td006, td011, td012, td013, td014):
    from td011_td012_processing import merge_td012_tr_tv, postprocessing_td012, merge_td011_tr_tv, \
        agg_systeme_chauffage_essential
    from td013_td014_processing import merge_td013_tr_tv, postprocessing_td014, merge_td014_tr_tv, \
        agg_systeme_ecs_essential
    from td001_merge import merge_td001_dpe_id_system

    td011_raw_cols = td011.columns.tolist()
    td012_raw_cols = td012.columns.tolist()
    td013_raw_cols = td013.columns.tolist()
    td014_raw_cols = td014.columns.tolist()
    td001, td006, td011, td012, td013, td014 = merge_td001_dpe_id_system(td001, td006, td011, td012, td013, td014)
    td011 = merge_td011_tr_tv(td011)
    td012 = merge_td012_tr_tv(td012)
    td013 = merge_td013_tr_tv(td013)
    td014 = merge_td014_tr_tv(td014)

    td012 = postprocessing_td012(td012)

    cols = [el for el in td011.columns if el not in td011_raw_cols]
    cols.append('td011_installation_chauffage_id')
    cols = unique_ordered(cols)
    td011_p = td011[cols]

    cols = [el for el in td012.columns if
            el not in td012_raw_cols + ['besoin_chauffage_infer', 'gen_ch_concat_txt_desc']]
    cols.append('td012_generateur_chauffage_id')
    cols = unique_ordered(cols)
    td012_p = td012[cols]

    td001_sys_ch_agg = agg_systeme_chauffage_essential(td001, td011, td012)

    td014 = postprocessing_td014(td013, td014)

    cols = [el for el in td013.columns if el not in td013_raw_cols]
    cols.append('td013_installation_ecs_id')
    cols = unique_ordered(cols)
    td013_p = td013[cols]

    cols = [el for el in td014.columns if
            el not in td014_raw_cols + ['score_gen_ecs_lib_infer', 'gen_ecs_concat_txt_desc']]
    cols.append('td014_generateur_ecs_id')
    cols = unique_ordered(cols)
    td014_p = td014[cols]

    td001_sys_ecs_agg = agg_systeme_ecs_essential(td001, td013, td014)

    return td011_p, td012_p, td001_sys_ch_agg, td013_p, td014_p, td001_sys_ecs_agg


def build_doc(annexe_dir):
    from doc_annexe import td001_annexe_enveloppe_agg_desc, td001_sys_ch_agg_desc, td001_sys_ecs_agg_desc, \
        td007_annexe_desc, td008_annexe_desc, td012_annexe_desc, td014_annexe_desc, enums_cstb,td001_annexe_generale_desc

    doc_annexe = dict()
    doc_annexe['td001_annexe_generale'] = td001_annexe_generale_desc
    doc_annexe['td001_annexe_enveloppe_agg'] = td001_annexe_enveloppe_agg_desc
    doc_annexe['td001_sys_ch_agg'] = td001_sys_ch_agg_desc
    doc_annexe['td001_sys_ecs_agg'] = td001_sys_ecs_agg_desc
    doc_annexe['td007_annexe'] = td007_annexe_desc
    doc_annexe['td008_annexe'] = td008_annexe_desc
    doc_annexe['td012_annexe'] = td012_annexe_desc
    doc_annexe['td014_annexe'] = td014_annexe_desc

    with open(annexe_dir / 'doc_table_annexes_cstb.json', 'w', encoding='utf-8') as f:
        json.dump(doc_annexe, f, indent=4)

    with open(annexe_dir / 'enum_table_annexes_cstb.json', 'w', encoding='utf-8') as f:
        json.dump(enums_cstb, f, indent=4)


if __name__ == '__main__':

    data_dir = paths['DPE_DEPT_PATH']
    annexe_dir = paths['DPE_DEPT_ANNEXE_PATH']
    annexe_dir = Path(annexe_dir)
    annexe_dir.mkdir(exist_ok=True, parents=True)
    build_doc(annexe_dir)
    for dept_dir in Path(data_dir).iterdir():
        print(dept_dir)
        annexe_dept_dir = annexe_dir / dept_dir.name
        annexe_dept_dir.mkdir(exist_ok=True, parents=True)
        # LOAD TABLES
        td007 = pd.read_csv(dept_dir / 'td007_paroi_opaque.csv', dtype=str)
        td006 = pd.read_csv(dept_dir / 'td006_batiment.csv', dtype=str)
        td001 = pd.read_csv(dept_dir / 'td001_dpe.csv', dtype=str)
        td008 = pd.read_csv(dept_dir / 'td008_baie.csv', dtype=str)
        td008 = td008.drop('td008_baie_id', axis=1)

        # ENVELOPPE PROCESSING
        td001_enveloppe_agg, td008_p, td007_p = run_enveloppe_processing(td001, td006, td007, td008)

        round_float_cols(td001_enveloppe_agg).to_csv(annexe_dept_dir / 'td001_annexe_enveloppe_agg.csv')
        round_float_cols(td007_p).to_csv(annexe_dept_dir / 'td007_paroi_opaque_annexe.csv')
        round_float_cols(td008_p).to_csv(annexe_dept_dir / 'td008_baie_annexe.csv')

        # SYSTEM PROCESSING

        td011 = pd.read_csv(dept_dir / 'td011_installation_chauffage.csv', dtype=str)
        td012 = pd.read_csv(dept_dir / 'td012_generateur_chauffage.csv', dtype=str)
        td013 = pd.read_csv(dept_dir / 'td013_installation_ecs.csv', dtype=str)
        td014 = pd.read_csv(dept_dir / 'td014_generateur_ecs.csv', dtype=str)

        td011_p, td012_p, td001_sys_ch_agg, td013_p, td014_p, td001_sys_ecs_agg = run_system_processing(td001, td006,
                                                                                                        td011, td012,
                                                                                                        td013, td014)
        round_float_cols(td001_sys_ch_agg).to_csv(annexe_dept_dir / 'td001_annexe_sys_ch_agg.csv')
        round_float_cols(td001_sys_ecs_agg).to_csv(annexe_dept_dir / 'td001_annexe_sys_ecs_agg.csv')
        round_float_cols(td011_p).to_csv(annexe_dept_dir / 'td011_annexe_installation_chauffage.csv')
        round_float_cols(td012_p).to_csv(annexe_dept_dir / 'td012_annexe_generateur_chauffage.csv')
        round_float_cols(td013_p).to_csv(annexe_dept_dir / 'td013_annexe_installation_ecs.csv')
        round_float_cols(td014_p).to_csv(annexe_dept_dir / 'td014_annexe_generateur_ecs.csv')

        # add td001 processing
        postprocessing_td001(td001)[['nom_methode_dpe_norm', 'id']].rename(columns={'id': 'td001_dpe_id'}).to_csv(
            annexe_dept_dir / 'td001_annexe_generale.csv')
        break

D:\data\dpe_full\depts\1
tv013_valeur_pont_thermique_isolation_planche_bas not found
tv025_type_batiment not found
tv025_type_emetteur not found
tv025_equipement_intermittence not found
tv026_classe_inertie_plancher_bas not found
tv026_classe_inertie_plancher_haut not found
tv026_classe_inertie_paroi_verticale not found
tv026_classe_inertie_classe_inertie not found
tv029_simu_type_distribution not found


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_col] = table[pond]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_value_col_temp] = table[pond_col] * table[value_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexe

In [3]:
td010 = pd.read_csv(dept_dir / 'td010_pont_thermique.csv', dtype=str)


In [4]:
td007 = td007.rename(columns ={'id':'td007_paroi_opaque_id'})

In [5]:
td007 = td007.merge(td007_p,on='td007_paroi_opaque_id',how='left')

# traitements murs

In [6]:
from utils import agg_pond_top_freq,agg_pond_avg

In [117]:
td007_murs = td007.loc[td007.tr014_type_parois_opaque_id.isin(['2', '1'])].copy()

float_cols = ['coefficient_transmission_thermique_paroi_non_isolee', 'coefficient_transmission_thermique_paroi',
              'epaisseur_isolation', 'resistance_thermique_isolation']
td007_murs[float_cols] = td007_murs[float_cols].astype(float)

# ## label uniforme tv003

td007_murs['tv003_periode_isolation_uniforme'] = td007_murs.tv003_annee_construction.astype('string')

td007_murs['tv003_label_isolation_uniforme'] = td007_murs.tv003_annee_construction.astype('string')

null = td007_murs['tv003_label_isolation_uniforme'].isnull()

td007_murs.loc[null, 'tv003_label_isolation_uniforme'] = td007_murs.loc[null, 'tv003_annee_isolation'].astype(
    'string')

inconnu = td007_murs.tv003_mur_isole.isnull() & (~td007_murs.tv003_annee_construction.isnull())
non_isole = td007_murs.tv003_mur_isole == '0'
isole = td007_murs.tv003_mur_isole == '1'
is_annee_construction = ~td007_murs.tv003_annee_construction.isnull()
is_annee_isolation = ~td007_murs.tv003_annee_isolation.isnull()

td007_murs.loc[inconnu, 'tv003_label_isolation_uniforme'] = 'isol. inconnue periode constr : ' + td007_murs.loc[
    inconnu, 'tv003_label_isolation_uniforme']
td007_murs.loc[non_isole, 'tv003_label_isolation_uniforme'] = 'non isolé'
td007_murs.loc[isole & is_annee_construction, 'tv003_label_isolation_uniforme'] = 'isolé periode constr : ' + \
                                                                            td007_murs.loc[
                                                                                isole & is_annee_construction, 'tv003_label_isolation_uniforme']
td007_murs.loc[isole & (~is_annee_construction), 'tv003_label_isolation_uniforme'] = 'isolé periode isolation :' + \
                                                                               td007_murs.loc[isole & (
                                                                                   ~is_annee_construction), 'tv003_label_isolation_uniforme']

# annee isolation uniforme.

td007_murs['annee_isole_uniforme_min'] = td007_murs.tv003_annee_construction_min.astype('string')
td007_murs['annee_isole_uniforme_max'] = td007_murs.tv003_annee_construction_max.astype('string')
td007_murs.loc[is_annee_isolation, 'annee_isole_uniforme_min'] = td007_murs.loc[is_annee_isolation,'tv003_annee_isolation_min'].astype('string')
td007_murs.loc[is_annee_isolation, 'annee_isole_uniforme_max'] = td007_murs.loc[is_annee_isolation,'tv003_annee_isolation_max'].astype('string')


td007_murs.tv003_label_isolation_uniforme.value_counts()

# ## label méthode calcul  U

td007_murs['meth_calc_U'] = 'INCONNUE'

# calc booleens
U = td007_murs.coefficient_transmission_thermique_paroi.round(2)
U_non_isolee = td007_murs.coefficient_transmission_thermique_paroi_non_isolee.round(2)
bool_U_egal_0 = U.round(2) == 0.00
bool_U_U0 = U.round(2) == U_non_isolee.round(2)
bool_U_2 = U.round(2) >= 2 | non_isole
bool_U_U0 = bool_U_U0 & (~bool_U_2)
bool_U_U0_auto_isol = bool_U_U0 & (U_non_isolee < 1)
bool_U_brut = (U <= 1) & (~bool_U_U0)
bool_U_brut_non_isole = (U > 1) & (~bool_U_U0)
bool_U_par_e = td007_murs.epaisseur_isolation > 0
bool_U_par_r = td007_murs.resistance_thermique_isolation > 0


# remplacer 0 par nan lorsque les 0 sont des non information. 

td007_murs.loc[~bool_U_par_e,'epaisseur_isolation']=np.nan
td007_murs.loc[~bool_U_par_r,'resistance_thermique_isolation']=np.nan


# imputation labels

td007_murs.loc[bool_U_brut, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : ISOLE'
td007_murs.loc[bool_U_brut_non_isole, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : NON ISOLE'
td007_murs.loc[bool_U_par_e, 'meth_calc_U'] = 'EPAISSEUR ISOLATION SAISIE'
td007_murs.loc[bool_U_par_r, 'meth_calc_U'] = 'RESISTANCE ISOLATION SAISIE'
td007_murs.loc[bool_U_2, 'meth_calc_U'] = 'MUR NON ISOLE U=2'
td007_murs.loc[bool_U_U0, 'meth_calc_U'] = 'MUR NON ISOLE U<2'
td007_murs.loc[bool_U_U0_auto_isol, 'meth_calc_U'] = 'STRUCTURE ISOLANTE (ITR) U<1'
td007_murs.loc[inconnu, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLATION INCONNUE'
td007_murs.loc[isole, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLE'
td007_murs.loc[isole, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLE'
td007_murs.loc[bool_U_egal_0, 'meth_calc_U'] = 'ERREUR : U=0'

# ## label isolatoin

td007_murs['isolation'] = 'NON ISOLE'
is_isole = ~td007_murs.meth_calc_U.str.contains('NON ISOLE|INCONNUE')
td007_murs.loc[is_isole, 'isolation'] = 'ISOLE SAISI'
is_isole_defaut = is_isole & (td007_murs.meth_calc_U.str.contains('DEFAUT'))
td007_murs.loc[is_isole_defaut, 'isolation'] = 'ISOLE DEFAUT PRE 1988'

inconnu = td007_murs.meth_calc_U.str.contains('INCONNUE')
post_88 = td007_murs['annee_isole_uniforme_min'] >= "1988"

td007_murs.loc[inconnu, 'isolation'] = 'ISOLATION INCONNUE (DEFAUT)'

td007_murs.loc[(inconnu | is_isole_defaut) & post_88, 'isolation'] = 'ISOLE DEFAUT POST 1988'

is_isole_struc = is_isole & (td007_murs.meth_calc_U.str.contains('STRUCTURE'))
td007_murs.loc[is_isole_struc, 'isolation'] = 'STRUCTURE ISOLANTE (ITR)'

is_err = td007_murs.meth_calc_U.str.contains('ERREUR')

td007_murs.loc[is_err, 'isolation'] = 'NONDEF'

# ## label adjacence

td007_murs['type_adjacence'] = 'NONDEF'

ext = td007_murs.tv001_code=='TV001_001'

td007_murs.loc[ext,'type_adjacence'] = 'EXTERIEUR'

is_dep=td007_murs.b_infer.round(1)>=0.9

td007_murs.loc[is_dep,'type_adjacence'] = 'EXTERIEUR'

enterre = td007_murs.tv001_code=='TV001_002'

td007_murs.loc[enterre,'type_adjacence'] = 'PAROI_ENTERREE'

not_null = ~td007_murs.tv002_local_non_chauffe.isnull()

td007_murs.loc[not_null, 'type_adjacence'] = 'LNC'

is_lnc = td007_murs.tv001_code.astype('string') > 'TV001_004'

td007_murs.loc[is_lnc, 'type_adjacence'] = 'LNC'

is_adj = td007_murs.tv001_code == 'TV001_004'

td007_murs.loc[is_adj, 'type_adjacence'] = 'BAT_ADJ'

In [118]:
td007_murs = td007_murs.rename(columns={'tv004_epaisseur':'epaisseur_structure',
                                       'tv002_local_non_chauffe':'type_local_non_chauffe',
                                       'coefficient_transmission_thermique_paroi':'U'})

# aggregation td001 murs

In [119]:
concat = list()
for var in ['meth_calc_U','U','epaisseur_isolation','resistance_thermique_isolation','isolation',
            'annee_isole_uniforme_min','annee_isole_uniforme_max','materiaux_structure','epaisseur_structure',
           ]:

    var_agg = agg_pond_top_freq(td007_murs, var, 'surface_paroi_opaque_infer',
                           'td001_dpe_id').to_frame(f'{var}_murs_all_top')
    concat.append(var_agg)
for type_adjacence in ['EXTERIEUR','LNC','BAT_ADJ']:
    sel = td007_murs.loc[td007_murs.type_adjacence ==type_adjacence]
    for var in ['meth_calc_U','U','epaisseur_isolation','resistance_thermique_isolation','isolation',
                'annee_isole_uniforme_min','annee_isole_uniforme_max','materiaux_structure','epaisseur_structure',
               ]:
    
        var_agg = agg_pond_top_freq(sel, var, 'surface_paroi_opaque_infer',
                               'td001_dpe_id').to_frame(f'{var}_murs_{type_adjacence.lower()}_top')
        concat.append(var_agg)

    
    
table_concat = pd.concat(concat,axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if isinstance(pond, str):


In [120]:
adjacences = td007_murs.groupby('td001_dpe_id').type_local_non_chauffe.agg(lambda x:x.dropna().unique().tolist())
adjacences = td007_murs.groupby('td001_dpe_id').type_local_non_chauffe.agg(lambda x:x.dropna().unique().tolist())
adjacences.to_frame('adjacences_LNC_murs')
var_agg = agg_pond_top_freq(td007_murs, 'type_local_non_chauffe', 'surface_paroi_opaque_infer',
                           'td001_dpe_id').to_frame(f'adjacence_LNC_murs_top')

In [121]:
pivot=td007_murs.pivot_table(index='td001_dpe_id',columns='type_adjacence',values='surface_paroi_opaque_infer',aggfunc='sum')
pivot.columns = [f'surface_murs_{col.lower()}' for col in pivot]

In [122]:
td007_murs_agg = pd.concat([table_concat,var_agg,adjacences,pivot],axis=1)

In [123]:
td007_murs_agg

Unnamed: 0,meth_calc_U_murs_all_top,U_murs_all_top,epaisseur_isolation_murs_all_top,resistance_thermique_isolation_murs_all_top,isolation_murs_all_top,annee_isole_uniforme_min_murs_all_top,annee_isole_uniforme_max_murs_all_top,materiaux_structure_murs_all_top,epaisseur_structure_murs_all_top,meth_calc_U_murs_exterieur_top,...,annee_isole_uniforme_max_murs_bat_adj_top,materiaux_structure_murs_bat_adj_top,epaisseur_structure_murs_bat_adj_top,adjacence_LNC_murs_top,type_local_non_chauffe,surface_murs_bat_adj,surface_murs_exterieur,surface_murs_lnc,surface_murs_nondef,surface_murs_paroi_enterree
999746,PAR DEFAUT PERIODE : ISOLE,0.33,,,ISOLE DEFAUT POST 1988,2001,2900,Murs en pisé ou béton de terre stabilisé (à pa...,40 et -,PAR DEFAUT PERIODE : ISOLE,...,,,,,[],,62.85,,,
999726,PAR DEFAUT PERIODE : ISOLE,0.33,,,ISOLE DEFAUT POST 1988,2001,2900,Murs en pisé ou béton de terre stabilisé (à pa...,40 et -,PAR DEFAUT PERIODE : ISOLE,...,,,,,[],,80.95,,,
999660,EPAISSEUR ISOLATION SAISIE,0.35,10.0,,ISOLE SAISI,,,Murs en blocs de béton creux,20 et -,EPAISSEUR ISOLATION SAISIE,...,,,,Sous-sols,"[Véranda, Sous-sols, Comble faiblement ventilé]",,109.67,50.33,,
99948,RESISTANCE ISOLATION SAISIE,0.33,,2.67,ISOLE SAISI,,,Murs en blocs de béton creux,20 et -,RESISTANCE ISOLATION SAISIE,...,,,,,[],,105.15,,,
99943,RESISTANCE ISOLATION SAISIE,0.53,,1.33,ISOLE SAISI,2006,2900,,,RESISTANCE ISOLATION SAISIE,...,,,,Garage,[Garage],,102.60,16.00,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
983345,,,,,,,,,,,...,,,,,[],,0.00,,,
983371,,,,,,,,,,,...,,,,,[],,0.00,,,
983383,,,,,,,,,,,...,,,,,[],,0.00,,,
983414,,,,,,,,,,,...,,,,,[],,0.00,,,


In [15]:
td007_murs_agg[['materiaux_structure_murs_exterieur_top','epaisseur_structure_murs_exterieur_top']]

Unnamed: 0,materiaux_structure_murs_exterieur_top,epaisseur_structure_murs_exterieur_top
999746,Murs en pisé ou béton de terre stabilisé (à pa...,40 et -
999726,Murs en pisé ou béton de terre stabilisé (à pa...,40 et -
999660,Murs en blocs de béton creux,20 et -
99948,Murs en blocs de béton creux,20 et -
99943,,
...,...,...
983345,,
983371,,
983383,,
983414,,


# traitements planchers

In [113]:
td007_planchers = td007.loc[td007.tr014_type_parois_opaque_id=='3'].copy()

float_cols = ['coefficient_transmission_thermique_paroi_non_isolee', 'coefficient_transmission_thermique_paroi',
              'epaisseur_isolation', 'resistance_thermique_isolation']
td007_planchers[float_cols] = td007_planchers[float_cols].astype(float)



# ## label uniforme tv005

td007_planchers['tv005_periode_isolation_uniforme'] = td007_planchers.tv005_annee_construction.astype('string')

td007_planchers['tv005_label_isolation_uniforme'] = td007_planchers.tv005_annee_construction.astype('string')

null = td007_planchers['tv005_label_isolation_uniforme'].isnull()

td007_planchers.loc[null, 'tv005_label_isolation_uniforme'] = td007_planchers.loc[null, 'tv005_annee_isolation'].astype(
    'string')


inconnu = td007_planchers.tv005_pb_isole=="Inconnu"
non_isole = td007_planchers.tv005_pb_isole == 'Non'
isole = td007_planchers.tv005_pb_isole == '1'
tp  = td007_planchers.tv005_pb_isole == 'Terre Plein'

is_annee_construction = ~td007_planchers.tv005_annee_construction.isnull()
is_annee_isolation = ~td007_planchers.tv005_annee_isolation.isnull()

td007_planchers.loc[inconnu, 'tv005_label_isolation_uniforme'] = 'isol. inconnue periode constr : ' + td007_planchers.loc[
    inconnu, 'tv005_label_isolation_uniforme']
td007_planchers.loc[non_isole, 'tv005_label_isolation_uniforme'] = 'non isolé'

td007_planchers.loc[isole & is_annee_construction, 'tv005_label_isolation_uniforme'] = 'isolé periode constr : ' + \
                                                                            td007_planchers.loc[
                                                                                isole & is_annee_construction, 'tv005_label_isolation_uniforme']
td007_planchers.loc[isole & (~is_annee_construction), 'tv005_label_isolation_uniforme'] = 'isolé periode isolation :' + \
                                                                               td007_planchers.loc[isole & (
                                                                                   ~is_annee_construction), 'tv005_label_isolation_uniforme']

td007_planchers.loc[isole & (~is_annee_construction), 'tv005_label_isolation_uniforme'] = 'isolé periode isolation :' + \
                                                                               td007_planchers.loc[isole & (
                                                                                   ~is_annee_construction), 'tv005_label_isolation_uniforme']

td007_planchers.loc[tp, 'tv005_label_isolation_uniforme'] = 'Terre Plein periode constr : ' + td007_planchers.loc[
    tp, 'tv005_label_isolation_uniforme']


# annee isolation uniforme.

td007_planchers['annee_isole_uniforme_min'] = td007_planchers.tv005_annee_construction_min.astype('string')
td007_planchers['annee_isole_uniforme_max'] = td007_planchers.tv005_annee_construction_max.astype('string')
td007_planchers.loc[is_annee_isolation, 'annee_isole_uniforme_min'] = td007_planchers.loc[is_annee_isolation,'tv005_annee_isolation_min'].astype('string')
td007_planchers.loc[is_annee_isolation, 'annee_isole_uniforme_max'] = td007_planchers.loc[is_annee_isolation,'tv005_annee_isolation_max'].astype('string')





# ## label méthode calcul  U

td007_planchers['meth_calc_U'] = 'INCONNUE'

# calc booleens
U = td007_planchers.coefficient_transmission_thermique_paroi.round(2)
U_non_isolee = td007_planchers.coefficient_transmission_thermique_paroi_non_isolee.round(2)
bool_U_egal_0 = U.round(2) == 0.00
bool_U_U0 = U.round(2) == U_non_isolee.round(2)
bool_U_2 = U.round(2) >= 2 | non_isole
bool_U_U0 = bool_U_U0 & (~bool_U_2)
bool_U_U0_auto_isol = bool_U_U0 & (U_non_isolee < 1)
bool_U_brut = (U <= 1) & (~bool_U_U0)
bool_U_brut_non_isole = (U > 1) & (~bool_U_U0)
bool_U_par_e = td007_planchers.epaisseur_isolation > 0
bool_U_par_r = td007_planchers.resistance_thermique_isolation > 0


# remplacer 0 par nan lorsque les 0 sont des non information. 

td007_planchers.loc[~bool_U_par_e,'epaisseur_isolation']=np.nan
td007_planchers.loc[~bool_U_par_r,'resistance_thermique_isolation']=np.nan


# imputation labels

td007_planchers.loc[bool_U_brut, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : ISOLE'
td007_planchers.loc[bool_U_brut_non_isole, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : NON ISOLE'
td007_planchers.loc[bool_U_par_e, 'meth_calc_U'] = 'EPAISSEUR ISOLATION SAISIE'
td007_planchers.loc[bool_U_par_r, 'meth_calc_U'] = 'RESISTANCE ISOLATION SAISIE'
td007_planchers.loc[bool_U_2, 'meth_calc_U'] = 'PLANCHER NON ISOLE U=2'
td007_planchers.loc[bool_U_U0, 'meth_calc_U'] = 'PLANCHER NON ISOLE U<2'
td007_planchers.loc[bool_U_U0_auto_isol, 'meth_calc_U'] = 'STRUCTURE ISOLANTE U<1'
td007_planchers.loc[inconnu, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLATION INCONNUE'
td007_planchers.loc[isole, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLE'
td007_planchers.loc[tp, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : TERRE PLEIN'
td007_planchers.loc[bool_U_egal_0, 'meth_calc_U'] = 'ERREUR : U=0'


# ## label isolatoin

td007_planchers['isolation'] = 'NON ISOLE'
is_isole = ~td007_planchers.meth_calc_U.str.contains('NON ISOLE|INCONNUE|TERRE')
td007_planchers.loc[is_isole, 'isolation'] = 'ISOLE SAISI'
is_isole_defaut = is_isole & (td007_planchers.meth_calc_U.str.contains('DEFAUT'))
td007_planchers.loc[is_isole_defaut, 'isolation'] = 'ISOLE DEFAUT PRE 1982'

inconnu = td007_planchers.meth_calc_U.str.contains('INCONNUE')
post_82 = td007_planchers['annee_isole_uniforme_min'] >= "1982"
post_2001 = td007_planchers['annee_isole_uniforme_min'] >= "2001"

td007_planchers.loc[inconnu, 'isolation'] = 'ISOLATION INCONNUE (DEFAUT)'

td007_planchers.loc[(inconnu | is_isole_defaut) & post_82, 'isolation'] = 'ISOLE DEFAUT POST 1982'


td007_planchers.loc[tp, 'isolation'] = 'TERRE PLEIN DEFAUT PRE 2001'
td007_planchers.loc[tp & post_2001, 'isolation'] = 'TERRE PLEIN DEFAUT POST 2001'


is_isole_struc = is_isole & (td007_planchers.meth_calc_U.str.contains('STRUCTURE'))

td007_planchers.loc[is_isole_struc, 'isolation'] = 'STRUCTURE ISOLANTE'

is_err = td007_planchers.meth_calc_U.str.contains('ERREUR')

td007_planchers.loc[is_err, 'isolation'] = 'NONDEF'


# ## label adjacence

td007_planchers['type_adjacence'] = 'NONDEF'

ext = td007_planchers.tv001_code=='TV001_001'

td007_planchers.loc[ext,'type_adjacence'] = 'EXTERIEUR'

is_dep=td007_planchers.b_infer.round(1)>=0.9

td007_planchers.loc[is_dep,'type_adjacence'] = 'EXTERIEUR'

enterre = td007_planchers.tv001_code=='TV001_002'

td007_planchers.loc[enterre,'type_adjacence'] = 'PAROI_ENTERREE'

not_null = ~td007_planchers.tv002_local_non_chauffe.isnull()

td007_planchers.loc[not_null, 'type_adjacence'] = 'LNC'

is_lnc = td007_planchers.tv001_code.astype('string') > 'TV001_004'

td007_planchers.loc[is_lnc, 'type_adjacence'] = 'LNC'

is_adj = td007_planchers.tv001_code == 'TV001_004'

td007_planchers.loc[is_adj, 'type_adjacence'] = 'BAT_ADJ'

is_tp = td007_planchers.tv001_code=='TV001_261'

td007_planchers.loc[is_tp, 'type_adjacence'] = 'TERRE PLEIN'


In [114]:
td007_planchers = td007_planchers.rename(columns={
                                       'tv002_local_non_chauffe':'type_local_non_chauffe',
                                       'coefficient_transmission_thermique_paroi':'U'})

In [115]:
concat = list()

for var in ['meth_calc_U','U','epaisseur_isolation','resistance_thermique_isolation','isolation',
            'annee_isole_uniforme_min','annee_isole_uniforme_max','materiaux_structure',
           ]:

    var_agg = agg_pond_top_freq(td007_planchers, var, 'surface_paroi_opaque_infer',
                           'td001_dpe_id').to_frame(f'{var}_plancher_all_top')
    concat.append(var_agg)
for type_adjacence in ['EXTERIEUR','TERRE PLEIN','LNC','BAT_ADJ']:
    sel = td007_planchers.loc[td007_planchers.type_adjacence ==type_adjacence]
    for var in ['meth_calc_U','U','epaisseur_isolation','resistance_thermique_isolation','isolation',
                'annee_isole_uniforme_min','annee_isole_uniforme_max','materiaux_structure',
               ]:
    
        var_agg = agg_pond_top_freq(sel, var, 'surface_paroi_opaque_infer',
                               'td001_dpe_id').to_frame(f'{var}_plancher_{type_adjacence.lower()}_top')
        concat.append(var_agg)

    
    
table_concat = pd.concat(concat,axis=1)

KeyError: 'Uplafond'

In [112]:
table_concat

Unnamed: 0,meth_calc_U_plafond_all_top,Uplafond_plafond_all_top,epaisseur_isolation_plafond_all_top,resistance_thermique_isolation_plafond_all_top,isolation_plafond_all_top,annee_isole_uniforme_min_plafond_all_top,annee_isole_uniforme_max_plafond_all_top,materiaux_structure_plafond_all_top,meth_calc_U_plafond_exterieur_top,Uplafond_plafond_exterieur_top,...,annee_isole_uniforme_max_plafond_lnc_top,materiaux_structure_plafond_lnc_top,meth_calc_U_plafond_bat_adj_top,Uplafond_plafond_bat_adj_top,epaisseur_isolation_plafond_bat_adj_top,resistance_thermique_isolation_plafond_bat_adj_top,isolation_plafond_bat_adj_top,annee_isole_uniforme_min_plafond_bat_adj_top,annee_isole_uniforme_max_plafond_bat_adj_top,materiaux_structure_plafond_bat_adj_top
999746,PAR DEFAUT PERIODE : TERRE PLEIN,0.37,,,TERRE PLEIN DEFAUT PRE 2001,1900,2001,Dalle béton,PAR DEFAUT PERIODE : TERRE PLEIN,0.37,...,,,,,,,,,,
999726,PAR DEFAUT PERIODE : TERRE PLEIN,0.37,,,TERRE PLEIN DEFAUT PRE 2001,1900,2001,Dalle béton,PAR DEFAUT PERIODE : TERRE PLEIN,0.37,...,,,,,,,,,,
999660,PAR DEFAUT PERIODE : ISOLE,0.55,,,ISOLE DEFAUT POST 1982,1989,2000,"Plancher lourd type entrevous terre-cuite, pou...",,,...,2000,"Plancher lourd type entrevous terre-cuite, pou...",,,,,,,,
99948,PAR DEFAUT PERIODE : ISOLE,0.30,,,ISOLE DEFAUT POST 1982,2001,2005,"Plancher lourd type entrevous terre-cuite, pou...",,,...,2005,"Plancher lourd type entrevous terre-cuite, pou...",,,,,,,,
99943,PAR DEFAUT PERIODE : ISOLE,0.34,,,ISOLE DEFAUT POST 1982,2006,2100,Dalle béton,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1002180,PLAFOND NON ISOLE U=2,2.00,,,NON ISOLE,1900,2001,Dalle béton,,,...,,,,,,,,,,
100207,EPAISSEUR ISOLATION SAISIE,0.59,5.0,,ISOLE SAISI,,,"Plancher lourd type entrevous terre-cuite, pou...",,,...,,"Plancher lourd type entrevous terre-cuite, pou...",,,,,,,,
1001934,PAR DEFAUT PERIODE : TERRE PLEIN,0.37,,,TERRE PLEIN DEFAUT PRE 2001,1900,2001,Dalle béton,,,...,2000,"Plancher lourd type entrevous terre-cuite, pou...",,,,,,,,
1000989,PLAFOND NON ISOLE U=2,2.00,,,NON ISOLE,,,Dalle béton,,,...,,Dalle béton,,,,,,,,
