In [26]:
import numpy as np


In [1]:
import pandas as pd
from pathlib import Path
import json
from td001_processing import postprocessing_td001
from utils import round_float_cols,unique_ordered
from config import paths
import numpy as np
def run_enveloppe_processing(td001, td006, td007, td008):
    """Run the envelope processing chain on the td007 / td008 tables.

    The tables are first passed through ``merge_td001_dpe_id_envelope``, then
    td008 and td007 are merged with their tr/tv tables and post-processed
    (td007 post-processing also receives the processed td008). Three
    aggregates are finally computed and concatenated column-wise.

    Parameters
    ----------
    td001, td006, td007, td008 : pd.DataFrame
        Input tables. The column names of td007 and td008 at call time are
        recorded and excluded from the returned per-row tables.

    Returns
    -------
    td001_enveloppe_agg : pd.DataFrame
        Concatenation (axis=1) of the td007, td008 and surface aggregates;
        its index is named ``td001_dpe_id``.
    td008_p : pd.DataFrame
        Columns of the processed td008 that were not input columns (and are
        not one of two helper columns), plus ``td008_baie_id``.
    td007_p : pd.DataFrame
        Same selection for td007, plus ``td007_paroi_opaque_id``.
    """
    from td007_processing import (merge_td007_tr_tv, postprocessing_td007, agg_td007_to_td001_essential,
                                  agg_surface_envelope)
    from td008_processing import merge_td008_tr_tv, postprocessing_td008, agg_td008_to_td001_essential
    from td001_merge import merge_td001_dpe_id_envelope

    # Names to leave out of the per-row outputs: every input column, plus a
    # few intermediate columns created during processing.
    hidden_td008 = set(td008.columns).union(['fen_lib_from_tv009', 'fen_lib_from_tv021'])
    hidden_td007 = set(td007.columns).union(["qualif_surf", 'surface_paroi_opaque_calc',
                                             'surface_paroi_totale_calc_v1', 'surface_paroi_totale_calc_v2'])

    td001, td006, td007, td008 = merge_td001_dpe_id_envelope(td001=td001, td006=td006, td007=td007, td008=td008)

    # td008 must be fully processed first: td007 post-processing takes it as input.
    td008 = postprocessing_td008(merge_td008_tr_tv(td008))
    td007 = postprocessing_td007(merge_td007_tr_tv(td007), td008)

    td001_enveloppe_agg = pd.concat(
        [agg_td007_to_td001_essential(td007), agg_td008_to_td001_essential(td008),
         agg_surface_envelope(td007, td008)],
        axis=1)
    td001_enveloppe_agg.index.name = 'td001_dpe_id'

    # The id column is appended last; unique_ordered removes the duplicate if
    # it was already selected.
    kept_td008 = [name for name in td008.columns if name not in hidden_td008] + ['td008_baie_id']
    td008_p = td008[unique_ordered(kept_td008)]

    kept_td007 = [name for name in td007.columns if name not in hidden_td007] + ['td007_paroi_opaque_id']
    td007_p = td007[unique_ordered(kept_td007)]

    return td001_enveloppe_agg, td008_p, td007_p


def run_system_processing(td001, td006, td011, td012, td013, td014):
    """Run the system processing chain on the td011-td014 tables.

    td011/td012 are the heating ("chauffage") installation and generator
    tables, td013/td014 the ECS installation and generator tables. All four
    are passed through ``merge_td001_dpe_id_system`` and merged with their
    tr/tv tables; td012 and td014 are additionally post-processed. One
    aggregate is computed for heating and one for ECS.

    Parameters
    ----------
    td001, td006, td011, td012, td013, td014 : pd.DataFrame
        Input tables. The column names of td011-td014 at call time are
        recorded and excluded from the returned per-row tables.

    Returns
    -------
    tuple
        ``(td011_p, td012_p, td001_sys_ch_agg, td013_p, td014_p, td001_sys_ecs_agg)``
        where each ``*_p`` table holds only the columns added by the
        processing plus the table's own id column.
    """
    from td011_td012_processing import merge_td012_tr_tv, postprocessing_td012, merge_td011_tr_tv, \
        agg_systeme_chauffage_essential
    from td013_td014_processing import merge_td013_tr_tv, postprocessing_td014, merge_td014_tr_tv, \
        agg_systeme_ecs_essential
    from td001_merge import merge_td001_dpe_id_system

    def _derived_view(table, hidden, id_col):
        # Columns not listed in `hidden`, followed by the id column (deduplicated, order kept).
        kept = [name for name in table.columns if name not in hidden]
        kept.append(id_col)
        return table[unique_ordered(kept)]

    # Input column names, plus intermediate columns that must not be exported.
    hidden_td011 = set(td011.columns)
    hidden_td012 = set(td012.columns).union(['besoin_chauffage_infer', 'gen_ch_concat_txt_desc'])
    hidden_td013 = set(td013.columns)
    hidden_td014 = set(td014.columns).union(['score_gen_ecs_lib_infer', 'gen_ecs_concat_txt_desc'])

    td001, td006, td011, td012, td013, td014 = merge_td001_dpe_id_system(td001, td006, td011, td012, td013, td014)
    td011 = merge_td011_tr_tv(td011)
    td012 = merge_td012_tr_tv(td012)
    td013 = merge_td013_tr_tv(td013)
    td014 = merge_td014_tr_tv(td014)

    # --- heating ---
    td012 = postprocessing_td012(td012)
    td011_p = _derived_view(td011, hidden_td011, 'td011_installation_chauffage_id')
    td012_p = _derived_view(td012, hidden_td012, 'td012_generateur_chauffage_id')
    td001_sys_ch_agg = agg_systeme_chauffage_essential(td001, td011, td012)

    # --- ECS ---
    td014 = postprocessing_td014(td013, td014)
    td013_p = _derived_view(td013, hidden_td013, 'td013_installation_ecs_id')
    td014_p = _derived_view(td014, hidden_td014, 'td014_generateur_ecs_id')
    td001_sys_ecs_agg = agg_systeme_ecs_essential(td001, td013, td014)

    return td011_p, td012_p, td001_sys_ch_agg, td013_p, td014_p, td001_sys_ecs_agg


def build_doc(annexe_dir):
    """Write the two JSON documentation files for the annex tables.

    ``doc_table_annexes_cstb.json`` receives the description objects imported
    from ``doc_annexe``, keyed by annex table name;
    ``enum_table_annexes_cstb.json`` receives ``enums_cstb``. Both are written
    with a 4-space indent and UTF-8 file encoding.

    Parameters
    ----------
    annexe_dir : pathlib.Path
        Output directory; it is joined to the file names with ``/``.
    """
    from doc_annexe import td001_annexe_enveloppe_agg_desc, td001_sys_ch_agg_desc, td001_sys_ecs_agg_desc, \
        td007_annexe_desc, td008_annexe_desc, td012_annexe_desc, td014_annexe_desc, enums_cstb,td001_annexe_generale_desc

    doc_annexe = {
        'td001_annexe_generale': td001_annexe_generale_desc,
        'td001_annexe_enveloppe_agg': td001_annexe_enveloppe_agg_desc,
        'td001_sys_ch_agg': td001_sys_ch_agg_desc,
        'td001_sys_ecs_agg': td001_sys_ecs_agg_desc,
        'td007_annexe': td007_annexe_desc,
        'td008_annexe': td008_annexe_desc,
        'td012_annexe': td012_annexe_desc,
        'td014_annexe': td014_annexe_desc,
    }

    outputs = (
        ('doc_table_annexes_cstb.json', doc_annexe),
        ('enum_table_annexes_cstb.json', enums_cstb),
    )
    for file_name, payload in outputs:
        with open(annexe_dir / file_name, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=4)


# Script entry point: writes the JSON documentation, then builds the annex CSV
# tables for a department directory found under paths['DPE_DEPT_PATH'].
# The variables assigned here (td001, td007, td007_p, dept_dir, ...) stay in the
# module namespace after the block has run.
if __name__ == '__main__':

    data_dir = paths['DPE_DEPT_PATH']
    annexe_dir = paths['DPE_DEPT_ANNEXE_PATH']
    annexe_dir = Path(annexe_dir)
    annexe_dir.mkdir(exist_ok=True, parents=True)
    build_doc(annexe_dir)
    for dept_dir in Path(data_dir).iterdir():
        print(dept_dir)
        # One output sub-directory per input directory, with the same name.
        annexe_dept_dir = annexe_dir / dept_dir.name
        annexe_dept_dir.mkdir(exist_ok=True, parents=True)
        # LOAD TABLES
        # Every column is read as str; numeric conversion is left to the processing code.
        td007 = pd.read_csv(dept_dir / 'td007_paroi_opaque.csv', dtype=str)
        td006 = pd.read_csv(dept_dir / 'td006_batiment.csv', dtype=str)
        td001 = pd.read_csv(dept_dir / 'td001_dpe.csv', dtype=str)
        td008 = pd.read_csv(dept_dir / 'td008_baie.csv', dtype=str)
        # NOTE(review): the column is dropped here while run_enveloppe_processing
        # selects a 'td008_baie_id' column in its output — presumably it is
        # recreated by the merge step; confirm.
        td008 = td008.drop('td008_baie_id', axis=1)

        # ENVELOPPE PROCESSING
        td001_enveloppe_agg, td008_p, td007_p = run_enveloppe_processing(td001, td006, td007, td008)

        round_float_cols(td001_enveloppe_agg).to_csv(annexe_dept_dir / 'td001_annexe_enveloppe_agg.csv')
        round_float_cols(td007_p).to_csv(annexe_dept_dir / 'td007_paroi_opaque_annexe.csv')
        round_float_cols(td008_p).to_csv(annexe_dept_dir / 'td008_baie_annexe.csv')

        # SYSTEM PROCESSING

        td011 = pd.read_csv(dept_dir / 'td011_installation_chauffage.csv', dtype=str)
        td012 = pd.read_csv(dept_dir / 'td012_generateur_chauffage.csv', dtype=str)
        td013 = pd.read_csv(dept_dir / 'td013_installation_ecs.csv', dtype=str)
        td014 = pd.read_csv(dept_dir / 'td014_generateur_ecs.csv', dtype=str)

        td011_p, td012_p, td001_sys_ch_agg, td013_p, td014_p, td001_sys_ecs_agg = run_system_processing(td001, td006,
                                                                                                        td011, td012,
                                                                                                        td013, td014)
        round_float_cols(td001_sys_ch_agg).to_csv(annexe_dept_dir / 'td001_annexe_sys_ch_agg.csv')
        round_float_cols(td001_sys_ecs_agg).to_csv(annexe_dept_dir / 'td001_annexe_sys_ecs_agg.csv')
        round_float_cols(td011_p).to_csv(annexe_dept_dir / 'td011_annexe_installation_chauffage.csv')
        round_float_cols(td012_p).to_csv(annexe_dept_dir / 'td012_annexe_generateur_chauffage.csv')
        round_float_cols(td013_p).to_csv(annexe_dept_dir / 'td013_annexe_installation_ecs.csv')
        round_float_cols(td014_p).to_csv(annexe_dept_dir / 'td014_annexe_generateur_ecs.csv')

        # add td001 processing
        # Only the normalised method name is exported, with 'id' renamed to 'td001_dpe_id'.
        postprocessing_td001(td001)[['nom_methode_dpe_norm', 'id']].rename(columns={'id': 'td001_dpe_id'}).to_csv(
            annexe_dept_dir / 'td001_annexe_generale.csv')
        # Unconditional break: only the first directory yielded by iterdir() is processed.
        # NOTE(review): looks like a debugging shortcut — remove to process every department.
        break

D:\data\dpe_full\depts\1
tv013_valeur_pont_thermique_isolation_planche_bas not found
tv025_type_batiment not found
tv025_type_emetteur not found
tv025_equipement_intermittence not found
tv026_classe_inertie_plancher_bas not found
tv026_classe_inertie_plancher_haut not found
tv026_classe_inertie_paroi_verticale not found
tv026_classe_inertie_classe_inertie not found
tv029_simu_type_distribution not found


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_col] = table[pond]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_value_col_temp] = table[pond_col] * table[value_col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexe

In [2]:
td010 = pd.read_csv(dept_dir / 'td010_pont_thermique.csv', dtype=str)


In [3]:
td007 = td007.rename(columns ={'id':'td007_paroi_opaque_id'})

In [4]:
td007 = td007.merge(td007_p,on='td007_paroi_opaque_id',how='left')

# traitements murs

In [5]:
from utils import agg_pond_top_freq,agg_pond_avg

In [130]:
# Build td007_murs: the rows of td007 whose opaque-wall type id is '1' or '2'
# (the rows handled as walls, "murs", in the rest of this cell), enriched with
# four label columns: tv003_label_isolation_uniforme, meth_calc_U, isolation
# and type_adjacence.
# .copy() so that the assignments below never write into td007 itself.
td007_murs = td007.loc[td007.tr014_type_parois_opaque_id.isin(['2', '1'])].copy()

# td007 is loaded with dtype=str; convert the columns compared numerically below.
float_cols = ['coefficient_transmission_thermique_paroi_non_isolee', 'coefficient_transmission_thermique_paroi',
              'epaisseur_isolation', 'resistance_thermique_isolation']
td007_murs[float_cols] = td007_murs[float_cols].astype(float)

# ## uniform tv003 label

td007_murs['tv003_periode_isolation_uniforme'] = td007_murs.tv003_annee_construction.astype('string')

td007_murs['tv003_label_isolation_uniforme'] = td007_murs.tv003_annee_construction.astype('string')

null = td007_murs['tv003_label_isolation_uniforme'].isnull()

# Where no construction period is available, fall back on the insulation period.
td007_murs.loc[null, 'tv003_label_isolation_uniforme'] = td007_murs.loc[null, 'tv003_annee_isolation'].astype(
    'string')

inconnu = td007_murs.tv003_mur_isole.isnull() & (~td007_murs.tv003_annee_construction.isnull())
non_isole = td007_murs.tv003_mur_isole == '0'
isole = td007_murs.tv003_mur_isole == '1'
is_annee_construction = ~td007_murs.tv003_annee_construction.isnull()
is_annee_isolation = ~td007_murs.tv003_annee_isolation.isnull()

# Prefix the period with the insulation status. Label texts are part of the output data.
td007_murs.loc[inconnu, 'tv003_label_isolation_uniforme'] = 'isol. inconnue periode constr : ' + td007_murs.loc[
    inconnu, 'tv003_label_isolation_uniforme']
td007_murs.loc[non_isole, 'tv003_label_isolation_uniforme'] = 'non isolé'

isole_constr = isole & is_annee_construction
td007_murs.loc[isole_constr, 'tv003_label_isolation_uniforme'] = 'isolé periode constr : ' + td007_murs.loc[
    isole_constr, 'tv003_label_isolation_uniforme']

# NOTE(review): unlike the two prefixes above, this one has no space after the
# colon; kept unchanged because consumers may match on the exact text.
isole_sans_constr = isole & (~is_annee_construction)
td007_murs.loc[isole_sans_constr, 'tv003_label_isolation_uniforme'] = 'isolé periode isolation :' + td007_murs.loc[
    isole_sans_constr, 'tv003_label_isolation_uniforme']

# uniform insulation year: construction bounds, overridden by insulation bounds when present.

td007_murs['annee_isole_uniforme_min'] = td007_murs.tv003_annee_construction_min.astype('string')
td007_murs['annee_isole_uniforme_max'] = td007_murs.tv003_annee_construction_max.astype('string')
td007_murs.loc[is_annee_isolation, 'annee_isole_uniforme_min'] = td007_murs.loc[
    is_annee_isolation, 'tv003_annee_isolation_min'].astype('string')
td007_murs.loc[is_annee_isolation, 'annee_isole_uniforme_max'] = td007_murs.loc[
    is_annee_isolation, 'tv003_annee_isolation_max'].astype('string')

# ## U computation-method label

td007_murs['meth_calc_U'] = 'INCONNUE'

# boolean masks (U values are compared after rounding to 2 decimals)
U = td007_murs.coefficient_transmission_thermique_paroi.round(2)
U_non_isolee = td007_murs.coefficient_transmission_thermique_paroi_non_isolee.round(2)
bool_U_egal_0 = U == 0.00
bool_U_U0 = U == U_non_isolee
# Parentheses are required: `|` binds tighter than `>=`, so the unparenthesised
# form `U >= 2 | non_isole` is evaluated as `U >= (2 | non_isole)`.
bool_U_2 = (U >= 2) | non_isole
bool_U_U0 = bool_U_U0 & (~bool_U_2)
bool_U_U0_auto_isol = bool_U_U0 & (U_non_isolee < 1)
bool_U_brut = (U <= 1) & (~bool_U_U0)
bool_U_brut_non_isole = (U > 1) & (~bool_U_U0)
bool_U_par_e = td007_murs.epaisseur_isolation > 0
bool_U_par_r = td007_murs.resistance_thermique_isolation > 0


# replace 0 with NaN where 0 carries no information (anything not strictly positive).

td007_murs.loc[~bool_U_par_e, 'epaisseur_isolation'] = np.nan
td007_murs.loc[~bool_U_par_r, 'resistance_thermique_isolation'] = np.nan


# label assignment: order matters, a later assignment overrides an earlier one
# on rows matched by both masks.

td007_murs.loc[bool_U_brut, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : ISOLE'
td007_murs.loc[bool_U_brut_non_isole, 'meth_calc_U'] = 'U SAISI DIRECTEMENT : NON ISOLE'
td007_murs.loc[bool_U_par_e, 'meth_calc_U'] = 'EPAISSEUR ISOLATION SAISIE'
td007_murs.loc[bool_U_par_r, 'meth_calc_U'] = 'RESISTANCE ISOLATION SAISIE'
td007_murs.loc[bool_U_2, 'meth_calc_U'] = 'MUR NON ISOLE U=2'
td007_murs.loc[bool_U_U0, 'meth_calc_U'] = 'MUR NON ISOLE U<2'
td007_murs.loc[bool_U_U0_auto_isol, 'meth_calc_U'] = 'STRUCTURE ISOLANTE (ITR) U<1'
td007_murs.loc[inconnu, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLATION INCONNUE'
td007_murs.loc[isole, 'meth_calc_U'] = 'PAR DEFAUT PERIODE : ISOLE'
td007_murs.loc[bool_U_egal_0, 'meth_calc_U'] = 'ERREUR : U=0'

# ## insulation label (derived from the meth_calc_U text)

td007_murs['isolation'] = 'NON ISOLE'
is_isole = ~td007_murs.meth_calc_U.str.contains('NON ISOLE|INCONNUE')
td007_murs.loc[is_isole, 'isolation'] = 'ISOLE SAISI'
is_isole_defaut = is_isole & (td007_murs.meth_calc_U.str.contains('DEFAUT'))
td007_murs.loc[is_isole_defaut, 'isolation'] = 'ISOLE DEFAUT PRE 1988'

inconnu = td007_murs.meth_calc_U.str.contains('INCONNUE')
# NOTE(review): this is a string (lexicographic) comparison on a 'string'
# column; it orders 4-digit years correctly but not other formats — confirm
# the values are always 4-digit years.
post_88 = td007_murs['annee_isole_uniforme_min'] >= "1988"

td007_murs.loc[inconnu, 'isolation'] = 'ISOLATION INCONNUE (DEFAUT)'

td007_murs.loc[(inconnu | is_isole_defaut) & post_88, 'isolation'] = 'ISOLE DEFAUT POST 1988'

is_isole_struc = is_isole & (td007_murs.meth_calc_U.str.contains('STRUCTURE'))
td007_murs.loc[is_isole_struc, 'isolation'] = 'STRUCTURE ISOLANTE (ITR)'

is_err = td007_murs.meth_calc_U.str.contains('ERREUR')

td007_murs.loc[is_err, 'isolation'] = 'NONDEF'

# ## adjacency label (again, later assignments take precedence)

td007_murs['type_adjacence'] = 'NONDEF'

ext = td007_murs.tv001_code == 'TV001_001'

td007_murs.loc[ext, 'type_adjacence'] = 'EXTERIEUR'

# b_infer rounded to one decimal >= 0.9 is labelled like an exterior wall.
is_dep = td007_murs.b_infer.round(1) >= 0.9

td007_murs.loc[is_dep, 'type_adjacence'] = 'EXTERIEUR'

enterre = td007_murs.tv001_code == 'TV001_002'

td007_murs.loc[enterre, 'type_adjacence'] = 'PAROI_ENTERREE'

not_null = ~td007_murs.tv002_local_non_chauffe.isnull()

td007_murs.loc[not_null, 'type_adjacence'] = 'LNC'

# String comparison: codes sorting after 'TV001_004' are labelled LNC.
is_lnc = td007_murs.tv001_code.astype('string') > 'TV001_004'

td007_murs.loc[is_lnc, 'type_adjacence'] = 'LNC'

is_adj = td007_murs.tv001_code == 'TV001_004'

td007_murs.loc[is_adj, 'type_adjacence'] = 'BAT_ADJ'

In [131]:
test=td007_murs.isolation=='ISOLE DEFAUT PRE 1988'

In [133]:
td007_murs.loc[test][['tv003_code','tv003_annee_construction_min','tv003_annee_isolation_min','annee_isole_uniforme_min']]

Unnamed: 0,tv003_code,tv003_annee_construction_min,tv003_annee_isolation_min,annee_isole_uniforme_min
211,TV003_047,,1975,1975
217,TV003_049,,1983,1983
218,TV003_049,,1983,1983
219,TV003_049,,1983,1983
220,TV003_049,,1983,1983
...,...,...,...,...
237341,TV003_049,,1983,1983
237343,TV003_049,,1983,1983
237400,TV003_044,1900,,1900
237451,TV003_047,,1975,1975


In [135]:
td007_murs = td007_murs.rename(columns={'tv004_epaisseur':'epaisseur_structure',
                                       'tv002_local_non_chauffe':'type_local_non_chauffe',
                                       'coefficient_transmission_thermique_paroi':'Umur'})

# aggregation td001 murs

In [180]:
# For each adjacency type, compute per td001_dpe_id the value of each variable
# that comes out on top when weighted by surface_paroi_opaque_infer, one
# single-column frame per (variable, adjacency type); then join them all.
concat = []
for type_adjacence in ['EXTERIEUR', 'LNC', 'BAT_ADJ']:
    # .copy(): the aggregation helper writes a working column into the table it
    # receives; on a bare .loc slice this raises SettingWithCopyWarning.
    sel = td007_murs.loc[td007_murs.type_adjacence == type_adjacence].copy()
    for var in ['meth_calc_U', 'Umur', 'epaisseur_isolation', 'resistance_thermique_isolation', 'isolation',
                'annee_isole_uniforme_min', 'annee_isole_uniforme_max', 'materiaux_structure', 'epaisseur_structure',
                ]:

        var_agg = agg_pond_top_freq(sel, var, 'surface_paroi_opaque_infer',
                                    'td001_dpe_id').to_frame(f'{var}_murs_{type_adjacence.lower()}_top')
        concat.append(var_agg)

table_concat = pd.concat(concat, axis=1)

ValueError: StringArray requires a sequence of strings or pandas.NA

In [138]:
# Per td001_dpe_id, the list of distinct non-null type_local_non_chauffe values.
adjacences = td007_murs.groupby('td001_dpe_id').type_local_non_chauffe.agg(lambda x: x.dropna().unique().tolist())
# Keep the result of to_frame: without the assignment the rename is lost and
# the column would still be called 'type_local_non_chauffe' when concatenated.
adjacences = adjacences.to_frame('adjacences_LNC_murs')
# Per td001_dpe_id, the type_local_non_chauffe value that ranks first when
# weighted by surface_paroi_opaque_infer.
var_agg = agg_pond_top_freq(td007_murs, 'type_local_non_chauffe', 'surface_paroi_opaque_infer',
                            'td001_dpe_id').to_frame('adjacence_LNC_murs_top')

In [139]:
# Sum of surface_paroi_opaque_infer per td001_dpe_id (rows) and type_adjacence (columns).
pivot=td007_murs.pivot_table(index='td001_dpe_id',columns='type_adjacence',values='surface_paroi_opaque_infer',aggfunc='sum')
# Rename each column to 'surface_murs_<adjacency label in lower case>'.
pivot.columns = [f'surface_murs_{col.lower()}' for col in pivot]

In [140]:
td007_murs_agg = pd.concat([table_concat,var_agg,adjacences,pivot],axis=1)

In [145]:
td007_murs.loc[td007_murs.td001_dpe_id=='99943',['epaisseur_structure']+[col for col in td007_murs if col.startswith('tv004')]]

Unnamed: 0,epaisseur_structure,tv004_umur0_id,tv004_code,tv004_umur,tv004_materiaux
3167,,,,,
3168,,,,,
3169,,,,,


In [161]:
var ='epaisseur_structure'

In [173]:
sel[var]=sel[var].astype('string')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [174]:
var_agg = agg_pond_top_freq(sel, var, 'surface_paroi_opaque_infer',
                               'td001_dpe_id').to_frame(f'{var}_murs_{type_adjacence.lower()}_top')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_col] = table[pond]


In [194]:
var='epaisseur_structure'

In [198]:
table = sel
pond = 'surface_paroi_opaque_infer'
enum_col = var
by='td001_dpe_id'
bool_filter_col = None
bool_filter_not = None
from utils import _prep_agg_pond
import uuid

In [None]:
def agg_pond_top_freq(table, enum_col, pond, by, bool_filter_col=None, bool_filter_not=False):
    """
    Return, for each group, the value of ``enum_col`` with the largest summed weight.

    Weights are summed per (group, value) pair; pairs whose total is not
    strictly positive are discarded, and the value with the highest total is
    kept for each group. Groups left without any positive total are absent
    from the result.

    Parameters
    ----------
    table : pd.DataFrame
        Input table. It is handed to ``_prep_agg_pond`` first.
        NOTE(review): that helper appears to add the working weight column to
        the table it receives — pass a copy if the caller's frame must stay
        untouched; confirm against ``utils._prep_agg_pond``.
    enum_col : str
        Column containing the enumerated values.
    pond : str or list
        Column (or columns) containing the numeric weights; forwarded as is
        to ``_prep_agg_pond``.
    by : str or list
        Grouping column name, or list of grouping column names.
    bool_filter_col : str, optional
        Column containing a boolean array used to filter the data; forwarded
        to ``_prep_agg_pond``.
    bool_filter_not : bool
        If true, use the negation of the boolean array instead; forwarded to
        ``_prep_agg_pond``.

    Returns
    -------
    s : pd.Series
        Top weighted value of ``enum_col``, indexed by ``by``.

    """
    # Accept a single column name as well as a list of names, as documented.
    by_cols = [by] if isinstance(by, str) else list(by)

    # Random column name so the working weight column cannot clash with an existing one.
    pond_col = str(uuid.uuid4())
    table = _prep_agg_pond(table, pond, bool_filter_col, pond_col, bool_filter_not)

    enum_dtype = table[enum_col].dtype
    if isinstance(enum_dtype, pd.CategoricalDtype):
        # Group on the underlying values rather than on the categorical itself.
        table = table.copy()
        table[enum_col] = table[enum_col].astype(enum_dtype.categories.dtype)

    grp = table.groupby(by_cols + [enum_col])[pond_col].sum()
    # Totals that are zero or negative must never win: blank them, drop them below.
    is_0 = grp <= 0
    grp.loc[is_0] = np.nan

    # After the descending sort the first row of each group is its heaviest value.
    s = (grp.reset_index()
         .sort_values(by_cols + [pond_col], ascending=False)
         .dropna(subset=[pond_col])
         .drop_duplicates(subset=by_cols)
         .set_index(by_cols)[enum_col])

    return s

In [212]:
s = table.Umur.astype('category')

dtype('float64')

dtype('float64')

In [225]:
dir(s.dtype)

['__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_cache',
 '_categories',
 '_finalize',
 '_from_categorical_dtype',
 '_from_fastpath',
 '_from_values_or_dtype',
 '_hash_categories',
 '_is_boolean',
 '_is_numeric',
 '_metadata',
 '_ordered',
 'base',
 'categories',
 'construct_array_type',
 'construct_from_string',
 'is_dtype',
 'isbuiltin',
 'isnative',
 'itemsize',
 'kind',
 'na_value',
 'name',
 'names',
 'num',
 'ordered',
 'reset_cache',
 'shape',
 'str',
 'subdtype',
 'type',
 'update_dtype',
 'validate_categories',
 'validate_ordered']

In [207]:
pond_col = str(uuid.uuid4())
table = _prep_agg_pond(table, pond, bool_filter_col, pond_col, bool_filter_not)
grp = table.groupby([by, enum_col])[pond_col].sum()
is_0 = grp <= 0
grp.loc[is_0] = np.nan
s = grp.reset_index().sort_values([by, pond_col], ascending=False).drop_duplicates(subset=by).set_index(by)[
    enum_col]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table[pond_col] = table[pond]


In [196]:
pond_col = str(uuid.uuid4())
table = _prep_agg_pond(table, pond, bool_filter_col, pond_col, bool_filter_not)
grp = table.groupby([by, enum_col])[pond_col].sum()
is_0 = grp <= 0
grp.loc[is_0] = np.nan

In [199]:
grp = table.groupby([by, enum_col])[pond_col].sum()

In [210]:
s.loc['99943']

KeyError: '99943'

In [206]:
grp.reset_index().sort_values([by, pond_col], ascending=False).set_index(by)[
    enum_col]

td001_dpe_id
999746     40 et -
999726     40 et -
999660     20 et -
99948      20 et -
998855        22.5
            ...   
1002180    20 et -
100207     20 et -
1002065    20 et -
1001934    20 et -
1000142    20 et -
Name: epaisseur_structure, Length: 39651, dtype: category
Categories (40, object): [10, 10 et -, 12, 13, ..., 8 et -, 80, 9 et -, Sans objet]

In [202]:
grp.reset_index().sort_values([by, pond_col], ascending=False).drop_duplicates(subset=by).set_index(by)[
    enum_col]

td001_dpe_id
999746     40 et -
999726     40 et -
999660     20 et -
99948      20 et -
99943           10
            ...   
100207     20 et -
1002065    20 et -
1001934    20 et -
1000989         10
1000142    20 et -
Name: epaisseur_structure, Length: 44637, dtype: category
Categories (40, object): [10, 10 et -, 12, 13, ..., 8 et -, 80, 9 et -, Sans objet]

In [175]:
sel.loc[sel.td001_dpe_id=='99943'][var]

3167    <NA>
3168    <NA>
Name: epaisseur_structure, dtype: string

In [176]:
sel.loc[sel.td001_dpe_id=='99943'][var]

3167    <NA>
3168    <NA>
Name: epaisseur_structure, dtype: string

In [178]:
var_agg.loc['99943']

KeyError: '99943'

In [147]:
td007_murs_agg.loc['99943']

meth_calc_U_murs_exterieur_top                       RESISTANCE ISOLATION SAISIE
Umur_murs_exterieur_top                                                     0.53
epaisseur_isolation_murs_exterieur_top                                       NaN
resistance_thermique_isolation_murs_exterieur_top                           1.33
isolation_murs_exterieur_top                                         ISOLE SAISI
annee_isole_uniforme_min_murs_exterieur_top                                  NaN
annee_isole_uniforme_max_murs_exterieur_top                                  NaN
materiaux_structure_murs_exterieur_top                                       NaN
epaisseur_structure_murs_exterieur_top                                        10
meth_calc_U_murs_lnc_top                              PAR DEFAUT PERIODE : ISOLE
Umur_murs_lnc_top                                                           0.36
epaisseur_isolation_murs_lnc_top                                             NaN
resistance_thermique_isolati

Unnamed: 0,Umur,isolation,tv003_umur_id,tv003_code,tv003_mur_isole,tv003_annee_construction,tv003_annee_construction_min,tv003_annee_construction_max,tv003_annee_isolation,tv003_annee_isolation_min,tv003_annee_isolation_max,tv003_effet_joule,tv003_umur,tv003_periode_isolation_uniforme,tv003_label_isolation_uniforme
26600,0.0,NONDEF,,,,,,,,,,,,,
26601,0.33,ISOLE DEFAUT PRE 1988,46.0,TV003_046,1.0,à partir de 2000,2001.0,2900.0,,,,,0.42,à partir de 2000,isolé periode constr : à partir de 2000
26602,0.33,ISOLE DEFAUT PRE 1988,46.0,TV003_046,1.0,à partir de 2000,2001.0,2900.0,,,,,0.42,à partir de 2000,isolé periode constr : à partir de 2000
26603,0.33,ISOLE DEFAUT PRE 1988,46.0,TV003_046,1.0,à partir de 2000,2001.0,2900.0,,,,,0.42,à partir de 2000,isolé periode constr : à partir de 2000
26604,0.0,NONDEF,,,,,,,,,,,,,
26605,0.33,ISOLE DEFAUT PRE 1988,46.0,TV003_046,1.0,à partir de 2000,2001.0,2900.0,,,,,0.42,à partir de 2000,isolé periode constr : à partir de 2000
26606,0.33,ISOLE DEFAUT PRE 1988,46.0,TV003_046,1.0,à partir de 2000,2001.0,2900.0,,,,,0.42,à partir de 2000,isolé periode constr : à partir de 2000
26607,0.0,NONDEF,,,,,,,,,,,,,


['tv003_umur_id',
 'tv003_code',
 'tv003_mur_isole',
 'tv003_annee_construction',
 'tv003_annee_construction_min',
 'tv003_annee_construction_max',
 'tv003_annee_isolation',
 'tv003_annee_isolation_min',
 'tv003_annee_isolation_max',
 'tv003_effet_joule',
 'tv003_umur',
 'tv003_periode_isolation_uniforme',
 'tv003_label_isolation_uniforme']

In [45]:
td007_murs_agg.Umur_murs_exterieur_top.head(24)

999746    0.33
999726    0.33
999660    0.35
99948     0.33
99943     0.53
998893    0.36
998855    0.35
998844    0.35
998830    0.57
998725    2.00
998420    0.36
99839     0.35
99819     2.00
997963    2.00
997103    2.00
996617    2.50
996612    0.21
996604    0.70
996583    0.45
995811    0.45
99514     0.35
995130    0.47
99444     1.00
994326    0.35
Name: Umur_murs_exterieur_top, dtype: float64

Unnamed: 0,td007_paroi_opaque_id,td006_batiment_id,tr014_type_parois_opaque_id,reference,deperdition_thermique,tv001_coefficient_reduction_deperditions_id,tv002_local_non_chauffe_id,Umur,coefficient_transmission_thermique_paroi_non_isolee,tv003_umur_id,...,surface_paroi_opaque_exterieur_infer,tv003_periode_isolation_uniforme,tv003_label_isolation_uniforme,annee_isole_uniforme_min,annee_isole_uniforme_max,meth_calc_U,isolation,type_adjacence,c5cae616-17ea-4e72-a3d7-082f213a44ea,783110a4-e2d9-40ec-833a-f4702abef10e
0,688,135,1,Mur 1,12.33,1,,0.35,0.0,,...,35.22,,,,,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI,EXTERIEUR,35.22,35.22
1,689,135,1,Mur 2,11.61,1,,0.35,0.0,,...,33.17,,,,,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI,EXTERIEUR,33.17,33.17
2,690,135,1,Mur 3,13.76,1,,0.35,0.0,,...,39.31,,,,,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI,EXTERIEUR,39.31,39.31
3,691,135,1,Mur 4,10.87,1,,0.35,0.0,,...,31.06,,,,,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI,EXTERIEUR,31.06,31.06
6,1490,307,1,Mur extérieur Sud,10.62,,,0.43,0.0,,...,24.89,,,,,EPAISSEUR ISOLATION SAISIE,ISOLE SAISI,EXTERIEUR,24.89,24.89
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237478,24114980,5419675,1,MUR n°1,12.00,1,,0.36,,52,...,32.75,,isolé periode isolation :à partir de 2006,2006,2900,PAR DEFAUT PERIODE : ISOLE,ISOLE DEFAUT POST 1988,EXTERIEUR,32.75,32.75
237479,24114981,5419675,1,MUR n°1,10.00,1,,0.36,,52,...,27.93,,isolé periode isolation :à partir de 2006,2006,2900,PAR DEFAUT PERIODE : ISOLE,ISOLE DEFAUT POST 1988,EXTERIEUR,27.93,27.93
237480,24114982,5419675,1,MUR n°1,8.00,1,,0.36,,52,...,22.98,,isolé periode isolation :à partir de 2006,2006,2900,PAR DEFAUT PERIODE : ISOLE,ISOLE DEFAUT POST 1988,EXTERIEUR,22.98,22.98
237481,24114983,5419675,1,MUR n°1,5.00,1,,0.36,,52,...,12.75,,isolé periode isolation :à partir de 2006,2006,2900,PAR DEFAUT PERIODE : ISOLE,ISOLE DEFAUT POST 1988,EXTERIEUR,12.75,12.75


In [98]:
td001_enveloppe_agg

Unnamed: 0_level_0,Umurs_ext_avg,Umurs_deper_avg,Uplancher_bas_deper_avg,Uplancher_haut_deper_avg,is_plancher_bas_deper_isole,is_plancher_haut_deper_isole,is_murs_ext_isole,is_murs_deper_isole,mat_murs_deper_top,mat_murs_ext_top,...,surfaces_vitree_orientee_est,surfaces_vitree_orientee_est_ou_ouest,surfaces_vitree_orientee_horizontale,surfaces_vitree_orientee_nondef,surfaces_vitree_orientee_nord,surfaces_vitree_orientee_ouest,surfaces_vitree_orientee_sud,ratio_surface_vitree_exterieur,ratio_surface_vitree_deperditif,ratio_surface_vitree_total
td001_dpe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000142,,0.707,0.700,0.150,Oui,Oui,Oui,Oui,Murs en blocs de béton creux,Murs en blocs de béton creux,...,1.44,,,,2.46,2.88,6.72,,0.096,0.096
1000989,1.75,1.750,1.418,0.210,Non,Oui,Non,Non,,,...,,,,,,5.05,5.21,0.107,0.107,0.107
1001934,0.45,0.450,0.416,0.250,Terre Plein,Oui,Oui,Oui,Murs en blocs de béton creux,Murs en blocs de béton creux,...,,,,,,1.87,10.42,0.137,0.127,0.127
1002065,0.70,0.700,,,,,Oui,Oui,Murs en béton banché,Murs en béton banché,...,12.43,,,,3.74,,,0.604,0.440,0.440
100207,0.30,0.304,0.590,0.232,Oui,Oui,Oui,Oui,Murs en blocs de béton creux,Murs en blocs de béton creux,...,1.42,,,,8.30,0.37,8.64,0.135,0.128,0.128
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
968172,,,2.000,2.500,Non,Non,,,,,...,,,,,,,,,,
986070,,,,,Non,Oui,,,,,...,,,,,,,,,,
987387,,,,,Non,Oui,,,,,...,,,,,,,,,,
988770,,,,,Non,Oui,,,,,...,,,,,,,,,,
