In [1]:
# example of group by/flattening operations


In [2]:
import pandas as pd


In [3]:
def manual_agg_func(dg, column="marker"):
    """Summarise one column of a (grouped) frame as a flat, labelled Series.

    Intended to be passed to ``DataFrame.groupby(...).apply`` so that each
    group yields one row whose labels are already flat
    (``<column>_count``, ``<column>_max``, ``<column>_min``, ``<column>_mean``),
    avoiding the two-level column header produced by ``.agg`` with a dict.

    Parameters
    ----------
    dg : pandas.DataFrame
        The frame (typically a single group) to summarise.
    column : str, default "marker"
        Name of the column in ``dg`` to aggregate.  The default keeps the
        original behaviour of this helper.

    Returns
    -------
    pandas.Series
        Four values in the order count, max, min, mean, indexed by
        ``"<column>_<stat>"``.  The statistics are stored together in a
        single Series (the notebook output shows the counts as ``3.0``).
    """
    # Order matters: it fixes the column order of the resulting frame.
    stat_names = ("count", "max", "min", "mean")
    values = dg[column]
    stats = [getattr(values, stat)() for stat in stat_names]
    labels = ["{}_{}".format(column, stat) for stat in stat_names]
    return pd.Series(stats, index=labels)


In [4]:
# Toy input: two ids with three rows each, a label column ("old")
# and a numeric column ("marker") to aggregate.
df = pd.DataFrame(
    {
        "id": [1, 1, 1, 2, 2, 2],
        "old": list("abcdef"),
        "marker": [1, 2, 3] * 2,
    }
)
df


Unnamed: 0,id,old,marker
0,1,a,1
1,1,b,2
2,1,c,3
3,2,d,1
4,2,e,2
5,2,f,3


In [5]:
# basic groupby  - how to get at sensible data slices


In [6]:
# Per-id aggregation: count/min/max of "marker" and count of "old".
# Passing a dict to .agg yields a two-level column header
# (level 0: source column, level 1: statistic).
df_grp = df.groupby("id").agg(
    {
        "marker": ["count", "min", "max"],
        "old": "count",
    }
)
df_grp


Unnamed: 0_level_0,marker,marker,marker,old
Unnamed: 0_level_1,count,min,max,count
id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,3,1,3,3
2,3,1,3,3


In [7]:
# reset_index() turns "id" back into a column, but it doesn't flatten
# the two-level column header.
df_grp_ri = df_grp.reset_index(drop=False)
df_grp_ri


Unnamed: 0_level_0,id,marker,marker,marker,old
Unnamed: 0_level_1,Unnamed: 1_level_1,count,min,max,count
0,1,3,1,3,3
1,2,3,1,3,3


In [8]:
# solution: renaming columns


In [9]:
# Work on a copy so that df_grp keeps its two-level columns
# for the slicing examples further down.
dfg_cp = df_grp.copy(deep=True)
dfg_cp


Unnamed: 0_level_0,marker,marker,marker,old
Unnamed: 0_level_1,count,min,max,count
id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,3,1,3,3
2,3,1,3,3


In [10]:
# Flatten the two-level column header into single "<level0>_<level1>" names,
# e.g. ("marker", "count") -> "marker_count".
# Only do this while the columns are still a MultiIndex: once flattened, every
# label is a plain string, and "_".join() over a string would put an
# underscore between each character if this cell were run a second time.
if isinstance(dfg_cp.columns, pd.MultiIndex):
    dfg_cp.columns = ["_".join(col).strip().rstrip("_")
                      for col in dfg_cp.columns.values]
dfg_cp


Unnamed: 0_level_0,marker_count,marker_min,marker_max,old_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,3,1,3,3
2,3,1,3,3


In [11]:
# Promote the "id" group key from the index to a regular column.
# Guarded on the index name so that re-running the cell is a no-op instead of
# resetting the already-reset integer index and adding a spurious "index" column.
if dfg_cp.index.name == "id":
    dfg_cp = dfg_cp.reset_index()
dfg_cp


Unnamed: 0,id,marker_count,marker_min,marker_max,old_count
0,1,3,1,3,3
1,2,3,1,3,3


In [12]:
# df_grp lvl0: marker, old


In [13]:
# df_grp lvl1: count, min, max (marker); count (old)


In [14]:
# Short alias for pd.IndexSlice, used below to write per-level selections
# on the two-level columns of df_grp.
idx = pd.IndexSlice
idx


<pandas.core.indexing._IndexSlice at 0x7f4677a96ba8>

In [15]:
# All rows; every level-0 column whose level-1 label is "max".
# The result keeps the two-level header.
max_cols = idx[:, "max"]
dftmp = df_grp.loc[:, max_cols]
del max_cols
dftmp


Unnamed: 0_level_0,marker
Unnamed: 0_level_1,max
id,Unnamed: 1_level_2
1,3
2,3


In [16]:
# All rows; exactly the ("marker", "max") column.
# Naming both levels selects a single column, so the result is a Series.
dftmp = df_grp.loc[:, ("marker", "max")]
dftmp


id
1    3
2    3
Name: (marker, max), dtype: int64

In [17]:
# All rows; every level-0 column whose level-1 label is "count"
# (here both "marker" and "old").
count_cols = idx[:, "count"]
dftmp = df_grp.loc[:, count_cols]
del count_cols
dftmp


Unnamed: 0_level_0,marker,old
Unnamed: 0_level_1,count,count
id,Unnamed: 1_level_2,Unnamed: 2_level_2
1,3,3
2,3,3


In [18]:
# this method doesn't require reset_index
# (note: in the output below, as_index=False leaves no "id" column in the result)


In [19]:
# Group by "id" with as_index=False and let manual_agg_func build
# one flat, pre-labelled statistics row per group.
dfi_F = (
    df.groupby("id", as_index=False)
      .apply(manual_agg_func)
)
dfi_F


Unnamed: 0,marker_count,marker_max,marker_min,marker_mean
0,3.0,3.0,1.0,2.0
1,3.0,3.0,1.0,2.0


In [20]:
# Same aggregation with "id" as the group index, then reset_index()
# to bring "id" back as an ordinary column next to the statistics.
dfi_T = (
    df.groupby("id", as_index=True)
      .apply(manual_agg_func)
      .reset_index()
)
dfi_T


Unnamed: 0,id,marker_count,marker_max,marker_min,marker_mean
0,1,3.0,3.0,1.0,2.0
1,2,3.0,3.0,1.0,2.0


In [21]:
#