# Binning

## Initialisierung

In [56]:
import pandas as pd

In [57]:
# Example data: eight ages in a single column named "alter",
# reused by every binning variant in this notebook.
df = pd.DataFrame(
    data={"alter": [17, 10, 24, 45, 43, 45, 50, 70]},
)

## Feste Breite

In [58]:
# Hand-picked bin edges: pd.cut turns them into the right-closed
# intervals (0, 20], (20, 40], (40, 60] and (60, 80].
bins = [0, 20, 40, 60, 80]
df["fix_manuell"] = pd.cut(x=df["alter"], bins=bins)
df

Unnamed: 0,alter,fix_manuell
0,17,"(0, 20]"
1,10,"(0, 20]"
2,24,"(20, 40]"
3,45,"(40, 60]"
4,43,"(40, 60]"
5,45,"(40, 60]"
6,50,"(40, 60]"
7,70,"(60, 80]"


In [59]:
# Passing an integer instead of explicit edges lets pd.cut split the
# observed range of "alter" (10 to 70) into 4 bins of equal width.
df["fix_auto"] = pd.cut(x=df["alter"], bins=4)
df

Unnamed: 0,alter,fix_manuell,fix_auto
0,17,"(0, 20]","(9.94, 25.0]"
1,10,"(0, 20]","(9.94, 25.0]"
2,24,"(20, 40]","(9.94, 25.0]"
3,45,"(40, 60]","(40.0, 55.0]"
4,43,"(40, 60]","(40.0, 55.0]"
5,45,"(40, 60]","(40.0, 55.0]"
6,50,"(40, 60]","(40.0, 55.0]"
7,70,"(60, 80]","(55.0, 70.0]"


In [60]:
# labels=False yields the integer position of each bin (0, 1, 2, ...)
# instead of the interval itself, for both the manual and the automatic edges.
df["fix_manuell_label"] = pd.cut(x=df["alter"], bins=bins, labels=False)
df["fix_auto_label"] = pd.cut(x=df["alter"], bins=4, labels=False)
df

Unnamed: 0,alter,fix_manuell,fix_auto,fix_manuell_label,fix_auto_label
0,17,"(0, 20]","(9.94, 25.0]",0,0
1,10,"(0, 20]","(9.94, 25.0]",0,0
2,24,"(20, 40]","(9.94, 25.0]",1,0
3,45,"(40, 60]","(40.0, 55.0]",2,2
4,43,"(40, 60]","(40.0, 55.0]",2,2
5,45,"(40, 60]","(40.0, 55.0]",2,2
6,50,"(40, 60]","(40.0, 55.0]",2,2
7,70,"(60, 80]","(55.0, 70.0]",3,3


## Adaptive Breite (Quantile)

In [61]:
# Quantile-based binning: q=4 places the edges at the quartiles of "alter",
# so the bin widths adapt to the data instead of being fixed.
df["quantil"] = pd.qcut(x=df["alter"], q=4)
df

Unnamed: 0,alter,fix_manuell,fix_auto,fix_manuell_label,fix_auto_label,quantil
0,17,"(0, 20]","(9.94, 25.0]",0,0,"(9.999, 22.25]"
1,10,"(0, 20]","(9.94, 25.0]",0,0,"(9.999, 22.25]"
2,24,"(20, 40]","(9.94, 25.0]",1,0,"(22.25, 44.0]"
3,45,"(40, 60]","(40.0, 55.0]",2,2,"(44.0, 46.25]"
4,43,"(40, 60]","(40.0, 55.0]",2,2,"(22.25, 44.0]"
5,45,"(40, 60]","(40.0, 55.0]",2,2,"(44.0, 46.25]"
6,50,"(40, 60]","(40.0, 55.0]",2,2,"(46.25, 70.0]"
7,70,"(60, 80]","(55.0, 70.0]",3,3,"(46.25, 70.0]"


In [62]:
# Same quartile binning as before, but each bin is shown under a
# readable name (one label per bin, lowest bin first).
df["quantil_label"] = pd.qcut(
    x=df["alter"],
    q=4,
    labels=["q1", "q2", "q3", "q4"],
)
df

Unnamed: 0,alter,fix_manuell,fix_auto,fix_manuell_label,fix_auto_label,quantil,quantil_label
0,17,"(0, 20]","(9.94, 25.0]",0,0,"(9.999, 22.25]",q1
1,10,"(0, 20]","(9.94, 25.0]",0,0,"(9.999, 22.25]",q1
2,24,"(20, 40]","(9.94, 25.0]",1,0,"(22.25, 44.0]",q2
3,45,"(40, 60]","(40.0, 55.0]",2,2,"(44.0, 46.25]",q3
4,43,"(40, 60]","(40.0, 55.0]",2,2,"(22.25, 44.0]",q2
5,45,"(40, 60]","(40.0, 55.0]",2,2,"(44.0, 46.25]",q3
6,50,"(40, 60]","(40.0, 55.0]",2,2,"(46.25, 70.0]",q4
7,70,"(60, 80]","(55.0, 70.0]",3,3,"(46.25, 70.0]",q4


In [68]:
# Count the rows that fall into each quantile bin.
df.loc[:, "quantil"].value_counts()

(46.25, 70.0]     2
(44.0, 46.25]     2
(22.25, 44.0]     2
(9.999, 22.25]    2
Name: quantil, dtype: int64