# Data Cleaning: Bucketization

In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import KBinsDiscretizer

## Preparing Data

In [15]:
# Sample input: a single "age" column with nine observations.
data = dict(age=[10, 60, 15, 35, 25, 30, 20, 55, 12])
df = pd.DataFrame(data)

# Explicit bucket edges and one label per interval between consecutive edges
# (five edges -> four intervals -> four labels).
bins = [0, 18, 25, 40, 60]
labels = ["Kind", "Jung", "Mittel", "Alt"]

### Example with Pandas

In [16]:
# Bucket each age with pd.cut using the explicit edges in `bins`. Intervals are
# right-closed by default, so an age equal to an edge (e.g. 25 or 60) falls
# into the lower of the two neighbouring groups.
df_pd = df.assign(group=lambda frame: pd.cut(frame['age'], bins=bins, labels=labels))
df_pd

Unnamed: 0,age,group
0,10,Kind
1,60,Alt
2,15,Kind
3,35,Mittel
4,25,Jung
5,30,Mittel
6,20,Jung
7,55,Alt
8,12,Kind


### Example with scikit-learn

In [25]:
# KBinsDiscretizer does not take the explicit edges in `bins`: with
# strategy='uniform' it fits four equal-width intervals to the ages it is given
# and, with encode='ordinal', returns one bin code per row.
discretizer = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy='uniform')

# NOTE(review): `labels` are attached to the bin codes purely by position, so
# the groups are not the ones from the pd.cut example (in the recorded output
# age 20 is 'Kind' and age 30 is 'Jung') -- confirm this contrast is intended.
df_sk = df.assign(group=discretizer.fit_transform(df[['age']]))
df_sk = df_sk.assign(group=df_sk['group'].map(dict(enumerate(labels))))
df_sk

Unnamed: 0,age,group
0,10,Kind
1,60,Alt
2,15,Kind
3,35,Mittel
4,25,Jung
5,30,Jung
6,20,Kind
7,55,Alt
8,12,Kind
