`pd.cut()` 
- Separates the array elements into different bins
- Mainly used to perform statistical analysis on scalar data
- Syntax: 
    - `pd.cut(x, bins, right = True, labels = None, retbins = False, precision = 3, include_lowest = False, duplicates = "raise")`
- Parameters: 
    - x: the input array. Must be 1-dimensional.
    - bins: defines the bin edges for the segmentation. An integer gives the number of equal-width bins; a sequence gives the explicit bin edges.
    - right: indicates whether each bin includes its right edge (`True`, the default) or its left edge instead (`False`).
    - labels: specifies the labels for the returned bins. 
    - retbins: whether to return the bins or not. Useful when bins is provided as a scalar. 

In [9]:
import pandas as pd
import numpy as np
numbers = pd.Series([1,2,3,4,5,6,7,8,9])
numbers

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
8    9
dtype: int64

### Bins

In [16]:
pd.cut(numbers, 4)

0    (0.992, 3.0]
1    (0.992, 3.0]
2    (0.992, 3.0]
3      (3.0, 5.0]
4      (3.0, 5.0]
5      (5.0, 7.0]
6      (5.0, 7.0]
7      (7.0, 9.0]
8      (7.0, 9.0]
dtype: category
Categories (4, interval[float64]): [(0.992, 3.0] < (3.0, 5.0] < (5.0, 7.0] < (7.0, 9.0]]

In [19]:
pd.cut(numbers, bins = [1,5,9])

0           NaN
1    (1.0, 5.0]
2    (1.0, 5.0]
3    (1.0, 5.0]
4    (1.0, 5.0]
5    (5.0, 9.0]
6    (5.0, 9.0]
7    (5.0, 9.0]
8    (5.0, 9.0]
dtype: category
Categories (2, interval[int64]): [(1, 5] < (5, 9]]

### right = True (default) vs. right = False

In [20]:
pd.cut(numbers, bins = [1,5,9])

0           NaN
1    (1.0, 5.0]
2    (1.0, 5.0]
3    (1.0, 5.0]
4    (1.0, 5.0]
5    (5.0, 9.0]
6    (5.0, 9.0]
7    (5.0, 9.0]
8    (5.0, 9.0]
dtype: category
Categories (2, interval[int64]): [(1, 5] < (5, 9]]

In [21]:
pd.cut(numbers, bins = [1,5,9], right = False)

0    [1.0, 5.0)
1    [1.0, 5.0)
2    [1.0, 5.0)
3    [1.0, 5.0)
4    [5.0, 9.0)
5    [5.0, 9.0)
6    [5.0, 9.0)
7    [5.0, 9.0)
8           NaN
dtype: category
Categories (2, interval[int64]): [[1, 5) < [5, 9)]

### Labels

In [12]:
pd.cut(numbers, 2)

0    (0.992, 5.0]
1    (0.992, 5.0]
2    (0.992, 5.0]
3    (0.992, 5.0]
4    (0.992, 5.0]
5      (5.0, 9.0]
6      (5.0, 9.0]
7      (5.0, 9.0]
8      (5.0, 9.0]
dtype: category
Categories (2, interval[float64]): [(0.992, 5.0] < (5.0, 9.0]]

In [15]:
pd.cut(numbers, 2, labels = ['< 5','>=5'])

0    < 5
1    < 5
2    < 5
3    < 5
4    < 5
5    >=5
6    >=5
7    >=5
8    >=5
dtype: category
Categories (2, object): [< 5 < >=5]

In [48]:
pd.cut(numbers, 4, labels = False)

0    0
1    0
2    0
3    1
4    1
5    2
6    2
7    3
8    3
dtype: int64

### retbins

In [22]:
pd.cut(numbers, 2)

0    (0.992, 5.0]
1    (0.992, 5.0]
2    (0.992, 5.0]
3    (0.992, 5.0]
4    (0.992, 5.0]
5      (5.0, 9.0]
6      (5.0, 9.0]
7      (5.0, 9.0]
8      (5.0, 9.0]
dtype: category
Categories (2, interval[float64]): [(0.992, 5.0] < (5.0, 9.0]]

In [24]:
pd.cut(numbers, 2, retbins = True)

(0    (0.992, 5.0]
 1    (0.992, 5.0]
 2    (0.992, 5.0]
 3    (0.992, 5.0]
 4    (0.992, 5.0]
 5      (5.0, 9.0]
 6      (5.0, 9.0]
 7      (5.0, 9.0]
 8      (5.0, 9.0]
 dtype: category
 Categories (2, interval[float64]): [(0.992, 5.0] < (5.0, 9.0]],
 array([0.992, 5.   , 9.   ]))

### Precision

In [36]:
pd.cut(numbers, 5)

0    (0.992, 2.6]
1    (0.992, 2.6]
2      (2.6, 4.2]
3      (2.6, 4.2]
4      (4.2, 5.8]
5      (5.8, 7.4]
6      (5.8, 7.4]
7      (7.4, 9.0]
8      (7.4, 9.0]
dtype: category
Categories (5, interval[float64]): [(0.992, 2.6] < (2.6, 4.2] < (4.2, 5.8] < (5.8, 7.4] < (7.4, 9.0]]

In [39]:
pd.cut(numbers, 5, precision = 0)

0    (1.0, 3.0]
1    (1.0, 3.0]
2    (3.0, 4.0]
3    (3.0, 4.0]
4    (4.0, 6.0]
5    (6.0, 7.0]
6    (6.0, 7.0]
7    (7.0, 9.0]
8    (7.0, 9.0]
dtype: category
Categories (5, interval[float64]): [(1.0, 3.0] < (3.0, 4.0] < (4.0, 6.0] < (6.0, 7.0] < (7.0, 9.0]]

### Include_lowest

In [43]:
pd.cut(numbers, [1,5,9])

0           NaN
1    (1.0, 5.0]
2    (1.0, 5.0]
3    (1.0, 5.0]
4    (1.0, 5.0]
5    (5.0, 9.0]
6    (5.0, 9.0]
7    (5.0, 9.0]
8    (5.0, 9.0]
dtype: category
Categories (2, interval[int64]): [(1, 5] < (5, 9]]

In [42]:
pd.cut(numbers, [1,5,9], include_lowest = True)

0    (0.999, 5.0]
1    (0.999, 5.0]
2    (0.999, 5.0]
3    (0.999, 5.0]
4    (0.999, 5.0]
5      (5.0, 9.0]
6      (5.0, 9.0]
7      (5.0, 9.0]
8      (5.0, 9.0]
dtype: category
Categories (2, interval[float64]): [(0.999, 5.0] < (5.0, 9.0]]

### duplicates