`Series.value_counts()`

- Syntax: `series.value_counts(normalize = False, sort = True, ascending = False, bins = None, dropna = True)`
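- Parameters:
    - normalize: if `True`, return relative frequencies (proportions) instead of absolute frequencies (counts)
    - sort: sort the result by frequency (default) vs. leave it unsorted
    - ascending: sort in ascending order vs. descending order (default)
    - bins
        - group values into half-open bins instead of counting each distinct value; a convenience for `pd.cut`
        - only works with numeric data
    - dropna: exclude NaN from the counts (default) vs. include it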

In [1]:
import pandas as pd
import numpy as np

In [23]:
# Sample data: 17 fruit names with repeats ("kiwi" x4, "mango" x2) so that
# the frequency counts in the cells below are not all equal.
fruits = pd.Series(
    [
        "kiwi",
        "mango",
        "strawberry",
        "pineapple",
        "gala apple",
        "honeycrisp apple",
        "tomato",
        "watermelon",
        "honeydew",
        "kiwi",
        "kiwi",
        "kiwi",
        "mango",
        "blueberry",
        "blackberry",
        "gooseberry",
        "papaya",
    ]
)
print(fruits)
print(type(fruits))

0                 kiwi
1                mango
2           strawberry
3            pineapple
4           gala apple
5     honeycrisp apple
6               tomato
7           watermelon
8             honeydew
9                 kiwi
10                kiwi
11                kiwi
12               mango
13           blueberry
14          blackberry
15          gooseberry
16              papaya
dtype: object
<class 'pandas.core.series.Series'>


### Relative frequencies vs. absolute frequencies

In [24]:
# Default call: absolute count of each distinct value, most frequent first.
fruits.value_counts()

kiwi                4
mango               2
tomato              1
watermelon          1
pineapple           1
blackberry          1
honeydew            1
honeycrisp apple    1
gala apple          1
gooseberry          1
strawberry          1
blueberry           1
papaya              1
dtype: int64

In [25]:
# normalize=True reports each value's share of all entries instead of its count.
fruits.value_counts(normalize=True)

kiwi                0.235294
mango               0.117647
tomato              0.058824
watermelon          0.058824
pineapple           0.058824
blackberry          0.058824
honeydew            0.058824
honeycrisp apple    0.058824
gala apple          0.058824
gooseberry          0.058824
strawberry          0.058824
blueberry           0.058824
papaya              0.058824
dtype: float64

### Sorted (default) vs. not sorted

In [26]:
# Baseline with the default sort = True: counts come back ordered by frequency.
fruits.value_counts()

kiwi                4
mango               2
tomato              1
watermelon          1
pineapple           1
blackberry          1
honeydew            1
honeycrisp apple    1
gala apple          1
gooseberry          1
strawberry          1
blueberry           1
papaya              1
dtype: int64

In [8]:
# sort=False skips the ordering by frequency.
fruits.value_counts(sort=False)

papaya              1
blueberry           1
strawberry          1
gooseberry          1
gala apple          1
honeycrisp apple    1
kiwi                4
honeydew            1
blackberry          1
pineapple           1
watermelon          1
mango               2
tomato              1
dtype: int64

### Ascending order vs. descending order

In [9]:
# Baseline with the default ascending = False: most frequent value first.
fruits.value_counts()

kiwi                4
mango               2
tomato              1
watermelon          1
pineapple           1
blackberry          1
honeydew            1
honeycrisp apple    1
gala apple          1
gooseberry          1
strawberry          1
blueberry           1
papaya              1
dtype: int64

In [10]:
# ascending=True flips the ordering: least frequent values first.
fruits.value_counts(ascending=True)

papaya              1
blueberry           1
strawberry          1
gooseberry          1
gala apple          1
honeycrisp apple    1
honeydew            1
blackberry          1
pineapple           1
watermelon          1
tomato              1
mango               2
kiwi                4
dtype: int64

### Bins vs. None (Numeric data only)

In [12]:
# Numeric sample with one large outlier (256), a few negatives, and a repeated 5.
numbers = pd.Series(
    [
        2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
        13, 17, 19, 23, 256,
        -8, -4, -2, 5, -9,
    ]
)

In [13]:
# Without bins, each distinct number is counted separately.
numbers.value_counts()

 5      2
-4      1
 8      1
-2      1
 2      1
 3      1
 4      1
 6      1
 7      1
 9      1
-8      1
 10     1
 11     1
-9      1
 13     1
 17     1
 19     1
 23     1
 256    1
dtype: int64

In [16]:
# bins=4 counts how many values fall into each of four equal-width intervals.
numbers.value_counts(bins=4)

(-9.266, 57.25]    19
(189.75, 256.0]     1
(123.5, 189.75]     0
(57.25, 123.5]      0
dtype: int64

In [18]:
# Same binning done explicitly: cut into four intervals, then count per interval.
pd.cut(numbers, bins=4).value_counts()

(-9.265, 57.25]    19
(189.75, 256.0]     1
(123.5, 189.75]     0
(57.25, 123.5]      0
dtype: int64

### `numbers.value_counts(bins = 4)` returns the same counts as `pd.cut(numbers, 4).value_counts()` (only the displayed lower edge of the first bin differs slightly)

### Drop NaN (default) vs. include NaN

In [28]:
# Numeric sample with two missing entries; the None values show up as NaN.
numbers2 = pd.Series(
    [
        100,
        214,
        325,
        88,
        None,
        325,
        None,
        325,
        100,
    ]
)
numbers2

0    100.0
1    214.0
2    325.0
3     88.0
4      NaN
5    325.0
6      NaN
7    325.0
8    100.0
dtype: float64

In [29]:
# Default dropna = True: the two NaN entries are left out of the counts.
numbers2.value_counts()

325.0    3
100.0    2
88.0     1
214.0    1
dtype: int64

In [30]:
# dropna=False keeps NaN as its own row in the counts.
numbers2.value_counts(dropna=False)

325.0    3
NaN      2
100.0    2
88.0     1
214.0    1
dtype: int64