In [1]:
import numpy as np 
import pandas as pd 

In [2]:
obj = pd.Series(["c", "a", "d", "a", "a", "b", "b", "c", "c"])

uniques = obj.unique()

uniques # not necessarily returned in the order in which they first appear, and not in sorted order

array(['c', 'a', 'd', 'b'], dtype=object)

In [3]:
obj.value_counts() # computes frequencies

c    3
a    3
b    2
d    1
Name: count, dtype: int64

In [4]:
#  top level pandas method

# Sample pandas Series
data = pd.Series(["apple", "banana", "apple", "orange", "banana", "apple"])

# Count unique values without sorting by frequency
result = pd.value_counts(data.to_numpy(), sort=False)

print(result)


apple     3
banana    2
orange    1
Name: count, dtype: int64


  result = pd.value_counts(data.to_numpy(), sort=False)


In [5]:
# isin performs a vectorized set membership check and can be useful in filtering a dataset down to a subset of values in a Series or columns in a DataFrame

obj

0    c
1    a
2    d
3    a
4    a
5    b
6    b
7    c
8    c
dtype: object

In [6]:
mask = obj.isin(['b', 'c'])
mask

0     True
1    False
2    False
3    False
4    False
5     True
6     True
7     True
8     True
dtype: bool

In [7]:
obj[mask]

0    c
5    b
6    b
7    c
8    c
dtype: object

In [8]:
# Related to isin is the Index.get_indexer method, 
# which gives you an index array from an array of possibly nondistinct values into another array of distinct values:

to_match = pd.Series(["c", "a", "b", "b", "c", "a"])
unique_vals = pd.Series(['c', 'b', 'a'])

indices = pd.Index(unique_vals).get_indexer(to_match)
indices 

array([0, 2, 1, 1, 0, 2])

In [9]:
# Ex. to compute a histogram on multiple related columns in dataFrame

data = pd.DataFrame({"Qu1": [1, 3, 4, 3, 4], "Qu2": [2, 3, 1, 2, 3], "Qu3": [1, 5, 2, 4, 4]})
data

Unnamed: 0,Qu1,Qu2,Qu3
0,1,2,1
1,3,3,5
2,4,1,2
3,3,2,4
4,4,3,4


In [11]:
# compute the value count for a single column
data['Qu1'].value_counts().sort_index()

Qu1
1    1
3    2
4    2
Name: count, dtype: int64

In [14]:
# to compute for all columns, pass pandas.value_counts to the dataFrame apply method

result = data.apply(pd.value_counts).fillna('fuckyou')
result

  result = data.apply(pd.value_counts).fillna('fuckyou')


Unnamed: 0,Qu1,Qu2,Qu3
1,1.0,1.0,1.0
2,fuckyou,2.0,1.0
3,2.0,2.0,fuckyou
4,2.0,fuckyou,2.0
5,fuckyou,fuckyou,1.0


In [15]:
data = pd.DataFrame({"a": [1, 1, 1, 2, 2], "b": [0, 0, 1, 0, 0]})
data

Unnamed: 0,a,b
0,1,0
1,1,0
2,1,1
3,2,0
4,2,0


In [17]:
data.value_counts() # later

a  b
1  0    2
2  0    2
1  1    1
Name: count, dtype: int64