# Object Creation
A categorical object can be created in multiple ways. The different ways are described below.

# category
A categorical Series can be created by specifying the dtype as "category" when the pandas object is constructed.

In [2]:
import pandas as pd

# Four values are supplied, yet only three of them are distinct, so the
# resulting Series ends up with three categories -- compare with the
# "Categories" line of the printed output.
s = pd.Series(
    data=["a", "b", "c", "a"],
    dtype="category",
)
print(s)

0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): [a, b, c]


# pd.Categorical
Using the standard pandas Categorical constructor, we can create a category object.

pandas.Categorical(values, categories, ordered)

In [3]:
import pandas as pd

# Build a Categorical straight from the values; the categories are taken
# from the distinct values that occur.
cat = pd.Categorical(
    values=["a", "b", "c", "a", "b", "c"],
)
print(cat)

[a, b, c, a, b, c]
Categories (3, object): [a, b, c]


In [5]:
import pandas as pd

# `categories` fixes the set (and listing order) of the allowed categories.
# Any value that is not present in that list -- 'd' in this example --
# is treated as NaN.
cat = pd.Categorical(
    ['a', 'b', 'c', 'a', 'b', 'c', 'd'],
    categories=['c', 'b', 'a'],
)
print(cat)

[a, b, c, a, b, c, NaN]
Categories (3, object): [c, b, a]


In [7]:
import pandas as pd

# With ordered=True the position in `categories` defines the ordering:
# c < b < a, i.e. a is greater than b and b is greater than c.
# 'd' is not a listed category, so it is stored as NaN.
cat = pd.Categorical(
    ['a', 'b', 'c', 'a', 'b', 'c', 'd'],
    categories=['c', 'b', 'a'],
    ordered=True,
)
print(cat)

[a, b, c, a, b, c, NaN]
Categories (3, object): [c < b < a]


# Description
Using the .describe() method on categorical data, we get output similar to that of a Series or DataFrame of string type.

In [10]:
import pandas as pd
import numpy as np

# One categorical column and one plain string column holding the same
# values, so their summaries can be compared side by side.
cat = pd.Categorical(
    values=["a", "c", "c", np.nan],
    categories=["b", "a", "c"],
)
df = pd.DataFrame(
    {
        "cat": cat,
        "s": ["a", "c", "c", np.nan],
    }
)

# Summary of the whole frame, a separator line, then the summary of the
# categorical column alone.
print(
    df.describe(),
    "--------------",
    df["cat"].describe(),
    sep="\n",
)

       cat  s
count    3  3
unique   2  2
top      c  c
freq     2  2
--------------
count     3
unique    2
top       c
freq      2
Name: cat, dtype: object


# Get the Properties of the Category
The categories attribute is used to get the categories of the object: obj.categories on a Categorical, or obj.cat.categories on a Series of category dtype.

In [13]:
import pandas as pd
import numpy as np

# `categories` on a Categorical returns the categories as an Index, in the
# order in which they were declared.
s = pd.Categorical(
    values=["a", "c", "c", np.nan],
    categories=["b", "a", "c"],
)
print(s.categories)

Index(['b', 'a', 'c'], dtype='object')


The obj.ordered attribute tells whether the categories of the object are ordered.

In [15]:
import pandas as pd
import numpy as np

# No ordering was requested when the Categorical was built, so the
# `ordered` attribute is False.
cat = pd.Categorical(
    values=["a", "c", "c", np.nan],
    categories=["b", "a", "c"],
)
print(cat.ordered)

False


# Renaming Categories
Renaming categories is done by assigning new values to the series.cat.categories property. In pandas 2.0 and later this assignment is no longer supported; use series.cat.rename_categories() instead.

In [21]:
import pandas as pd

s = pd.Series(["a","b","c","a"], dtype="category")

# Assigning to `s.cat.categories` directly is no longer supported in
# pandas 2.0+ (it raises AttributeError). `rename_categories` returns a new
# Series whose categories are replaced position by position, and it also
# works on older pandas versions.
s = s.cat.rename_categories(["Group %s" % i for i in s.cat.categories])
print(s.cat.categories)

# The initial categories [a, b, c] have been renamed to
# [Group a, Group b, Group c]; the values of the Series follow the rename.

Index(['Group a', 'Group b', 'Group c'], dtype='object')


# Appending New Categories
Using the Categorical.add_categories() method, new categories can be appended.

In [22]:
import pandas as pd

# `add_categories` returns a new object with the extra category appended
# after the existing ones; the values themselves are left as they were.
s = pd.Series(
    data=["a", "b", "c", "a"],
    dtype="category",
).cat.add_categories([4])
print(s.cat.categories)

Index(['a', 'b', 'c', 4], dtype='object')


# Removing Categories
Using the Categorical.remove_categories() method, unwanted categories can be removed.

In [23]:
import pandas as pd

s = pd.Series(
    data=["a", "b", "c", "a"],
    dtype="category",
)
print("Original object:", s, sep="\n")

# `remove_categories` returns a new Series and leaves `s` untouched; values
# that belonged to the removed category show up as NaN in the result.
print("After removal:", s.cat.remove_categories("a"), sep="\n")

Original object:
0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): [a, b, c]
After removal:
0    NaN
1      b
2      c
3    NaN
dtype: category
Categories (2, object): [b, c]
