In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build an unordered categorical straight from raw strings. No explicit
# category list is given, so pandas derives the categories from the values.
lmh_values = [
    "low",
    "high",
    "medium",
    "medium",
    "high",
]
lmh_cat = pd.Categorical(values=lmh_values)
lmh_cat

['low', 'high', 'medium', 'medium', 'high']
Categories (3, object): ['high', 'low', 'medium']

In [3]:
# The distinct category labels, returned as an Index
# (the recorded output shows them in sorted order).
lmh_cat.categories

Index(['high', 'low', 'medium'], dtype='object')

In [4]:
# Materialise the categorical as a plain NumPy object array of its values.
# Categorical.get_values() was removed in pandas 1.0; np.asarray is the
# supported replacement and yields the same array.
np.asarray(lmh_cat)

array(['low', 'high', 'medium', 'medium', 'high'], dtype=object)

In [4]:
# .codes holds, for each value, the integer position of its category in
# lmh_cat.categories (high=0, low=1, medium=2 per the output above).
lmh_cat.codes

array([1, 0, 2, 2, 0], dtype=int8)

In [6]:
# Sort the values; the displayed result follows the category order
# (high, low, medium). The result is not assigned to a name.
lmh_cat.sort_values()

[high, high, low, medium, medium]
Categories (3, object): [high, low, medium]

In [6]:
# Seed the legacy global NumPy RNG so the five draws below are reproducible,
# then wrap the integers (drawn from [0, 100)) in a one-column DataFrame.
np.random.seed(123456)
values = np.random.randint(low=0, high=100, size=5)
bins = pd.DataFrame(data={"Values": values})
bins

Unnamed: 0,Values
0,65
1,49
2,56
3,43
4,43


In [7]:
# Assign every value to a decade-wide, right-closed interval:
# (0, 10], (10, 20], ..., (90, 100].
decade_edges = range(0, 101, 10)
bins["Group"] = pd.cut(values, bins=decade_edges)
bins

Unnamed: 0,Values,Group
0,65,"(60, 70]"
1,49,"(40, 50]"
2,56,"(50, 60]"
3,43,"(40, 50]"
4,43,"(40, 50]"


In [10]:
# An ordered categorical: the position of each label in `metal_categories`
# defines its rank (bronze < silver < gold), independent of alphabetical order.
metal_categories = ["bronze", "silver", "gold"]
metal_values = ["bronze", "gold", "silver", "bronze"]
metals = pd.Categorical(
    metal_values,
    categories=metal_categories,
    ordered=True,
)
metals

['bronze', 'gold', 'silver', 'bronze']
Categories (3, object): ['bronze' < 'silver' < 'gold']

In [11]:
# Summary table with one row per category: occurrence counts and
# relative frequencies.
metals.describe()

Unnamed: 0_level_0,counts,freqs
categories,Unnamed: 1_level_1,Unnamed: 2_level_1
bronze,2,0.5
silver,1,0.25
gold,1,0.25


In [12]:
# Number of occurrences of each category.
metals.value_counts()

bronze    2
silver    1
gold      1
dtype: int64

In [13]:
# Smallest and largest value under the declared order (bronze < silver < gold),
# together with the most frequent value(s).
# NOTE(review): Categorical.mode may be deprecated/removed in newer pandas
# releases — confirm against the installed version.
(metals.min(), metals.max(), metals.mode())

('bronze', 'gold', ['bronze']
 Categories (3, object): ['bronze' < 'silver' < 'gold'])

In [14]:
# Integer code of each element, i.e. its position in the ordered category
# list (bronze=0, silver=1, gold=2).
metals.codes

array([0, 2, 1, 0], dtype=int8)

In [15]:
# Compare elements by rank through their integer codes: for an ordered
# categorical a larger code means a higher-ranked category.
# Element 0 vs element 1, then element 1 vs element 2.
(
    metals.codes[0] > metals.codes[1],
    metals.codes[1] > metals.codes[2],
)

(False, True)

In [16]:
# Add a new "platinum" category. It is appended after the existing ones, so it
# ranks highest; no existing value uses it yet.
with_platinum = metals.add_categories(new_categories=["platinum"])
with_platinum

['bronze', 'gold', 'silver', 'bronze']
Categories (4, object): ['bronze' < 'silver' < 'gold' < 'platinum']

In [18]:
# Drop every category that no value uses (here only "platinum"). The result is
# displayed but not assigned to a name.
with_platinum.remove_unused_categories()

['bronze', 'gold', 'silver', 'bronze']
Categories (3, object): ['bronze' < 'silver' < 'gold']

In [20]:
# Remove the "bronze" category. Values that belonged to it are shown as NaN
# in the result, since they no longer map to any category.
no_bronze = metals.remove_categories(removals=["bronze"])
no_bronze

[NaN, 'gold', 'silver', NaN]
Categories (2, object): ['silver' < 'gold']

In [22]:
# Rename categories: the new labels replace the old ones position by position
# (a -> bronze, b -> silver, c -> gold); the underlying codes are untouched.
# Assigning to `cat.categories` directly was removed in pandas 2.0, so use
# rename_categories and rebind the name instead.
cat = pd.Categorical(["a", "b", "c", "a"], categories=["a", "b", "c"])
cat = cat.rename_categories(["bronze", "silver", "gold"])
cat

['bronze', 'silver', 'gold', 'bronze']
Categories (3, object): ['bronze', 'silver', 'gold']

In [23]:
# rename_categories produces the relabelled categorical shown below; the
# result is not assigned, so the name `cat` is not rebound by this cell.
cat.rename_categories(["x", "y", "z"])

['x', 'y', 'z', 'x']
Categories (3, object): ['x', 'y', 'z']

In [24]:
# Reproducible sample data: one random integer grade in [50, 100] per student.
np.random.seed(123456)
names = [
    'Ivana', 'Norris', 'Ruth', 'Lane', 'Skye',
    'Sol', 'Dylan', 'Katina', 'Alissa', "Marc",
]
# The upper bound of randint is exclusive, hence 101 to allow a grade of 100.
grades = np.random.randint(low=50, high=101, size=len(names))
scores = pd.DataFrame(data={'Name': names, 'Grade': grades})
scores

Unnamed: 0,Name,Grade
0,Ivana,51
1,Norris,92
2,Ruth,100
3,Lane,99
4,Skye,93
5,Sol,97
6,Dylan,93
7,Katina,77
8,Alissa,82
9,Marc,73


In [26]:
# Map numeric grades to letter grades with right-closed bins:
# [0, 59] -> F, (59, 62] -> D-, ..., (92, 99] -> A, (99, 100] -> A+.
# The 14 edges define 13 intervals, matched one-to-one by `letter_grades`.
score_bins = [0, 59, 62, 66, 69, 72, 76, 79, 82,
              86, 89, 92, 99, 100]
letter_grades = ['F', 'D-', 'D', 'D+', 'C-', 'C', 'C+', 'B-', 'B',
                 'B+', 'A-', 'A', 'A+']
# include_lowest=True closes the first bin on the left, so a grade of exactly 0
# is graded 'F' instead of falling outside every bin and becoming NaN.
letter_cats = pd.cut(scores.Grade, score_bins, labels=letter_grades,
                     include_lowest=True)
scores['Letter'] = letter_cats
scores

Unnamed: 0,Name,Grade,Letter
0,Ivana,51,F
1,Norris,92,A-
2,Ruth,100,A+
3,Lane,99,A
4,Skye,93,A
5,Sol,97,A
6,Dylan,93,A
7,Katina,77,C+
8,Alissa,82,B-
9,Marc,73,C


In [27]:
# Inspect the categorical Series returned by pd.cut: the output shows an
# ordered dtype whose categories follow the order of `letter_grades`.
letter_cats

0     F
1    A-
2    A+
3     A
4     A
5     A
6     A
7    C+
8    B-
9     C
Name: Grade, dtype: category
Categories (13, object): ['F' < 'D-' < 'D' < 'D+' ... 'B+' < 'A-' < 'A' < 'A+']

In [28]:
# Tally how many students received each letter grade. Categories with no
# students still appear, with a count of 0.
scores["Letter"].value_counts()

A     4
F     1
C     1
C+    1
B-    1
A-    1
A+    1
D-    0
D     0
D+    0
C-    0
B     0
B+    0
Name: Letter, dtype: int64

In [30]:
# Sort best-to-worst by letter grade. The column is an ordered categorical, so
# the sort follows grade rank (A+ first, F last) rather than alphabetical order.
scores.sort_values(by="Letter", ascending=False)

Unnamed: 0,Name,Grade,Letter
2,Ruth,100,A+
3,Lane,99,A
4,Skye,93,A
5,Sol,97,A
6,Dylan,93,A
1,Norris,92,A-
8,Alissa,82,B-
7,Katina,77,C+
9,Marc,73,C
0,Ivana,51,F
