In [1]:
import numpy as np

from sklearn.preprocessing import OneHotEncoder

In [2]:
X = np.array([["A"],["A"],["B"],["C"]])

In [3]:
X

array([['A'],
       ['A'],
       ['B'],
       ['C']], dtype='<U1')

In [4]:
enc = OneHotEncoder()

In [5]:
enc.fit_transform(X).todense()

matrix([[1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

# Output with dense matrix

In [6]:
enc = OneHotEncoder(sparse=False)

In [7]:
enc.fit_transform(X)

array([[1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

# Removing one dummy variable

In [8]:
enc = OneHotEncoder(sparse=False, drop='first')

In [9]:
enc.fit_transform(X)

array([[0., 0.],
       [0., 0.],
       [1., 0.],
       [0., 1.]])

# Removing one dummy variable from binary features

In [10]:
X = np.array([["A"],["A"],["A"],["C"]])

In [11]:
X

array([['A'],
       ['A'],
       ['A'],
       ['C']], dtype='<U1')

In [12]:
enc = OneHotEncoder(sparse=False, drop="if_binary")
enc.fit_transform(X)

array([[0.],
       [0.],
       [0.],
       [1.]])

# Error handling

In [13]:
enc = OneHotEncoder(sparse=False, handle_unknown = 'error')

enc.fit(X)

OneHotEncoder(sparse=False)

In [14]:
enc.transform(X)

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.]])

In [15]:
Y = np.array([["A"],["A"],["B"],["C"]])

In [16]:
enc.transform(Y)

ValueError: Found unknown categories ['B'] in column 0 during transform

In [17]:
enc = OneHotEncoder(sparse=False, handle_unknown = 'ignore')

In [18]:
enc.fit(X)

OneHotEncoder(handle_unknown='ignore', sparse=False)

In [19]:
enc.transform(Y)

array([[1., 0.],
       [1., 0.],
       [0., 0.],
       [0., 1.]])

In [20]:
X = [["A","X"],["B","Y"],["C","Z"]]

In [21]:
enc = OneHotEncoder(sparse=False, categories = [["A","B","C","D"], ["X","Y","Z"]])

In [22]:
enc.fit_transform(X)

array([[1., 0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0., 0., 1.]])

In [23]:
Y = [["D","Z"]]

In [24]:
enc.transform(Y)

array([[0., 0., 0., 1., 0., 0., 1.]])