# Nominal Encoding / One-Hot Encoding

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Toy dataset: one nominal (unordered) feature with three distinct categories.
df = pd.Series(
    ['red', 'blue', 'green', 'red', 'green', 'blue'],
    name='color',
).to_frame()

In [2]:
# Show the raw single-column frame before any encoding is applied.
df

Unnamed: 0,color
0,red
1,blue
2,green
3,red
4,green
5,blue


In [3]:
# Build a one-hot encoder with its default settings.
encoder = OneHotEncoder()
# Learn the categories of 'color', then encode the same column. The double
# brackets pass a one-column DataFrame (2-D) rather than a Series.
# Categories are sorted alphabetically, so the output columns are
# blue, green, red.
encoded = encoder.fit(df[['color']]).transform(df[['color']])

In [4]:
# The result is a sparse matrix, so its repr reports shape and storage format
# rather than the individual values.
encoded

<6x3 sparse matrix of type '<class 'numpy.float64'>'
	with 6 stored elements in Compressed Sparse Row format>

In [5]:
# Densify the sparse result and name each column after its category
# (get_feature_names_out yields color_blue, color_green, color_red here).
# Reusing df.index keeps the rows aligned with df when the two frames are
# concatenated column-wise, even if df does not have a default 0..n-1 index.
encoded_df = pd.DataFrame(
    encoded.toarray(),
    columns=encoder.get_feature_names_out(),
    index=df.index,
)

In [6]:
# Show the dense one-hot frame: each row has a single 1.0, in the column that
# matches its colour.
encoded_df

Unnamed: 0,color_blue,color_green,color_red
0,0.0,0.0,1.0
1,1.0,0.0,0.0
2,0.0,1.0,0.0
3,0.0,0.0,1.0
4,0.0,1.0,0.0
5,1.0,0.0,0.0


In [7]:
# Attach the one-hot columns to the original frame. Any encoded columns already
# present in df are dropped first, so re-running this cell yields the same frame
# instead of appending duplicate color_* columns each time.
df = pd.concat(
    [df.drop(columns=encoded_df.columns, errors='ignore'), encoded_df],
    axis=1,
)

In [8]:
# Show the original 'color' column side by side with its one-hot columns.
df

Unnamed: 0,color,color_blue,color_green,color_red
0,red,0.0,0.0,1.0
1,blue,1.0,0.0,0.0
2,green,0.0,1.0,0.0
3,red,0.0,0.0,1.0
4,green,0.0,1.0,0.0
5,blue,1.0,0.0,0.0


# One-Hot Encoding With Multiple Categorical Columns


In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Step 1: Build a small frame with two nominal features.
data = pd.DataFrame(
    dict(
        Color=['Red', 'Green', 'Blue', 'Green', 'Red'],
        Size=['S', 'M', 'L', 'M', 'XL'],
    )
)

# Heading and frame are emitted on separate lines.
print("Original Data:", data, sep="\n")

# Step 2: Create the encoder with default settings.
encoder = OneHotEncoder()

# Step 3: Learn the categories of both columns, then encode those same columns.
categorical_cols = ['Color', 'Size']
encoded_array = encoder.fit(data[categorical_cols]).transform(data[categorical_cols])

Original Data:
   Color Size
0    Red    S
1  Green    M
2   Blue    L
3  Green    M
4    Red   XL


In [8]:
# Printing the sparse matrix lists only the stored entries, one per line, as
# (row, column) followed by the value.
print(encoded_array)

  (0, 2)	1.0
  (0, 5)	1.0
  (1, 1)	1.0
  (1, 4)	1.0
  (2, 0)	1.0
  (2, 3)	1.0
  (3, 1)	1.0
  (3, 4)	1.0
  (4, 2)	1.0
  (4, 6)	1.0


In [10]:
# Convert to a dense array to see the full layout, zeros included.
print(encoded_array.toarray())

[[0. 0. 1. 0. 0. 1. 0.]
 [0. 1. 0. 0. 1. 0. 0.]
 [1. 0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 1.]]


In [6]:
# Step 4: Densify the sparse result and label each column as <feature>_<category>.
# Reusing data.index keeps the rows aligned with `data` when the two frames are
# concatenated column-wise, even if `data` does not have a default 0..n-1 index.
encoded_df = pd.DataFrame(
    encoded_array.toarray(),
    columns=encoder.get_feature_names_out(['Color', 'Size']),
    index=data.index,
)


In [7]:

# Step 5: Optionally place the encoded columns next to the original ones.
final_df = pd.concat(objs=[data, encoded_df], axis="columns")

# Blank line, heading, then the combined frame.
print("\nOne-Hot Encoded Data:", final_df, sep="\n")


One-Hot Encoded Data:
   Color Size  Color_Blue  Color_Green  Color_Red  Size_L  Size_M  Size_S  \
0    Red    S         0.0          0.0        1.0     0.0     0.0     1.0   
1  Green    M         0.0          1.0        0.0     0.0     1.0     0.0   
2   Blue    L         1.0          0.0        0.0     1.0     0.0     0.0   
3  Green    M         0.0          1.0        0.0     0.0     1.0     0.0   
4    Red   XL         0.0          0.0        1.0     0.0     0.0     0.0   

   Size_XL  
0      0.0  
1      0.0  
2      0.0  
3      0.0  
4      1.0  
