In [16]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
# Lift the column display limit so printed DataFrames show every column
# instead of eliding the middle ones with "...".
pd.set_option('display.max_columns', None)


In [17]:
# Toy employee records: one numeric identifier column plus two text columns
# (Gender, Remarks) that serve as the categorical features later on.
data = {
    'EmployeeID': [10, 15, 20, 25, 30],
    'Gender': ['M', 'F', 'F', 'M', 'F'],
    'Remarks': ['Good', 'Nice', 'Good', 'Great', 'Nice'],
}

# Wrap the column-oriented dict in a pandas DataFrame (one row per employee).
df = pd.DataFrame(data=data)

# Show the raw table before any encoding is applied.
print(f"Employee data: \n{df}")

Employee data: 
   EmployeeID Gender Remarks
0          10      M    Good
1          15      F    Nice
2          20      F    Good
3          25      M   Great
4          30      F    Nice


In [None]:
# Extracting categorical columns
# The text columns of this frame are picked up via the object dtype and are
# treated as the categorical features to encode.
# NOTE(review): on pandas versions that infer a dedicated string dtype for
# text columns, 'object' may no longer match them -- confirm against the
# pandas version in use.
cat_columns = df.select_dtypes(include=['object']).columns.tolist()
print(f"\nCategorical columns: {cat_columns}")
for col in cat_columns:
    print(f"\nUnique values in {col}: {df[col].unique()}")

# Creating OneHotEncoder object
# sparse_output=False asks for a dense array so it can be printed and wrapped
# in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)

# Fitting and transforming the data: one 0/1 column per (feature, category) pair
encoded_data = encoder.fit_transform(df[cat_columns])
print(f"\nEncoded data: \n{encoded_data}")

# Creating dataframe from the encoded data
# Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
# RangeIndex here would misalign rows (and introduce NaNs) whenever df does
# not carry the default 0..n-1 index.
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(cat_columns),
    index=df.index,
)
print(f"\nEncoded DataFrame: \n{encoded_df}")

# Concatenating the original dataframe with the encoded dataframe
# The original categorical columns are kept alongside their encoded versions.
final_df = pd.concat([df, encoded_df], axis=1)
print(f"\nFinal DataFrame: \n{final_df}")


Categorical columns: ['Gender', 'Remarks']

Unique values in Gender: ['M' 'F']

Unique values in Remarks: ['Good' 'Nice' 'Great']

Encoded data: 
[[0. 1. 1. 0. 0.]
 [1. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0.]
 [0. 1. 0. 1. 0.]
 [1. 0. 0. 0. 1.]]

Encoded DataFrame: 
   Gender_F  Gender_M  Remarks_Good  Remarks_Great  Remarks_Nice
0       0.0       1.0           1.0            0.0           0.0
1       1.0       0.0           0.0            0.0           1.0
2       1.0       0.0           1.0            0.0           0.0
3       0.0       1.0           0.0            1.0           0.0
4       1.0       0.0           0.0            0.0           1.0

Final DataFrame: 
   EmployeeID Gender Remarks  Gender_F  Gender_M  Remarks_Good  Remarks_Great  \
0          10      M    Good       0.0       1.0           1.0            0.0   
1          15      F    Nice       1.0       0.0           0.0            0.0   
2          20      F    Good       1.0       0.0           1.0            0.0   
3       