# One-hot encoding

In [12]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [13]:
# Toy employee table: one numeric identifier column plus two string-valued
# (categorical) columns that are one-hot encoded further down.
data = {
    'EmployeeID': [10, 20, 15, 25, 30],  # numeric, left untouched by the encoder
    'Gender': ['M', 'F', 'F', 'M', 'F'],  # two distinct categories
    'Remarks': ['Good', 'Nice', 'Good', 'Great', 'Nice'],  # three distinct categories
}

# Each dict key becomes a column; the rows get the default 0..4 index.
df = pd.DataFrame.from_dict(data)

In [14]:
# Show the raw frame (numeric EmployeeID plus the string columns Gender and Remarks).
print(df)

   EmployeeID Gender Remarks
0          10      M    Good
1          20      F    Nice
2          15      F    Good
3          25      M   Great
4          30      F    Nice


In [15]:
# Treat every object-dtype column as categorical (here: Gender and Remarks).
categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
# Backward-compatible alias for the earlier misspelled name, in case other
# cells still refer to it.
categorical_columsn = categorical_columns

# sparse_output=False makes the encoder return a dense NumPy array instead of
# a SciPy sparse matrix, so it can be wrapped in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)

# Learn the categories of each column and emit one 0/1 indicator column per category.
one_hot_encoded = encoder.fit_transform(df[categorical_columns])

# Wrap the array in a DataFrame with readable column names (e.g. Gender_F).
# Reusing df's index keeps the rows aligned in the concat below even when df
# does not carry the default RangeIndex (e.g. after filtering or sorting);
# without it, pd.concat(axis=1) would align on mismatched labels and yield NaNs.
one_hot_df = pd.DataFrame(
    one_hot_encoded,
    columns=encoder.get_feature_names_out(),
    index=df.index,
)

# Replace the original categorical columns with their one-hot encoded
# counterparts; the non-categorical columns are kept as they are.
df_encoded = pd.concat(
    [df.drop(columns=categorical_columns), one_hot_df],
    axis=1,
)


In [16]:
# Show the encoded frame: EmployeeID followed by one indicator column per category value.
print(df_encoded)

   EmployeeID  Gender_F  Gender_M  Remarks_Good  Remarks_Great  Remarks_Nice
0          10       0.0       1.0           1.0            0.0           0.0
1          20       1.0       0.0           0.0            0.0           1.0
2          15       1.0       0.0           1.0            0.0           0.0
3          25       0.0       1.0           0.0            1.0           0.0
4          30       1.0       0.0           0.0            0.0           1.0
