In [10]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Sample employee dataset, written one employee per row and then transposed
# into the column-oriented dict that pd.DataFrame consumes.
column_names = ['Employee ID', 'Department', 'Job Level', 'City', 'Salary']
employee_rows = [
    (101, 'HR', 'Junior', 'New York', 50000),
    (102, 'IT', 'Senior', 'San Francisco', 80000),
    (103, 'Finance', 'Mid', 'New York', 60000),
    (104, 'IT', 'Senior', 'Austin', 85000),
    (105, 'HR', 'Junior', 'Austin', 52000),
]

# zip(*rows) regroups the values by column; 'Salary' is numerical and is
# therefore left out of the encoding steps further down.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*employee_rows))
}

df = pd.DataFrame(data)
print("Original DataFrame:\n", df, "\n")

# ----------------------------
# 1️⃣ One-Hot Encoding with Pandas
# ----------------------------
# Each listed column is replaced by one indicator column per category
# (drop_first=False keeps the first level as well); the columns that are
# not listed are carried over as they are.
df_pandas_encoded = pd.get_dummies(
    data=df,
    columns=['Department', 'Job Level', 'City'],
    drop_first=False,
)
print("One-Hot Encoded with Pandas:\n", df_pandas_encoded, "\n")

# ----------------------------
# 2️⃣ One-Hot Encoding with Scikit-learn
# ----------------------------
# sparse_output=False makes the encoder return a dense array rather than a
# sparse matrix; drop=None keeps an indicator column for every category.
encoder = OneHotEncoder(sparse_output=False, drop=None)  # keep all categories

# Fit-transform only categorical columns
categorical_cols = ['Department', 'Job Level', 'City']
one_hot_encoded = encoder.fit_transform(df[categorical_cols])

# Convert to DataFrame with proper column names.
# The encoder output is a bare array with no row labels, so reuse df's index
# explicitly: pd.concat(axis=1) aligns rows by index label, and a fresh
# 0..n-1 index would misalign rows (and introduce NaNs) whenever df carries
# a non-default index, e.g. after filtering or shuffling.
one_hot_df = pd.DataFrame(
    one_hot_encoded,
    columns=encoder.get_feature_names_out(categorical_cols),
    index=df.index,
)

# Combine encoded data with numerical columns
df_sklearn_encoded = pd.concat([df[['Employee ID', 'Salary']], one_hot_df], axis=1)
print("One-Hot Encoded with Scikit-learn:\n", df_sklearn_encoded)


Original DataFrame:
    Employee ID Department Job Level           City  Salary
0          101         HR    Junior       New York   50000
1          102         IT    Senior  San Francisco   80000
2          103    Finance       Mid       New York   60000
3          104         IT    Senior         Austin   85000
4          105         HR    Junior         Austin   52000 

One-Hot Encoded with Pandas:
    Employee ID  Salary  Department_Finance  Department_HR  Department_IT  \
0          101   50000               False           True          False   
1          102   80000               False          False           True   
2          103   60000                True          False          False   
3          104   85000               False          False           True   
4          105   52000               False           True          False   

   Job Level_Junior  Job Level_Mid  Job Level_Senior  City_Austin  \
0              True          False             False        False  