In [1]:
import pandas as pd

# Sample DataFrame
data = {'ID': [1, 2, 3, 4, 5],
        'Category': ['A', 'B', 'A', 'C', 'B']}

df = pd.DataFrame(data)

# Perform one-hot encoding
one_hot_encoded_df = pd.get_dummies(df, columns=['Category'])  # , drop_first=True
print(one_hot_encoded_df)

   ID  Category_A  Category_B  Category_C
0   1        True       False       False
1   2       False        True       False
2   3        True       False       False
3   4       False       False        True
4   5       False        True       False


In [2]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Sample DataFrame
data = {'ID': [1, 2, 3, 4, 5],
        'Category': ['A', 'B', 'A', 'C', 'B']}
df = pd.DataFrame(data)

# Create the OneHotEncoder object with sparse_output=False
encoder = OneHotEncoder(sparse_output=False, drop='first')

# Apply the encoder and convert the output to integers
one_hot_encoded_data = encoder.fit_transform(df[['Category']]).astype('int')

# Create a new DataFrame with the one-hot encoded data
one_hot_encoded_df = pd.DataFrame(one_hot_encoded_data, columns=encoder.get_feature_names_out(['Category']))

# Concatenate the one-hot encoded DataFrame with the original DataFrame
result_df = pd.concat([df, one_hot_encoded_df], axis=1)

# Drop the original 'Category' column if needed
result_df.drop('Category', axis=1, inplace=True)

print(result_df)


   ID  Category_B  Category_C
0   1           0           0
1   2           1           0
2   3           0           0
3   4           0           1
4   5           1           0


In [3]:
from pandas.io.formats.format import NA
import pandas as pd

# Sample DataFrame
data = {'ID': [1, 2, 3, 4, 5],
        'Category': ['A', NA, 'A', 'C', 'B']}
df = pd.DataFrame(data)

# Perform label encoding
df['Category_encoded'] = df['Category'].astype('category').cat.codes

# df.drop('Category', axis=1, inplace=True)
print(df)

   ID Category  Category_encoded
0   1        A                 0
1   2     <NA>                -1
2   3        A                 0
3   4        C                 2
4   5        B                 1


In [4]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Sample data with categorical variables
data = [
    ['red', 'small'],
    ['blue', 'large'],
    ['green', 'medium'],
]

# Create an instance of OneHotEncoder with sparse_output=True
encoder_sparse = OneHotEncoder(sparse_output=True)
encoded_data_sparse = encoder_sparse.fit_transform(data)

print("Sparse Encoding:")
print(encoded_data_sparse)


Sparse Encoding:
  (0, 2)	1.0
  (0, 5)	1.0
  (1, 0)	1.0
  (1, 3)	1.0
  (2, 1)	1.0
  (2, 4)	1.0


In [5]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Sample data with categorical variables
data = [
    ['red', 'small'],
    ['blue', 'large'],
    ['green', 'medium'],
]

# Create an instance of OneHotEncoder with sparse_output=False
encoder_dense = OneHotEncoder(sparse_output=False)
encoded_data_dense = encoder_dense.fit_transform(data)

print("Dense Encoding:")
print(encoded_data_dense)


Dense Encoding:
[[0. 0. 1. 0. 0. 1.]
 [1. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 1. 0.]]


In [6]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder

# Sample DataFrame with categorical features and target variable
data = {
    'Category': ['A', 'B', 'A', 'C', 'B'],
    'Color': ['Red', 'Blue', 'Green', 'Red', 'Green'],
    'Target': ['Yes', 'No', 'No', 'Yes', 'Yes']
}
df = pd.DataFrame(data)

# Step 3: Identify the categorical features and target variable
categorical_features = ['Category', 'Color']
target_variable = 'Target'

# Step 4: Use OrdinalEncoder for categorical features and LabelEncoder for the target variable
ordinal_encoder = OrdinalEncoder()
label_encoder = LabelEncoder()

# Fit and transform the categorical features using OrdinalEncoder
df[categorical_features] = ordinal_encoder.fit_transform(df[categorical_features]).astype('int')

# Fit and transform the target variable using LabelEncoder
df[target_variable] = label_encoder.fit_transform(df[target_variable])

# Step 6: Optionally, use inverse_transform to decode the labels back to their original values
original_category_labels = ordinal_encoder.inverse_transform(df[categorical_features].values)
original_target_labels = label_encoder.inverse_transform(df[target_variable].values)

print("Encoded DataFrame:")
print(df)

print("\nOriginal Category Labels:")

Encoded DataFrame:
   Category  Color  Target
0         0      2       1
1         1      0       0
2         0      1       0
3         2      2       1
4         1      1       1

Original Category Labels:
