# Encoding

In [14]:
from sklearn.preprocessing import LabelEncoder
data = ['cat', 'dog', 'fish', 'dog', 'cat']
le = LabelEncoder()
le.fit(data)
encoded_data = le.transform(data)
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data: ['cat', 'dog', 'fish', 'dog', 'cat']
Encoded data: [0 1 2 1 0]


In [15]:
from sklearn.preprocessing import OneHotEncoder
data = [['cat'], ['dog'], ['fish'], ['dog'], ['cat']]
encoder = OneHotEncoder(sparse=False)  
encoder.fit(data)
encoded_data = encoder.transform(data)
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data: [['cat'], ['dog'], ['fish'], ['dog'], ['cat']]
Encoded data: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]]




In [16]:
from sklearn.preprocessing import OrdinalEncoder
# Ordinal encoding with an explicit category order, so that the integer codes
# follow low < medium < high.
data = [['low'], ['low'], ['medium'], ['medium'], ['high'], ['low'], ['medium'], ['high'], ['low']]
categories = [['low', 'medium', 'high']]  # one ordered list per input column
encoder = OrdinalEncoder(categories=categories).fit(data)
encoded_data = encoder.transform(data)
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data: [['low'], ['low'], ['medium'], ['medium'], ['high'], ['low'], ['medium'], ['high'], ['low']]
Encoded data: [[0.]
 [0.]
 [1.]
 [1.]
 [2.]
 [0.]
 [1.]
 [2.]
 [0.]]


In [17]:
def binary_encode(data, categories):
  """Return one 0/1 indicator vector per item in ``data``.

  Every vector has ``len(categories)`` entries. The entry at the position of
  the item's first occurrence in ``categories`` is 1 and all others are 0.
  An item that does not appear in ``categories`` yields an all-zero vector.

  Note: despite the name, this is a one-hot style encoding (one slot per
  category), not a base-2 encoding.
  """
  width = len(categories)

  def indicator(item):
    # Start from all zeros and flag the slot of the matching category, if any.
    row = [0] * width
    if item in categories:
      row[categories.index(item)] = 1
    return row

  return [indicator(item) for item in data]
# Demo: encode a small sample against a fixed list of known categories.
categories = ['cat', 'dog', 'fish']
data = ['cat', 'dog', 'fish', 'dog', 'cat']
encoded_data = binary_encode(data=data, categories=categories)
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data: ['cat', 'dog', 'fish', 'dog', 'cat']
Encoded data: [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]


In [18]:
import pandas as pd
def frequency_encode(data, column):
  """Replace the values of ``column`` with their relative frequencies.

  The frequencies come from ``value_counts(normalize=True)`` on that column,
  so each value is mapped to the fraction of counted rows that hold it.
  ``data`` itself is not modified; the encoded column is written to a copy,
  which is returned.
  """
  frequencies = data[column].value_counts(normalize=True)
  result = data.copy()
  # Look up each original value's share and store it in the copy.
  result[column] = data[column].map(frequencies)
  return result
# Demo: 'A' makes up 3/6 of the rows, 'B' 2/6 and 'C' 1/6.
data = pd.DataFrame({'category': ['A', 'A', 'B', 'B', 'A', 'C']})
# frequency_encode copies its input before modifying it, so `data` can be
# passed directly; an extra .copy() here would only duplicate the frame twice.
encoded_data = frequency_encode(data, 'category')
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data:   category
0        A
1        A
2        B
3        B
4        A
5        C
Encoded data:    category
0  0.500000
1  0.500000
2  0.333333
3  0.333333
4  0.500000
5  0.166667


# Standard Scaler

In [20]:
from sklearn.preprocessing import StandardScaler
# Standardization: rescale the single feature column to zero mean and unit
# variance (the sample mean here is 7.0, which maps to 0 in the output).
data = [[10.0], [2.0], [7.0], [4.0], [12.0]]
scaler = StandardScaler().fit(data)  # fit() hands back the fitted scaler itself
scaled_data = scaler.transform(data)
print("Original data:", data)
print("Scaled data:", scaled_data)

Original data: [[10.0], [2.0], [7.0], [4.0], [12.0]]
Scaled data: [[ 0.81348922]
 [-1.35581536]
 [ 0.        ]
 [-0.81348922]
 [ 1.35581536]]
