# Encoding

In [2]:
from sklearn.preprocessing import LabelEncoder
# Toy categorical feature with repeated labels
data = ['cat', 'dog', 'fish', 'dog', 'cat']
# fit() learns the distinct labels and returns the encoder itself
le = LabelEncoder().fit(data)
# Replace every label with its integer code
encoded_data = le.transform(data)
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data: ['cat', 'dog', 'fish', 'dog', 'cat']
Encoded data: [0 1 2 1 0]


In [3]:
from sklearn.preprocessing import OneHotEncoder
# One column of categorical values, shaped (n_samples, 1) as the encoder expects
data = [['cat'], ['dog'], ['fish'], ['dog'], ['cat']]
# Keep the encoder's default sparse output and densify it explicitly below.
# The former `sparse=False` keyword was deprecated in scikit-learn 1.2
# (renamed to `sparse_output`) and removed in 1.4, so passing it raises a
# TypeError on current releases; `.toarray()` works on old and new versions.
encoder = OneHotEncoder()
encoder.fit(data)
# transform() yields a SciPy sparse matrix; convert it to a dense NumPy array
encoded_data = encoder.transform(data).toarray()
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data: [['cat'], ['dog'], ['fish'], ['dog'], ['cat']]
Encoded data: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]]




In [4]:
from sklearn.preprocessing import OrdinalEncoder
# One ordinal feature, shaped (n_samples, 1)
data = [['low'], ['low'], ['medium'], ['medium'], ['high'], ['low'], ['medium'], ['high'], ['low']]
# Explicit category order for the single feature: low -> 0, medium -> 1, high -> 2
categories = [['low', 'medium', 'high']]
# fit() returns the encoder itself, so construction and fitting can be chained
encoder = OrdinalEncoder(categories=categories).fit(data)
encoded_data = encoder.transform(data)
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data: [['low'], ['low'], ['medium'], ['medium'], ['high'], ['low'], ['medium'], ['high'], ['low']]
Encoded data: [[0.]
 [0.]
 [1.]
 [1.]
 [2.]
 [0.]
 [1.]
 [2.]
 [0.]]


In [5]:
def binary_encode(data, categories):
  """Encode each item of ``data`` as an indicator vector over ``categories``.

  Every item becomes a list of ``len(categories)`` integers that is all
  zeros except for a 1 at the position of the first entry in ``categories``
  equal to the item. Items that do not appear in ``categories`` are encoded
  as an all-zero vector.

  Args:
    data: iterable of items to encode.
    categories: sequence of known categories; its order fixes the positions.

  Returns:
    A list with one indicator vector (list of 0/1 ints) per item of ``data``.
  """
  width = len(categories)

  def _indicator(value):
    # Start from all zeros; unknown values keep that all-zero row
    row = [0] * width
    if value in categories:
      row[categories.index(value)] = 1
    return row

  return [_indicator(value) for value in data]
# Known categories; their order fixes the position of the 1 in each vector
categories = ['cat', 'dog', 'fish']
# Items to encode
data = ['cat', 'dog', 'fish', 'dog', 'cat']
encoded_data = binary_encode(data, categories)
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data: ['cat', 'dog', 'fish', 'dog', 'cat']
Encoded data: [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]


In [6]:
import pandas as pd
def frequency_encode(data, column):
  """Return a copy of ``data`` with ``column`` replaced by relative frequencies.

  Each value in ``column`` is mapped to the fraction of rows that hold that
  value, as computed by ``value_counts(normalize=True)``. The input frame is
  not modified; the result is built on a copy.

  Args:
    data: DataFrame containing ``column``.
    column: label of the column to encode.

  Returns:
    A new DataFrame identical to ``data`` except for the encoded column.
  """
  result = data.copy()
  # Share of rows per distinct value, indexed by the value itself
  shares = data[column].value_counts(normalize=True)
  result[column] = data[column].map(shares)
  return result
# Small frame with one categorical column: A appears 3x, B 2x, C once
data = pd.DataFrame({'category': ['A', 'A', 'B', 'B', 'A', 'C']})
# frequency_encode() works on its own copy of the frame, so `data` can be
# passed directly; an extra .copy() at the call site is redundant.
encoded_data = frequency_encode(data, 'category')
print("Original data:", data)
print("Encoded data:", encoded_data)

Original data:   category
0        A
1        A
2        B
3        B
4        A
5        C
Encoded data:    category
0  0.500000
1  0.500000
2  0.333333
3  0.333333
4  0.500000
5  0.166667


# Standard Scaling

In [7]:
from sklearn.preprocessing import StandardScaler
# Single numeric feature, shaped (n_samples, 1)
data = [[10.0], [2.0], [7.0], [4.0], [12.0]]
# fit() learns the feature's mean and standard deviation and returns the scaler
scaler = StandardScaler().fit(data)
# Standardise: subtract the learned mean, divide by the learned std
scaled_data = scaler.transform(data)
print("Original data:", data)
print("Scaled data:", scaled_data)

Original data: [[10.0], [2.0], [7.0], [4.0], [12.0]]
Scaled data: [[ 0.81348922]
 [-1.35581536]
 [ 0.        ]
 [-0.81348922]
 [ 1.35581536]]


# Min-Max Scaling

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Two numeric features, four samples
data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]

# Build the scaler and learn each feature's min and max in one step
# (fit() returns the fitted scaler)
scaler = MinMaxScaler().fit(data)

# Rescale every feature to the default [0, 1] range
scaled_data = scaler.transform(data)

# Show the input next to the scaled result
print("Original data:")
print(data)
print("\nScaled data:")
print(scaled_data)

Original data:
[[-1, 2], [-0.5, 6], [0, 10], [1, 18]]

Scaled data:
[[0.   0.  ]
 [0.25 0.25]
 [0.5  0.5 ]
 [1.   1.  ]]


# Robust Scaling

In [9]:
from sklearn.preprocessing import RobustScaler

# Two numeric features, four samples
data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]

# Build the scaler and learn each feature's median and interquartile range
# in one step (fit() returns the fitted scaler)
scaler = RobustScaler().fit(data)

# Centre on the median and divide by the IQR
scaled_data = scaler.transform(data)

# Show the input next to the scaled result
print("Original data:")
print(data)
print("\nScaled data:")
print(scaled_data)


Original data:
[[-1, 2], [-0.5, 6], [0, 10], [1, 18]]

Scaled data:
[[-0.85714286 -0.85714286]
 [-0.28571429 -0.28571429]
 [ 0.28571429  0.28571429]
 [ 1.42857143  1.42857143]]


# Linear Regression

In [10]:
from sklearn.linear_model import LinearRegression

# Training set: four samples with two features each, plus their targets
X = [[1, 1], [1, 2], [2, 2], [2, 3]]
y = [3, 5, 7, 9]

# Build the estimator and learn the coefficients and intercept in one step
# (fit() returns the fitted model)
model = LinearRegression().fit(X, y)

# Predict the target for one unseen sample
y_pred = model.predict([[3, 5]])

# Report the learned parameters and the prediction
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)
print("Prediction for [3, 5]:", y_pred)


Coefficients: [2. 2.]
Intercept: -0.9999999999999991
Prediction for [3, 5]: [15.]


# Logistic Regression

In [1]:
from sklearn.linear_model import LogisticRegression

# Binary-classification training set: four samples, two features each
X = [[1, 1], [1.5, 1.8], [5, 8], [8, 8.5]]
y = [0, 1, 1, 0]

# random_state is only consulted by the 'sag', 'saga' and 'liblinear' solvers;
# it is pinned here so the fit stays reproducible whichever solver is in effect.
# fit() learns the weights and bias and returns the fitted model.
model = LogisticRegression(random_state=0).fit(X, y)

# Predicted label and per-class probabilities for one unseen sample
y_pred = model.predict([[3, 5]])
y_proba = model.predict_proba([[3, 5]])  # one column per class, ordered as model.classes_

# Report the results
print("Predicted class:", y_pred)
print("Probabilities:", y_proba)  # Probability of class 0 and 1


Predicted class: [1]
Probabilities: [[0.36125603 0.63874397]]
