In [1]:
import pandas as pd

df = pd.DataFrame({
    "name": ["Swift", "Verna", "Polo", "Creta", "Innova"],
    "brand": ["Suzuki", "Hyundai", "VW", "Hyundai", "Toyota"],
    "color": ["Red", "White", "Red", "Black", "White"]
    })

print(df)

     name    brand  color
0   Swift   Suzuki    Red
1   Verna  Hyundai  White
2    Polo       VW    Red
3   Creta  Hyundai  Black
4  Innova   Toyota  White


## Encoding Categorical Variables

In [2]:
import numpy as np
df_one_hot_encoded = pd.get_dummies(data = df, columns = ["brand", "color"])

print(df_one_hot_encoded )

     name  brand_Hyundai  brand_Suzuki  brand_Toyota  brand_VW  color_Black  \
0   Swift          False          True         False     False        False   
1   Verna           True         False         False     False        False   
2    Polo          False         False         False      True        False   
3   Creta           True         False         False     False         True   
4  Innova          False         False          True     False        False   

   color_Red  color_White  
0       True        False  
1      False         True  
2       True        False  
3      False        False  
4      False         True  


In [3]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

cols_to_encode = ["brand", "color"]
new_encoded_cols_names = []

# Create a list of new columns names
for col in cols_to_encode:
    new_encoded_cols_names += [f"is_{category}" for category in df[col].unique().tolist()]

print(new_encoded_cols_names)
#['is_Suzuki', 'is_Hyundai', 'is_VW', 'is_Toyota', 'is_Red', 'is_White', 'is_Black']

# Inititalize and fit One Hot Encoder class from sklearn
one_hot_encoder = OneHotEncoder(sparse = False, handle_unknown = 'ignore')
encoded_cols = one_hot_encoder.fit_transform(df[cols_to_encode])

# As the output of the above transformer would be a sparse matrix, we need to convert it back to Pandas dataframe
df_encoded = pd.DataFrame(encoded_cols, columns = new_encoded_cols_names)
df_one_hot_encoded = df.join(df_encoded)

print(df_one_hot_encoded)

['is_Suzuki', 'is_Hyundai', 'is_VW', 'is_Toyota', 'is_Red', 'is_White', 'is_Black']
     name    brand  color  is_Suzuki  is_Hyundai  is_VW  is_Toyota  is_Red  \
0   Swift   Suzuki    Red        0.0         1.0    0.0        0.0     0.0   
1   Verna  Hyundai  White        1.0         0.0    0.0        0.0     0.0   
2    Polo       VW    Red        0.0         0.0    0.0        1.0     0.0   
3   Creta  Hyundai  Black        1.0         0.0    0.0        0.0     1.0   
4  Innova   Toyota  White        0.0         0.0    1.0        0.0     0.0   

   is_White  is_Black  
0       1.0       0.0  
1       0.0       1.0  
2       1.0       0.0  
3       0.0       0.0  
4       0.0       1.0  




In [5]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
for i in df:
    df[i] = label_encoder.fit_transform(df[i])

In [6]:
df.brand.value_counts()

brand
0    2
1    1
3    1
2    1
Name: count, dtype: int64

In [7]:
df

Unnamed: 0,brand,color
0,1,1
1,0,2
2,3,0
3,2,1
4,0,0


## Feature Scaling

In [8]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Create a DataFrame
data = pd.DataFrame({
    'age': [35, 45, 28, 52, 41],
    'income': [50000, 60000, 35000, 75000, 80000]
})

# Create a scaler object
scaler = StandardScaler()

# Fit the scaler on the data
scaler.fit(data)

# Transform the data
scaled_data = scaler.transform(data)

# Print the scaled data
print(scaled_data)


[[-0.63170839 -0.60858062]
 [ 0.58311544  0.        ]
 [-1.48208507 -1.52145155]
 [ 1.43349212  0.91287093]
 [ 0.09718591  1.21716124]]


In [9]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Create a DataFrame
data = pd.DataFrame({
    'age': [35, 45, 28, 52, 41],
    'income': [50000, 60000, 35000, 75000, 80000]
})

# Create a scaler object
scaler = MinMaxScaler()

# Fit the scaler on the data
scaler.fit(data)

# Transform the data
scaled_data = scaler.transform(data)

# Print the scaled data
print(scaled_data)


[[0.29166667 0.33333333]
 [0.70833333 0.55555556]
 [0.         0.        ]
 [1.         0.88888889]
 [0.54166667 1.        ]]
