In [1]:
from sklearn.preprocessing import FunctionTransformer

In [2]:
import numpy as np

In [3]:
# Build a small 2x2 sample dataset
X = np.array([[1, 2], [3, 4]])
# Wrap np.log1p, i.e. log(1 + x), in a transformer object
log_transform = FunctionTransformer(func=np.log1p)
# Apply the transformation; no fit() call is made before transform() here
X_transformed = log_transform.transform(X)

# Show the transformed data
print(X_transformed)

[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [4]:
# Practical use cases

In [5]:
# 1. Custom feature engineering
from sklearn.preprocessing import FunctionTransformer
import numpy as np

In [6]:
# Sample 2x2 input for the custom feature-engineering transformer
X = np.array([[1,2],[3,4]])

def my_feature_eng(X):
    """Append the element-wise squares of ``X`` to ``X`` itself.

    The original values come first, followed by their squares, joined
    with ``np.hstack``. For a 2-D array this doubles the number of
    columns, e.g. ``[[1, 2]]`` becomes ``[[1, 2, 1, 4]]``.
    """
    squared = X ** 2
    return np.hstack((X, squared))

# Wrap the feature-engineering function so it can be used as a transformer
custom_transformer = FunctionTransformer(func=my_feature_eng)
# Apply it: every row gains the squares of its original values
X_transformed = custom_transformer.transform(X)
# Show the expanded feature matrix
print(X_transformed)

[[ 1  2  1  4]
 [ 3  4  9 16]]


In [7]:
# 2. Scaling and Normalization
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Sample 2x2 input for the custom scaling transformer
X = np.array([[1,2],[3,4]])

def my_scaling(X):
    """Divide every value in ``X`` by the largest value found in ``X``.

    The maximum is taken over the whole input (``np.max`` with no axis),
    not per column, so all values are scaled by the same number.
    There is no guard for a maximum of 0.
    """
    largest = np.max(X)
    return X / largest
# Wrap the scaling function so it can be used as a transformer
custom_transformer = FunctionTransformer(func=my_scaling)
# Apply it to the sample data
X_transformed = custom_transformer.transform(X)
# Show the scaled values
print(X_transformed)


[[0.25 0.5 ]
 [0.75 1.  ]]


In [10]:
# 3. Data Cleaning
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Create a dataset that contains one missing value (np.nan)
X = np.array([[1,2],[3,np.nan]])
#define a custom cleaning function
def my_cleaning(X):
    """Return a copy of ``X`` in which every NaN has been replaced by 0.

    The replacement is done on a copy, so the caller's data is left
    unchanged and the raw values (including the NaNs) remain available
    after the transformer has run.

    Parameters
    ----------
    X : object with a ``copy()`` method that supports boolean-mask
        assignment (e.g. a NumPy array).

    Returns
    -------
    A new object of the same kind as ``X`` with NaNs set to 0.
    """
    cleaned = X.copy()
    cleaned[np.isnan(cleaned)] = 0
    return cleaned
# Wrap the cleaning function so it can be used as a transformer
custom_transformer = FunctionTransformer(func=my_cleaning)
# Run the transformer on the input data
X_transformed = custom_transformer.transform(X)
# Show the cleaned result
print(X_transformed)


[[1. 2.]
 [3. 0.]]


In [13]:
# 4. Dimensionality reduction
from sklearn.preprocessing import FunctionTransformer
from sklearn.decomposition import PCA
import numpy as np

# Create a small 2x2 dataset to reduce to one dimension
X = np.array([[1,2],[3,4]])

#define a custom PCA function
def my_pca(X):
    """Project ``X`` onto its first principal component.

    A new one-component PCA model is fitted on ``X`` every time the
    function is called, and the fitted projection of ``X`` is returned.
    NOTE(review): because of that refit, calling this on different data
    (e.g. train vs. test) projects onto different axes - confirm that is
    acceptable before reusing it outside this demo.
    """
    return PCA(n_components=1).fit_transform(X)

# Wrap the PCA helper so it can be used as a transformer
custom_transformer = FunctionTransformer(func=my_pca)

# Run the transformer on the input data
X_transformed = custom_transformer.transform(X)

# Show the one-dimensional projection
print(X_transformed)


[[-1.41421356]
 [ 1.41421356]]


# Real-life use of FunctionTransformer

In [19]:
import numpy as np 
import pandas as pd

from pathlib import Path

# Folder that holds the CSV files; edit this single line to run the notebook
# on a machine where the data lives somewhere else.
DATA_DIR = Path("C:/Users/HP/Downloads")

# Load the placement dataset
df = pd.read_csv(DATA_DIR / "placement.csv")


In [20]:
# Preview the first rows to check that the data loaded as expected
df.head()

Unnamed: 0,cgpa,resume_score,placed
0,8.14,6.52,1
1,6.17,5.17,0
2,8.27,8.86,1
3,6.88,7.27,1
4,7.52,7.3,1


In [22]:
from sklearn.preprocessing import FunctionTransformer
# Features: every column except the target
x = df.drop(columns=['placed'])
# Target: the 'placed' column
y = df['placed']

In [24]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [26]:
# np.log1p computes log(1 + x) element-wise
trf = FunctionTransformer(func=np.log1p)

# Fit on the training split only, then apply the same transformer to both splits
trf.fit(x_train)
x_train_transformed = trf.transform(x_train)
x_test_transformed = trf.transform(x_test)

In [27]:
# Display the log-transformed training features
x_train_transformed

Unnamed: 0,cgpa,resume_score
55,2.071913,2.063058
88,2.204972,1.955860
26,1.983756,2.010895
42,2.066863,2.068128
69,2.203869,2.308567
...,...,...
60,1.927164,2.033398
71,2.121063,2.221375
14,2.141242,1.981001
92,1.860975,1.971299


In [62]:
import numpy as np 
import pandas as pd

from pathlib import Path

# Folder that holds the CSV files; edit this single line to run the notebook
# on a machine where the data lives somewhere else.
DATA_DIR = Path("C:/Users/HP/Downloads")

# Load the toy COVID dataset
df = pd.read_csv(DATA_DIR / "covid_toy.csv")

In [63]:
# Preview the first rows to check that the data loaded as expected
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [65]:
from sklearn.preprocessing import LabelEncoder

# Replace each text column with integer codes.
# A fresh encoder is fitted per column and stored in `encoders`, so the
# category <-> code mapping of every column stays available afterwards
# (e.g. via encoders['city'].classes_ or inverse_transform). Refitting one
# shared encoder would keep only the mapping of the last column.
# NOTE: scikit-learn documents LabelEncoder as intended for target values;
# OrdinalEncoder is the usual choice for feature columns.
encoders = {}
for col in ['gender', 'cough', 'city', 'has_covid']:
    L = LabelEncoder()
    df[col] = L.fit_transform(df[col])
    encoders[col] = L

In [73]:
# Features: every column except the target
x = df.drop(columns=['has_covid'])
# Target: the 'has_covid' column
y = df['has_covid']
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
# Display the training features
x_train

Unnamed: 0,age,gender,fever,cough,city
55,81,0,101.0,0,3
88,5,0,100.0,0,2
26,19,0,100.0,0,2
42,27,1,100.0,0,1
69,73,0,103.0,0,1
...,...,...,...,...,...
60,24,0,102.0,1,0
71,75,0,104.0,1,1
14,51,1,104.0,0,0
92,82,0,102.0,1,2


In [74]:
# np.log1p computes log(1 + x) element-wise
trf = FunctionTransformer(func=np.log1p)

# Fit on the training split only, then apply the same transformer to both splits
trf.fit(x_train)
x_train_transformed = trf.transform(x_train)
x_test_transformed = trf.transform(x_test)

In [75]:
# Display the log-transformed training features
x_train_transformed

Unnamed: 0,age,gender,fever,cough,city
55,4.406719,0.000000,4.624973,0.000000,1.386294
88,1.791759,0.000000,4.615121,0.000000,1.098612
26,2.995732,0.000000,4.615121,0.000000,1.098612
42,3.332205,0.693147,4.615121,0.000000,0.693147
69,4.304065,0.000000,4.644391,0.000000,0.693147
...,...,...,...,...,...
60,3.218876,0.000000,4.634729,0.693147,0.000000
71,4.330733,0.000000,4.653960,0.693147,0.693147
14,3.951244,0.693147,4.653960,0.000000,0.000000
92,4.418841,0.000000,4.634729,0.693147,1.098612


In [79]:
import numpy as np
import pandas as pd

from pathlib import Path

# Folder that holds the CSV files; edit this single line to run the notebook
# on a machine where the data lives somewhere else.
DATA_DIR = Path("C:/Users/HP/Downloads")

# Load the placement dataset
df = pd.read_csv(DATA_DIR / "placement.csv")

In [80]:
# Display the loaded DataFrame
df

Unnamed: 0,cgpa,resume_score,placed
0,8.14,6.52,1
1,6.17,5.17,0
2,8.27,8.86,1
3,6.88,7.27,1
4,7.52,7.30,1
...,...,...,...
95,6.33,6.38,0
96,8.23,7.76,1
97,6.65,7.78,0
98,8.14,5.63,1


In [81]:
# Features: every column except the target
x = df.drop(columns=['placed'])
# Target: the 'placed' column
y = df['placed']

In [83]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [84]:
# np.log1p computes log(1 + x) element-wise
trf = FunctionTransformer(func=np.log1p)

# Fit on the training split only, then apply the same transformer to both splits
trf.fit(x_train)
x_train_transformed = trf.transform(x_train)
x_test_transformed = trf.transform(x_test)

In [85]:
# Display the log-transformed training features
x_train_transformed

Unnamed: 0,cgpa,resume_score
55,2.071913,2.063058
88,2.204972,1.955860
26,1.983756,2.010895
42,2.066863,2.068128
69,2.203869,2.308567
...,...,...
60,1.927164,2.033398
71,2.121063,2.221375
14,2.141242,1.981001
92,1.860975,1.971299


In [94]:
import numpy as np
import pandas as pd

from pathlib import Path

# Folder that holds the CSV files; edit this single line to run the notebook
# on a machine where the data lives somewhere else.
DATA_DIR = Path("C:/Users/HP/Downloads")

# Load the employee dataset
df = pd.read_csv(DATA_DIR / "Employee.csv")

In [95]:
# Display the loaded DataFrame
df

Unnamed: 0,Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain,LeaveOrNot
0,Bachelors,2017,Bangalore,3,34,Male,No,0,0
1,Bachelors,2013,Pune,1,28,Female,No,3,1
2,Bachelors,2014,New Delhi,3,38,Female,No,2,0
3,Masters,2016,Bangalore,3,27,Male,No,5,1
4,Masters,2017,Pune,3,24,Male,Yes,2,1
...,...,...,...,...,...,...,...,...,...
4648,Bachelors,2013,Bangalore,3,26,Female,No,4,0
4649,Masters,2013,Pune,2,37,Male,No,2,1
4650,Masters,2018,New Delhi,3,27,Male,No,5,1
4651,Bachelors,2012,Bangalore,3,30,Male,Yes,2,0


In [97]:
# Count how many rows fall into each value of the 'LeaveOrNot' column
df['LeaveOrNot'].value_counts()

0    3053
1    1600
Name: LeaveOrNot, dtype: int64

In [99]:
from sklearn.preprocessing import LabelEncoder
# Replace each text column with integer codes.
# A fresh encoder is fitted per column and stored in `encoders`, so the
# category <-> code mapping of every column stays available afterwards
# (e.g. via encoders['City'].classes_ or inverse_transform). Refitting one
# shared encoder would keep only the mapping of the last column.
# NOTE: scikit-learn documents LabelEncoder as intended for target values;
# OrdinalEncoder is the usual choice for feature columns.
encoders = {}
for col in ['Education', 'City', 'Gender', 'EverBenched']:
    L = LabelEncoder()
    df[col] = L.fit_transform(df[col])
    encoders[col] = L

In [100]:
# Display the DataFrame after the text columns were label-encoded
df

Unnamed: 0,Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain,LeaveOrNot
0,0,2017,0,3,34,1,0,0,0
1,0,2013,2,1,28,0,0,3,1
2,0,2014,1,3,38,0,0,2,0
3,1,2016,0,3,27,1,0,5,1
4,1,2017,2,3,24,1,1,2,1
...,...,...,...,...,...,...,...,...,...
4648,0,2013,0,3,26,0,0,4,0
4649,1,2013,2,2,37,1,0,2,1
4650,1,2018,1,3,27,1,0,5,1
4651,0,2012,0,3,30,1,1,2,0


In [102]:
# Features: every column except the target
x = df.drop(columns=['LeaveOrNot'])
# Target: the 'LeaveOrNot' column
y = df['LeaveOrNot']

In [103]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [106]:
# np.log1p computes log(1 + x) element-wise
trf = FunctionTransformer(func=np.log1p)

# Fit on the training split only, then apply the same transformer to both splits
trf.fit(x_train)
x_train_transformed = trf.transform(x_train)
x_test_transformed = trf.transform(x_test)

In [109]:
# Display the log-transformed training features
x_train_transformed

Unnamed: 0,Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain
2850,0.693147,7.607878,0.693147,1.386294,3.433987,0.693147,0.000000,0.000000
589,0.000000,7.607381,0.000000,1.386294,3.258097,0.693147,0.000000,1.386294
2086,0.693147,7.609862,1.098612,1.098612,3.401197,0.693147,0.000000,1.098612
445,0.693147,7.607381,1.098612,1.386294,3.218876,0.693147,0.000000,1.098612
3654,0.693147,7.609862,0.693147,1.098612,3.583519,0.693147,0.000000,1.098612
...,...,...,...,...,...,...,...,...
4426,0.000000,7.608374,0.000000,1.386294,3.496508,0.693147,0.000000,0.693147
466,0.000000,7.607878,0.000000,1.386294,3.295837,0.693147,0.693147,1.609438
3092,0.000000,7.609367,0.000000,1.386294,3.688879,0.000000,0.000000,0.693147
3772,0.000000,7.608871,0.000000,1.386294,3.737670,0.693147,0.000000,0.693147
