## The FunctionTransformer is a tool in scikit-learn, a popular Python library for machine learning, that allows you to apply a specified function to the input data. The FunctionTransformer can be useful for performing custom transformations of input data in a machine learning pipeline.
The FunctionTransformer takes as input a single function, which it calls on the input data as a whole (the full array of samples) rather than on one sample at a time. This function can be any Python callable that takes a single argument, such as a lambda function or a user-defined function. The function should return the transformed data.

In [4]:
import numpy as np

In [19]:
from sklearn.preprocessing import FunctionTransformer
# Small 2x2 example dataset.
X = np.array([[1, 2], [3, 4]])

# Wrap NumPy's log1p, i.e. log(1 + x), in a scikit-learn transformer.
log_transform = FunctionTransformer(func=np.log1p)

# Run the transformation on the dataset (no fit call is made beforehand).
X_transformed = log_transform.transform(X)

# Show the transformed values.
print(X_transformed)


[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


## Types of function transformation in machine learning

In [21]:
# 1. Custom feature engineering
from sklearn.preprocessing import FunctionTransformer
# Toy dataset: two samples (rows) with two features (columns) each.
x = np.array([[1, 2], [3, 4]])
# Custom feature-engineering step: keep the raw features and append their squares.
def squ(x):
    """Return ``x`` stacked horizontally with its element-wise square."""
    squared = x ** 2
    return np.hstack([x, squared])
# Wrap the custom function so it can be used like any other scikit-learn transformer.
custom_transformer = FunctionTransformer(squ)
# Apply the transformer to the dataset: each row keeps its original values
# and gains their squares as extra columns.
custom_transformer.transform(x)

In [24]:
from sklearn.preprocessing import FunctionTransformer
# Toy dataset: two samples (rows) with two features (columns) each.
x = np.array([[1, 2], [3, 4]])
# Custom scaling step: divide the data by its largest value.
def my_scaling(x):
    """Scale ``x`` by dividing every entry by the maximum of ``x``."""
    largest = np.max(x)
    return x / largest

# Wrap the scaling function in a scikit-learn transformer.
custom_transformer = FunctionTransformer(my_scaling)

# Transform the lower-case `x` created in this cell, rather than relying on a
# same-valued upper-case `X` left over from an earlier cell.
x_transformer = custom_transformer.transform(x)

print(x_transformer)


[[0.25 0.5 ]
 [0.75 1.  ]]


In [26]:
from sklearn.preprocessing import FunctionTransformer
# Toy dataset with one missing value (NaN) in the last position.
x = np.array([[1, 2], [3, np.nan]])

def my_cleaning(X):
    """Return a copy of ``X`` in which every NaN entry is replaced by 0.

    Parameters
    ----------
    X : array-like of numbers
        Data to clean. It is read but never modified.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as ``X`` and NaN values set to 0.
    """
    # Operate on the argument itself (not on a global variable) and build a
    # new array, so the caller's data is left untouched.
    return np.where(np.isnan(X), 0, X)

# Wrap the cleaning function in a scikit-learn transformer.
custom_transformer = FunctionTransformer(func=my_cleaning)

# Run the cleaning step on the dataset and show the result.
x_transformer = custom_transformer.transform(x)
print(x_transformer)

[[1. 2.]
 [3. 0.]]


In [28]:
import numpy as np
import pandas as pd

In [30]:
# Load the placement dataset.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# cell only runs where that exact file exists.
df = pd.read_csv(
    "C:\\Users\\Choud\\OneDrive\\Desktop\\data sets\\placement - placement.csv"
)

In [32]:
# Preview the placement data (the notebook display abbreviates long frames with a "..." row).
df

Unnamed: 0,cgpa,resume_score,placed
0,8.14,6.52,1
1,6.17,5.17,0
2,8.27,8.86,1
3,6.88,7.27,1
4,7.52,7.30,1
...,...,...,...
95,6.33,6.38,0
96,8.23,7.76,1
97,6.65,7.78,0
98,8.14,5.63,1


In [34]:
# Separate the target column from the feature columns.
y = df['placed']
x = df.drop(columns='placed')

In [36]:
from sklearn.preprocessing import FunctionTransformer

In [38]:
# Wrap NumPy's log1p, i.e. log(1 + x), in a scikit-learn transformer.
log_transform = FunctionTransformer(func=np.log1p)

# Run the transformation on the feature columns.
x_transformer = log_transform.transform(x)

In [40]:
# Inspect the log1p-transformed placement features.
x_transformer

Unnamed: 0,cgpa,resume_score
0,2.212660,2.017566
1,1.969906,1.819699
2,2.226783,2.288486
3,2.064328,2.112635
4,2.142416,2.116256
...,...,...
95,1.991976,1.998774
96,2.222459,2.170196
97,2.034706,2.172476
98,2.212660,1.891605


In [44]:
# Load the toy covid dataset; from here on `df` refers to this frame.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# cell only runs where that exact file exists.
df = pd.read_csv(
    "C:\\Users\\Choud\\OneDrive\\Desktop\\data sets\\covid_toy - covid_toy.csv"
)

In [46]:
# Preview the raw covid data (the notebook display abbreviates long frames with a "..." row).
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No
...,...,...,...,...,...,...
95,12,Female,104.0,Mild,Bangalore,No
96,51,Female,101.0,Strong,Kolkata,Yes
97,20,Female,101.0,Mild,Bangalore,No
98,5,Female,98.0,Strong,Mumbai,No


In [80]:
# Plan: covid data ---> impute missing values + encode ---> x data ---> log transformation

In [50]:
# Count the missing values in each column.
df.isna().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [76]:
from sklearn.impute  import SimpleImputer 

In [56]:
# Imputer that fills missing entries with the column mean (scikit-learn's default strategy, spelled out).
si = SimpleImputer(strategy="mean")

In [58]:
# Select 'fever' with double brackets so the imputer receives a 2-D,
# single-column frame, then write the imputed values back into `df`.
fever_2d = df[['fever']]
df['fever'] = si.fit_transform(fever_2d)

In [62]:
# Re-count the missing values in each column after the imputation step.
df.isna().sum()

age          0
gender       0
fever        0
cough        0
city         0
has_covid    0
dtype: int64

In [64]:
# Integer codes for each categorical column.
# Note that 'has_covid' maps 'Yes' to 0 and 'No' to 1.
encodings = {
    'gender': {"Male": 0, "Female": 1},
    'cough': {"Mild": 0, "Strong": 1},
    'city': {"Kolkata": 0, "Bangalore": 1, "Delhi": 2, "Mumbai": 3},
    'has_covid': {'Yes': 0, 'No': 1},
}

# Replace each column's labels with their integer codes, in place on `df`.
for column, codes in encodings.items():
    df[column] = df[column].map(codes)

In [66]:
# Check the frame after imputation and encoding; the displayed rows now hold only numeric values.
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,0,103.0,0,0,1
1,27,0,100.0,0,2,0
2,42,0,101.0,0,2,1
3,31,1,98.0,0,0,1
4,65,1,101.0,0,3,1
...,...,...,...,...,...,...
95,12,1,104.0,0,1,1
96,51,1,101.0,1,0,0
97,20,1,101.0,0,1,1
98,5,1,98.0,1,3,1


In [68]:
# Separate the target column from the feature columns.
y = df['has_covid']
x = df.drop(columns='has_covid')

In [70]:
from sklearn.preprocessing import FunctionTransformer

In [72]:
# Wrap NumPy's log1p, i.e. log(1 + x), in a scikit-learn transformer.
log_transform = FunctionTransformer(func=np.log1p)

# Run the transformation on every feature column, including the integer-encoded ones.
x_transformer = log_transform.transform(x)

In [74]:
# Inspect the log1p-transformed covid features.
x_transformer

Unnamed: 0,age,gender,fever,cough,city
0,4.110874,0.000000,4.644391,0.000000,0.000000
1,3.332205,0.000000,4.615121,0.000000,1.098612
2,3.761200,0.000000,4.624973,0.000000,1.098612
3,3.465736,0.693147,4.595120,0.000000,0.000000
4,4.189655,0.693147,4.624973,0.000000,1.386294
...,...,...,...,...,...
95,2.564949,0.693147,4.653960,0.000000,0.693147
96,3.951244,0.693147,4.624973,0.693147,0.000000
97,3.044522,0.693147,4.624973,0.000000,0.693147
98,1.791759,0.693147,4.595120,0.693147,1.386294
