Function Transformer :
It is a tool in scikit-learn, a popular Python Library for machine learning, that allows you to apply a specified function to the input data. The Function Transformer can be useful for performing custom transformations of input data in a machine learning pipeline.

In [20]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np
import pandas as pd


In [5]:
# Creating an array
x = np.array([[1, 2], [3, 4]])

# Building logic
log_transform = FunctionTransformer(np.log1p)

# Applying the function transform to the dataset
x_transformed = log_transform.transform(x)

print(x_transformed)

[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [7]:
## Similarly, Creating Array
x = np.array([[1,2], [3,4]])

## Defining a custom feature engineering Function 
def squ(x):
    return np.hstack((x, x**2))

## Defining Function Transformer to apply the custom function
sq_transform = FunctionTransformer(squ)

## Applying Function Transformer
x_new = sq_transform.fit_transform(x)
x_new


array([[ 1,  2,  1,  4],
       [ 3,  4,  9, 16]])

In [9]:
## Creating Datset
x = np.array([[1,2], [3,4]])

# define a custom scaling function
def my_scaling(x):
    return x / np.max(x)

# create a Function Transformer to apply the custom function
custom_transformer = FunctionTransformer(my_scaling)

# apply the transformer to the input data
x_transformed = custom_transformer.transform(x)

# View the transformed data
print(x_transformed)

[[0.25 0.5 ]
 [0.75 1.  ]]


In [10]:
# Data Cleaning

# Creating a array with a missing value
x = np.array([[1,2], [3, np.nan]])

# define a custom cleaning function
def my_cleaning(x):
    x[np.isnan(x)] = 0
    return x

# create a Function Transformer to apply the custom function
custom_transformer = FunctionTransformer(my_cleaning)

# apply the transformer to the input data
x_transformed = custom_transformer.transform(x)

# view the transformed data
print(x_transformed)

[[1. 2.]
 [3. 0.]]


In [12]:
df1 = pd.read_csv("C:\\Users\\vedan\OneDrive\\Documents\\Regex\\Python\ML\\CSV Files\\newplacementdata.csv")
df1.head()

  df1 = pd.read_csv("C:\\Users\\vedan\OneDrive\\Documents\\Regex\\Python\ML\\CSV Files\\newplacementdata.csv")


Unnamed: 0,cgpa,placement_exam_marks,placed
0,7.19,26,1
1,7.46,38,1
2,7.54,40,1
3,6.42,8,1
4,7.23,17,0


In [14]:
x = df1.drop(columns= ['placed'])
y = df1['placed']

In [15]:
log_transform = FunctionTransformer(np.log1p)
x_new = log_transform.transform(x)
x_new

Unnamed: 0,cgpa,placement_exam_marks
0,2.102914,3.295837
1,2.135349,3.663562
2,2.144761,3.713572
3,2.004179,2.197225
4,2.107786,2.890372
...,...,...
995,2.289500,3.806662
996,2.314514,4.189655
997,1.773256,3.555348
998,2.263844,3.850148


In [16]:
df2 = pd.read_csv("C:\\Users\\vedan\OneDrive\\Documents\\Regex\\Python\ML\\CSV Files\\insurance.csv")
df2.head()

  df2 = pd.read_csv("C:\\Users\\vedan\OneDrive\\Documents\\Regex\\Python\ML\\CSV Files\\insurance.csv")


Unnamed: 0,age,sex,bmi,children,smoker,region
0,19,female,27.9,0,yes,southwest
1,18,male,33.77,1,no,southeast
2,28\n33,male\nmale,33\n22.705,3\n0,no\nno,southeast\nnorthwest
3,32,male,28.88,0,no,northwest
4,31,female,25.74,0,no,southeast


In [18]:
from sklearn.preprocessing import LabelEncoder
lb = LabelEncoder()

df2['sex'] = lb.fit_transform(df2['sex'])
df2['smoker'] = lb.fit_transform(df2['smoker'])
df2['region'] = lb.fit_transform(df2['region'])

In [21]:
num_cols = df2.select_dtypes(include=['int64', 'float64']).columns


In [22]:
log_transform = FunctionTransformer(np.log1p)

df2_new = df2.copy()
df2_new[num_cols] = log_transform.fit_transform(df2[num_cols])

df2_new


Unnamed: 0,age,sex,bmi,children,smoker,region
0,19,0.0,27.9,0,1.386294,2.397895
1,18,1.098612,33.77,1,0.0,1.791759
2,28\n33,1.609438,33\n22.705,3\n0,0.693147,2.079442
3,32,1.098612,28.88,0,0.0,1.609438
4,31,0.0,25.74,0,0.0,1.791759
5,46\n37,0.693147,33.44\n27.74,1\n3,0.693147,2.079442
6,37,1.098612,29.83,2,0.0,0.0
7,60,0.0,25.84,0,0.0,1.609438
8,25\n62,1.386294,26.22\n26.29,0\n0,1.098612,1.098612
9,23,1.098612,34.4,0,0.0,2.397895


In [23]:
df2 = df2.apply(pd.to_numeric, errors='coerce')


In [24]:
df2_new = log_transform.fit_transform(df2)
